1/*
2 * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
6 * Copyright (c) 2014,2018 Intel Corporation.  All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 */
37
38#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
39
40#include <linux/dma-mapping.h>
41#include <linux/slab.h>
42#include <linux/module.h>
43#include <linux/security.h>
44#include <linux/xarray.h>
45#include <rdma/ib_cache.h>
46
47#include "mad_priv.h"
48#include "core_priv.h"
49#include "mad_rmpp.h"
50#include "smi.h"
51#include "opa_smi.h"
52#include "agent.h"
53
54#define CREATE_TRACE_POINTS
55#include <trace/events/ib_mad.h>
56
57#ifdef CONFIG_TRACEPOINTS
58static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
59			  struct ib_mad_qp_info *qp_info,
60			  struct trace_event_raw_ib_mad_send_template *entry)
61{
62	struct ib_ud_wr *wr = &mad_send_wr->send_wr;
63	struct rdma_ah_attr attr = {};
64
65	rdma_query_ah(wr->ah, &attr);
66
67	/* These are common */
68	entry->sl = attr.sl;
69	entry->rqpn = wr->remote_qpn;
70	entry->rqkey = wr->remote_qkey;
71	entry->dlid = rdma_ah_get_dlid(&attr);
72}
73#endif
74
75static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
76static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
77
78module_param_named(send_queue_size, mad_sendq_size, int, 0444);
79MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
80module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
81MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
82
83static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
84static u32 ib_mad_client_next;
85static struct list_head ib_mad_port_list;
86
87/* Port list lock */
88static DEFINE_SPINLOCK(ib_mad_port_list_lock);
89
90/* Forward declarations */
91static int method_in_use(struct ib_mad_mgmt_method_table **method,
92			 struct ib_mad_reg_req *mad_reg_req);
93static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
94static struct ib_mad_agent_private *find_mad_agent(
95					struct ib_mad_port_private *port_priv,
96					const struct ib_mad_hdr *mad);
97static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
98				    struct ib_mad_private *mad);
99static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
100static void timeout_sends(struct work_struct *work);
101static void local_completions(struct work_struct *work);
102static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
103			      struct ib_mad_agent_private *agent_priv,
104			      u8 mgmt_class);
105static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
106			   struct ib_mad_agent_private *agent_priv);
107static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
108			      struct ib_wc *wc);
109static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
110
111/*
112 * Returns a ib_mad_port_private structure or NULL for a device/port
113 * Assumes ib_mad_port_list_lock is being held
114 */
115static inline struct ib_mad_port_private *
116__ib_get_mad_port(struct ib_device *device, u32 port_num)
117{
118	struct ib_mad_port_private *entry;
119
120	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
121		if (entry->device == device && entry->port_num == port_num)
122			return entry;
123	}
124	return NULL;
125}
126
127/*
128 * Wrapper function to return a ib_mad_port_private structure or NULL
129 * for a device/port
130 */
131static inline struct ib_mad_port_private *
132ib_get_mad_port(struct ib_device *device, u32 port_num)
133{
134	struct ib_mad_port_private *entry;
135	unsigned long flags;
136
137	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
138	entry = __ib_get_mad_port(device, port_num);
139	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
140
141	return entry;
142}
143
144static inline u8 convert_mgmt_class(u8 mgmt_class)
145{
146	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
147	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
148		0 : mgmt_class;
149}
150
151static int get_spl_qp_index(enum ib_qp_type qp_type)
152{
153	switch (qp_type) {
154	case IB_QPT_SMI:
155		return 0;
156	case IB_QPT_GSI:
157		return 1;
158	default:
159		return -1;
160	}
161}
162
163static int vendor_class_index(u8 mgmt_class)
164{
165	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
166}
167
168static int is_vendor_class(u8 mgmt_class)
169{
170	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
171	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
172		return 0;
173	return 1;
174}
175
176static int is_vendor_oui(char *oui)
177{
178	if (oui[0] || oui[1] || oui[2])
179		return 1;
180	return 0;
181}
182
183static int is_vendor_method_in_use(
184		struct ib_mad_mgmt_vendor_class *vendor_class,
185		struct ib_mad_reg_req *mad_reg_req)
186{
187	struct ib_mad_mgmt_method_table *method;
188	int i;
189
190	for (i = 0; i < MAX_MGMT_OUI; i++) {
191		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
192			method = vendor_class->method_table[i];
193			if (method) {
194				if (method_in_use(&method, mad_reg_req))
195					return 1;
196				else
197					break;
198			}
199		}
200	}
201	return 0;
202}
203
204int ib_response_mad(const struct ib_mad_hdr *hdr)
205{
206	return ((hdr->method & IB_MGMT_METHOD_RESP) ||
207		(hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) ||
208		((hdr->mgmt_class == IB_MGMT_CLASS_BM) &&
209		 (hdr->attr_mod & IB_BM_ATTR_MOD_RESP)));
210}
211EXPORT_SYMBOL(ib_response_mad);
212
213/*
214 * ib_register_mad_agent - Register to send/receive MADs
215 *
216 * Context: Process context.
217 */
218struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
219					   u32 port_num,
220					   enum ib_qp_type qp_type,
221					   struct ib_mad_reg_req *mad_reg_req,
222					   u8 rmpp_version,
223					   ib_mad_send_handler send_handler,
224					   ib_mad_recv_handler recv_handler,
225					   void *context,
226					   u32 registration_flags)
227{
228	struct ib_mad_port_private *port_priv;
229	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
230	struct ib_mad_agent_private *mad_agent_priv;
231	struct ib_mad_reg_req *reg_req = NULL;
232	struct ib_mad_mgmt_class_table *class;
233	struct ib_mad_mgmt_vendor_class_table *vendor;
234	struct ib_mad_mgmt_vendor_class *vendor_class;
235	struct ib_mad_mgmt_method_table *method;
236	int ret2, qpn;
237	u8 mgmt_class, vclass;
238
239	if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
240	    (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
241		return ERR_PTR(-EPROTONOSUPPORT);
242
243	/* Validate parameters */
244	qpn = get_spl_qp_index(qp_type);
245	if (qpn == -1) {
246		dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
247				    __func__, qp_type);
248		goto error1;
249	}
250
251	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
252		dev_dbg_ratelimited(&device->dev,
253				    "%s: invalid RMPP Version %u\n",
254				    __func__, rmpp_version);
255		goto error1;
256	}
257
258	/* Validate MAD registration request if supplied */
259	if (mad_reg_req) {
260		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
261			dev_dbg_ratelimited(&device->dev,
262					    "%s: invalid Class Version %u\n",
263					    __func__,
264					    mad_reg_req->mgmt_class_version);
265			goto error1;
266		}
267		if (!recv_handler) {
268			dev_dbg_ratelimited(&device->dev,
269					    "%s: no recv_handler\n", __func__);
270			goto error1;
271		}
272		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
273			/*
274			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
275			 * one in this range currently allowed
276			 */
277			if (mad_reg_req->mgmt_class !=
278			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
279				dev_dbg_ratelimited(&device->dev,
280					"%s: Invalid Mgmt Class 0x%x\n",
281					__func__, mad_reg_req->mgmt_class);
282				goto error1;
283			}
284		} else if (mad_reg_req->mgmt_class == 0) {
285			/*
286			 * Class 0 is reserved in IBA and is used for
287			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
288			 */
289			dev_dbg_ratelimited(&device->dev,
290					    "%s: Invalid Mgmt Class 0\n",
291					    __func__);
292			goto error1;
293		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
294			/*
295			 * If class is in "new" vendor range,
296			 * ensure supplied OUI is not zero
297			 */
298			if (!is_vendor_oui(mad_reg_req->oui)) {
299				dev_dbg_ratelimited(&device->dev,
300					"%s: No OUI specified for class 0x%x\n",
301					__func__,
302					mad_reg_req->mgmt_class);
303				goto error1;
304			}
305		}
306		/* Make sure class supplied is consistent with RMPP */
307		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
308			if (rmpp_version) {
309				dev_dbg_ratelimited(&device->dev,
310					"%s: RMPP version for non-RMPP class 0x%x\n",
311					__func__, mad_reg_req->mgmt_class);
312				goto error1;
313			}
314		}
315
316		/* Make sure class supplied is consistent with QP type */
317		if (qp_type == IB_QPT_SMI) {
318			if ((mad_reg_req->mgmt_class !=
319					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
320			    (mad_reg_req->mgmt_class !=
321					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
322				dev_dbg_ratelimited(&device->dev,
323					"%s: Invalid SM QP type: class 0x%x\n",
324					__func__, mad_reg_req->mgmt_class);
325				goto error1;
326			}
327		} else {
328			if ((mad_reg_req->mgmt_class ==
329					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
330			    (mad_reg_req->mgmt_class ==
331					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
332				dev_dbg_ratelimited(&device->dev,
333					"%s: Invalid GS QP type: class 0x%x\n",
334					__func__, mad_reg_req->mgmt_class);
335				goto error1;
336			}
337		}
338	} else {
339		/* No registration request supplied */
340		if (!send_handler)
341			goto error1;
342		if (registration_flags & IB_MAD_USER_RMPP)
343			goto error1;
344	}
345
346	/* Validate device and port */
347	port_priv = ib_get_mad_port(device, port_num);
348	if (!port_priv) {
349		dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
350				    __func__, port_num);
351		ret = ERR_PTR(-ENODEV);
352		goto error1;
353	}
354
355	/* Verify the QP requested is supported. For example, Ethernet devices
356	 * will not have QP0.
357	 */
358	if (!port_priv->qp_info[qpn].qp) {
359		dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
360				    __func__, qpn);
361		ret = ERR_PTR(-EPROTONOSUPPORT);
362		goto error1;
363	}
364
365	/* Allocate structures */
366	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
367	if (!mad_agent_priv) {
368		ret = ERR_PTR(-ENOMEM);
369		goto error1;
370	}
371
372	if (mad_reg_req) {
373		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
374		if (!reg_req) {
375			ret = ERR_PTR(-ENOMEM);
376			goto error3;
377		}
378	}
379
380	/* Now, fill in the various structures */
381	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
382	mad_agent_priv->reg_req = reg_req;
383	mad_agent_priv->agent.rmpp_version = rmpp_version;
384	mad_agent_priv->agent.device = device;
385	mad_agent_priv->agent.recv_handler = recv_handler;
386	mad_agent_priv->agent.send_handler = send_handler;
387	mad_agent_priv->agent.context = context;
388	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
389	mad_agent_priv->agent.port_num = port_num;
390	mad_agent_priv->agent.flags = registration_flags;
391	spin_lock_init(&mad_agent_priv->lock);
392	INIT_LIST_HEAD(&mad_agent_priv->send_list);
393	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
394	INIT_LIST_HEAD(&mad_agent_priv->done_list);
395	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
396	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
397	INIT_LIST_HEAD(&mad_agent_priv->local_list);
398	INIT_WORK(&mad_agent_priv->local_work, local_completions);
399	refcount_set(&mad_agent_priv->refcount, 1);
400	init_completion(&mad_agent_priv->comp);
401
402	ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
403	if (ret2) {
404		ret = ERR_PTR(ret2);
405		goto error4;
406	}
407
408	/*
409	 * The mlx4 driver uses the top byte to distinguish which virtual
410	 * function generated the MAD, so we must avoid using it.
411	 */
412	ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
413			mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
414			&ib_mad_client_next, GFP_KERNEL);
415	if (ret2 < 0) {
416		ret = ERR_PTR(ret2);
417		goto error5;
418	}
419
420	/*
421	 * Make sure MAD registration (if supplied)
422	 * is non overlapping with any existing ones
423	 */
424	spin_lock_irq(&port_priv->reg_lock);
425	if (mad_reg_req) {
426		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
427		if (!is_vendor_class(mgmt_class)) {
428			class = port_priv->version[mad_reg_req->
429						   mgmt_class_version].class;
430			if (class) {
431				method = class->method_table[mgmt_class];
432				if (method) {
433					if (method_in_use(&method,
434							   mad_reg_req))
435						goto error6;
436				}
437			}
438			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
439						  mgmt_class);
440		} else {
441			/* "New" vendor class range */
442			vendor = port_priv->version[mad_reg_req->
443						    mgmt_class_version].vendor;
444			if (vendor) {
445				vclass = vendor_class_index(mgmt_class);
446				vendor_class = vendor->vendor_class[vclass];
447				if (vendor_class) {
448					if (is_vendor_method_in_use(
449							vendor_class,
450							mad_reg_req))
451						goto error6;
452				}
453			}
454			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
455		}
456		if (ret2) {
457			ret = ERR_PTR(ret2);
458			goto error6;
459		}
460	}
461	spin_unlock_irq(&port_priv->reg_lock);
462
463	trace_ib_mad_create_agent(mad_agent_priv);
464	return &mad_agent_priv->agent;
465error6:
466	spin_unlock_irq(&port_priv->reg_lock);
467	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
468error5:
469	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
470error4:
471	kfree(reg_req);
472error3:
473	kfree(mad_agent_priv);
474error1:
475	return ret;
476}
477EXPORT_SYMBOL(ib_register_mad_agent);
478
479static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
480{
481	if (refcount_dec_and_test(&mad_agent_priv->refcount))
482		complete(&mad_agent_priv->comp);
483}
484
485static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
486{
487	struct ib_mad_port_private *port_priv;
488
489	/* Note that we could still be handling received MADs */
490	trace_ib_mad_unregister_agent(mad_agent_priv);
491
492	/*
493	 * Canceling all sends results in dropping received response
494	 * MADs, preventing us from queuing additional work
495	 */
496	cancel_mads(mad_agent_priv);
497	port_priv = mad_agent_priv->qp_info->port_priv;
498	cancel_delayed_work(&mad_agent_priv->timed_work);
499
500	spin_lock_irq(&port_priv->reg_lock);
501	remove_mad_reg_req(mad_agent_priv);
502	spin_unlock_irq(&port_priv->reg_lock);
503	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
504
505	flush_workqueue(port_priv->wq);
506
507	deref_mad_agent(mad_agent_priv);
508	wait_for_completion(&mad_agent_priv->comp);
509	ib_cancel_rmpp_recvs(mad_agent_priv);
510
511	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
512
513	kfree(mad_agent_priv->reg_req);
514	kfree_rcu(mad_agent_priv, rcu);
515}
516
517/*
518 * ib_unregister_mad_agent - Unregisters a client from using MAD services
519 *
520 * Context: Process context.
521 */
522void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
523{
524	struct ib_mad_agent_private *mad_agent_priv;
525
526	mad_agent_priv = container_of(mad_agent,
527				      struct ib_mad_agent_private,
528				      agent);
529	unregister_mad_agent(mad_agent_priv);
530}
531EXPORT_SYMBOL(ib_unregister_mad_agent);
532
533static void dequeue_mad(struct ib_mad_list_head *mad_list)
534{
535	struct ib_mad_queue *mad_queue;
536	unsigned long flags;
537
538	mad_queue = mad_list->mad_queue;
539	spin_lock_irqsave(&mad_queue->lock, flags);
540	list_del(&mad_list->list);
541	mad_queue->count--;
542	spin_unlock_irqrestore(&mad_queue->lock, flags);
543}
544
545static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
546		u16 pkey_index, u32 port_num, struct ib_wc *wc)
547{
548	memset(wc, 0, sizeof *wc);
549	wc->wr_cqe = cqe;
550	wc->status = IB_WC_SUCCESS;
551	wc->opcode = IB_WC_RECV;
552	wc->pkey_index = pkey_index;
553	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
554	wc->src_qp = IB_QP0;
555	wc->qp = qp;
556	wc->slid = slid;
557	wc->sl = 0;
558	wc->dlid_path_bits = 0;
559	wc->port_num = port_num;
560}
561
562static size_t mad_priv_size(const struct ib_mad_private *mp)
563{
564	return sizeof(struct ib_mad_private) + mp->mad_size;
565}
566
567static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags)
568{
569	size_t size = sizeof(struct ib_mad_private) + mad_size;
570	struct ib_mad_private *ret = kzalloc(size, flags);
571
572	if (ret)
573		ret->mad_size = mad_size;
574
575	return ret;
576}
577
578static size_t port_mad_size(const struct ib_mad_port_private *port_priv)
579{
580	return rdma_max_mad_size(port_priv->device, port_priv->port_num);
581}
582
583static size_t mad_priv_dma_size(const struct ib_mad_private *mp)
584{
585	return sizeof(struct ib_grh) + mp->mad_size;
586}
587
588/*
589 * Return 0 if SMP is to be sent
590 * Return 1 if SMP was consumed locally (whether or not solicited)
591 * Return < 0 if error
592 */
593static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
594				  struct ib_mad_send_wr_private *mad_send_wr)
595{
596	int ret = 0;
597	struct ib_smp *smp = mad_send_wr->send_buf.mad;
598	struct opa_smp *opa_smp = (struct opa_smp *)smp;
599	unsigned long flags;
600	struct ib_mad_local_private *local;
601	struct ib_mad_private *mad_priv;
602	struct ib_mad_port_private *port_priv;
603	struct ib_mad_agent_private *recv_mad_agent = NULL;
604	struct ib_device *device = mad_agent_priv->agent.device;
605	u32 port_num;
606	struct ib_wc mad_wc;
607	struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
608	size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
609	u16 out_mad_pkey_index = 0;
610	u16 drslid;
611	bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
612				    mad_agent_priv->qp_info->port_priv->port_num);
613
614	if (rdma_cap_ib_switch(device) &&
615	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
616		port_num = send_wr->port_num;
617	else
618		port_num = mad_agent_priv->agent.port_num;
619
620	/*
621	 * Directed route handling starts if the initial LID routed part of
622	 * a request or the ending LID routed part of a response is empty.
623	 * If we are at the start of the LID routed part, don't update the
624	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
625	 */
626	if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
627		u32 opa_drslid;
628
629		trace_ib_mad_handle_out_opa_smi(opa_smp);
630
631		if ((opa_get_smp_direction(opa_smp)
632		     ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
633		     OPA_LID_PERMISSIVE &&
634		     opa_smi_handle_dr_smp_send(opa_smp,
635						rdma_cap_ib_switch(device),
636						port_num) == IB_SMI_DISCARD) {
637			ret = -EINVAL;
638			dev_err(&device->dev, "OPA Invalid directed route\n");
639			goto out;
640		}
641		opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid);
642		if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) &&
643		    opa_drslid & 0xffff0000) {
644			ret = -EINVAL;
645			dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n",
646			       opa_drslid);
647			goto out;
648		}
649		drslid = (u16)(opa_drslid & 0x0000ffff);
650
651		/* Check to post send on QP or process locally */
652		if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD &&
653		    opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
654			goto out;
655	} else {
656		trace_ib_mad_handle_out_ib_smi(smp);
657
658		if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
659		     IB_LID_PERMISSIVE &&
660		     smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
661		     IB_SMI_DISCARD) {
662			ret = -EINVAL;
663			dev_err(&device->dev, "Invalid directed route\n");
664			goto out;
665		}
666		drslid = be16_to_cpu(smp->dr_slid);
667
668		/* Check to post send on QP or process locally */
669		if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
670		    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
671			goto out;
672	}
673
674	local = kmalloc(sizeof *local, GFP_ATOMIC);
675	if (!local) {
676		ret = -ENOMEM;
677		goto out;
678	}
679	local->mad_priv = NULL;
680	local->recv_mad_agent = NULL;
681	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
682	if (!mad_priv) {
683		ret = -ENOMEM;
684		kfree(local);
685		goto out;
686	}
687
688	build_smp_wc(mad_agent_priv->agent.qp,
689		     send_wr->wr.wr_cqe, drslid,
690		     send_wr->pkey_index,
691		     send_wr->port_num, &mad_wc);
692
693	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
694		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
695					+ mad_send_wr->send_buf.data_len
696					+ sizeof(struct ib_grh);
697	}
698
699	/* No GRH for DR SMP */
700	ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
701				      (const struct ib_mad *)smp,
702				      (struct ib_mad *)mad_priv->mad, &mad_size,
703				      &out_mad_pkey_index);
704	switch (ret) {
705	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
706		if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
707		    mad_agent_priv->agent.recv_handler) {
708			local->mad_priv = mad_priv;
709			local->recv_mad_agent = mad_agent_priv;
710			/*
711			 * Reference MAD agent until receive
712			 * side of local completion handled
713			 */
714			refcount_inc(&mad_agent_priv->refcount);
715		} else
716			kfree(mad_priv);
717		break;
718	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
719		kfree(mad_priv);
720		break;
721	case IB_MAD_RESULT_SUCCESS:
722		/* Treat like an incoming receive MAD */
723		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
724					    mad_agent_priv->agent.port_num);
725		if (port_priv) {
726			memcpy(mad_priv->mad, smp, mad_priv->mad_size);
727			recv_mad_agent = find_mad_agent(port_priv,
728						        (const struct ib_mad_hdr *)mad_priv->mad);
729		}
730		if (!port_priv || !recv_mad_agent) {
731			/*
732			 * No receiving agent so drop packet and
733			 * generate send completion.
734			 */
735			kfree(mad_priv);
736			break;
737		}
738		local->mad_priv = mad_priv;
739		local->recv_mad_agent = recv_mad_agent;
740		break;
741	default:
742		kfree(mad_priv);
743		kfree(local);
744		ret = -EINVAL;
745		goto out;
746	}
747
748	local->mad_send_wr = mad_send_wr;
749	if (opa) {
750		local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
751		local->return_wc_byte_len = mad_size;
752	}
753	/* Reference MAD agent until send side of local completion handled */
754	refcount_inc(&mad_agent_priv->refcount);
755	/* Queue local completion to local list */
756	spin_lock_irqsave(&mad_agent_priv->lock, flags);
757	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
758	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
759	queue_work(mad_agent_priv->qp_info->port_priv->wq,
760		   &mad_agent_priv->local_work);
761	ret = 1;
762out:
763	return ret;
764}
765
766static int get_pad_size(int hdr_len, int data_len, size_t mad_size)
767{
768	int seg_size, pad;
769
770	seg_size = mad_size - hdr_len;
771	if (data_len && seg_size) {
772		pad = seg_size - data_len % seg_size;
773		return pad == seg_size ? 0 : pad;
774	} else
775		return seg_size;
776}
777
778static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
779{
780	struct ib_rmpp_segment *s, *t;
781
782	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
783		list_del(&s->list);
784		kfree(s);
785	}
786}
787
788static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
789				size_t mad_size, gfp_t gfp_mask)
790{
791	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
792	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
793	struct ib_rmpp_segment *seg = NULL;
794	int left, seg_size, pad;
795
796	send_buf->seg_size = mad_size - send_buf->hdr_len;
797	send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR;
798	seg_size = send_buf->seg_size;
799	pad = send_wr->pad;
800
801	/* Allocate data segments. */
802	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
803		seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask);
804		if (!seg) {
805			free_send_rmpp_list(send_wr);
806			return -ENOMEM;
807		}
808		seg->num = ++send_buf->seg_count;
809		list_add_tail(&seg->list, &send_wr->rmpp_list);
810	}
811
812	/* Zero any padding */
813	if (pad)
814		memset(seg->data + seg_size - pad, 0, pad);
815
816	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
817					  agent.rmpp_version;
818	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
819	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
820
821	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
822					struct ib_rmpp_segment, list);
823	send_wr->last_ack_seg = send_wr->cur_seg;
824	return 0;
825}
826
827int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
828{
829	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
830}
831EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
832
833struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
834					   u32 remote_qpn, u16 pkey_index,
835					   int rmpp_active, int hdr_len,
836					   int data_len, gfp_t gfp_mask,
837					   u8 base_version)
838{
839	struct ib_mad_agent_private *mad_agent_priv;
840	struct ib_mad_send_wr_private *mad_send_wr;
841	int pad, message_size, ret, size;
842	void *buf;
843	size_t mad_size;
844	bool opa;
845
846	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
847				      agent);
848
849	opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num);
850
851	if (opa && base_version == OPA_MGMT_BASE_VERSION)
852		mad_size = sizeof(struct opa_mad);
853	else
854		mad_size = sizeof(struct ib_mad);
855
856	pad = get_pad_size(hdr_len, data_len, mad_size);
857	message_size = hdr_len + data_len + pad;
858
859	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
860		if (!rmpp_active && message_size > mad_size)
861			return ERR_PTR(-EINVAL);
862	} else
863		if (rmpp_active || message_size > mad_size)
864			return ERR_PTR(-EINVAL);
865
866	size = rmpp_active ? hdr_len : mad_size;
867	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
868	if (!buf)
869		return ERR_PTR(-ENOMEM);
870
871	mad_send_wr = buf + size;
872	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
873	mad_send_wr->send_buf.mad = buf;
874	mad_send_wr->send_buf.hdr_len = hdr_len;
875	mad_send_wr->send_buf.data_len = data_len;
876	mad_send_wr->pad = pad;
877
878	mad_send_wr->mad_agent_priv = mad_agent_priv;
879	mad_send_wr->sg_list[0].length = hdr_len;
880	mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey;
881
882	/* OPA MADs don't have to be the full 2048 bytes */
883	if (opa && base_version == OPA_MGMT_BASE_VERSION &&
884	    data_len < mad_size - hdr_len)
885		mad_send_wr->sg_list[1].length = data_len;
886	else
887		mad_send_wr->sg_list[1].length = mad_size - hdr_len;
888
889	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
890
891	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
892
893	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
894	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
895	mad_send_wr->send_wr.wr.num_sge = 2;
896	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
897	mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
898	mad_send_wr->send_wr.remote_qpn = remote_qpn;
899	mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
900	mad_send_wr->send_wr.pkey_index = pkey_index;
901
902	if (rmpp_active) {
903		ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
904		if (ret) {
905			kfree(buf);
906			return ERR_PTR(ret);
907		}
908	}
909
910	mad_send_wr->send_buf.mad_agent = mad_agent;
911	refcount_inc(&mad_agent_priv->refcount);
912	return &mad_send_wr->send_buf;
913}
914EXPORT_SYMBOL(ib_create_send_mad);
915
916int ib_get_mad_data_offset(u8 mgmt_class)
917{
918	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
919		return IB_MGMT_SA_HDR;
920	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
921		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
922		 (mgmt_class == IB_MGMT_CLASS_BIS))
923		return IB_MGMT_DEVICE_HDR;
924	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
925		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
926		return IB_MGMT_VENDOR_HDR;
927	else
928		return IB_MGMT_MAD_HDR;
929}
930EXPORT_SYMBOL(ib_get_mad_data_offset);
931
932int ib_is_mad_class_rmpp(u8 mgmt_class)
933{
934	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
935	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
936	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
937	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
938	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
939	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
940		return 1;
941	return 0;
942}
943EXPORT_SYMBOL(ib_is_mad_class_rmpp);
944
945void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
946{
947	struct ib_mad_send_wr_private *mad_send_wr;
948	struct list_head *list;
949
950	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
951				   send_buf);
952	list = &mad_send_wr->cur_seg->list;
953
954	if (mad_send_wr->cur_seg->num < seg_num) {
955		list_for_each_entry(mad_send_wr->cur_seg, list, list)
956			if (mad_send_wr->cur_seg->num == seg_num)
957				break;
958	} else if (mad_send_wr->cur_seg->num > seg_num) {
959		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
960			if (mad_send_wr->cur_seg->num == seg_num)
961				break;
962	}
963	return mad_send_wr->cur_seg->data;
964}
965EXPORT_SYMBOL(ib_get_rmpp_segment);
966
967static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
968{
969	if (mad_send_wr->send_buf.seg_count)
970		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
971					   mad_send_wr->seg_num);
972	else
973		return mad_send_wr->send_buf.mad +
974		       mad_send_wr->send_buf.hdr_len;
975}
976
977void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
978{
979	struct ib_mad_agent_private *mad_agent_priv;
980	struct ib_mad_send_wr_private *mad_send_wr;
981
982	mad_agent_priv = container_of(send_buf->mad_agent,
983				      struct ib_mad_agent_private, agent);
984	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
985				   send_buf);
986
987	free_send_rmpp_list(mad_send_wr);
988	kfree(send_buf->mad);
989	deref_mad_agent(mad_agent_priv);
990}
991EXPORT_SYMBOL(ib_free_send_mad);
992
993int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
994{
995	struct ib_mad_qp_info *qp_info;
996	struct list_head *list;
997	struct ib_mad_agent *mad_agent;
998	struct ib_sge *sge;
999	unsigned long flags;
1000	int ret;
1001
1002	/* Set WR ID to find mad_send_wr upon completion */
1003	qp_info = mad_send_wr->mad_agent_priv->qp_info;
1004	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1005	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
1006	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
1007
1008	mad_agent = mad_send_wr->send_buf.mad_agent;
1009	sge = mad_send_wr->sg_list;
1010	sge[0].addr = ib_dma_map_single(mad_agent->device,
1011					mad_send_wr->send_buf.mad,
1012					sge[0].length,
1013					DMA_TO_DEVICE);
1014	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
1015		return -ENOMEM;
1016
1017	mad_send_wr->header_mapping = sge[0].addr;
1018
1019	sge[1].addr = ib_dma_map_single(mad_agent->device,
1020					ib_get_payload(mad_send_wr),
1021					sge[1].length,
1022					DMA_TO_DEVICE);
1023	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
1024		ib_dma_unmap_single(mad_agent->device,
1025				    mad_send_wr->header_mapping,
1026				    sge[0].length, DMA_TO_DEVICE);
1027		return -ENOMEM;
1028	}
1029	mad_send_wr->payload_mapping = sge[1].addr;
1030
1031	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1032	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1033		trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
1034		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
1035				   NULL);
1036		list = &qp_info->send_queue.list;
1037	} else {
1038		ret = 0;
1039		list = &qp_info->overflow_list;
1040	}
1041
1042	if (!ret) {
1043		qp_info->send_queue.count++;
1044		list_add_tail(&mad_send_wr->mad_list.list, list);
1045	}
1046	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1047	if (ret) {
1048		ib_dma_unmap_single(mad_agent->device,
1049				    mad_send_wr->header_mapping,
1050				    sge[0].length, DMA_TO_DEVICE);
1051		ib_dma_unmap_single(mad_agent->device,
1052				    mad_send_wr->payload_mapping,
1053				    sge[1].length, DMA_TO_DEVICE);
1054	}
1055	return ret;
1056}
1057
1058/*
1059 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
1060 *  with the registered client
1061 */
1062int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1063		     struct ib_mad_send_buf **bad_send_buf)
1064{
1065	struct ib_mad_agent_private *mad_agent_priv;
1066	struct ib_mad_send_buf *next_send_buf;
1067	struct ib_mad_send_wr_private *mad_send_wr;
1068	unsigned long flags;
1069	int ret = -EINVAL;
1070
1071	/* Walk list of send WRs and post each on send list */
1072	for (; send_buf; send_buf = next_send_buf) {
1073		mad_send_wr = container_of(send_buf,
1074					   struct ib_mad_send_wr_private,
1075					   send_buf);
1076		mad_agent_priv = mad_send_wr->mad_agent_priv;
1077
1078		ret = ib_mad_enforce_security(mad_agent_priv,
1079					      mad_send_wr->send_wr.pkey_index);
1080		if (ret)
1081			goto error;
1082
1083		if (!send_buf->mad_agent->send_handler ||
1084		    (send_buf->timeout_ms &&
1085		     !send_buf->mad_agent->recv_handler)) {
1086			ret = -EINVAL;
1087			goto error;
1088		}
1089
1090		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
1091			if (mad_agent_priv->agent.rmpp_version) {
1092				ret = -EINVAL;
1093				goto error;
1094			}
1095		}
1096
1097		/*
1098		 * Save pointer to next work request to post in case the
1099		 * current one completes, and the user modifies the work
1100		 * request associated with the completion
1101		 */
1102		next_send_buf = send_buf->next;
1103		mad_send_wr->send_wr.ah = send_buf->ah;
1104
1105		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
1106		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1107			ret = handle_outgoing_dr_smp(mad_agent_priv,
1108						     mad_send_wr);
1109			if (ret < 0)		/* error */
1110				goto error;
1111			else if (ret == 1)	/* locally consumed */
1112				continue;
1113		}
1114
1115		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
1116		/* Timeout will be updated after send completes */
1117		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
1118		mad_send_wr->max_retries = send_buf->retries;
1119		mad_send_wr->retries_left = send_buf->retries;
1120		send_buf->retries = 0;
1121		/* Reference for work request to QP + response */
1122		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
1123		mad_send_wr->status = IB_WC_SUCCESS;
1124
1125		/* Reference MAD agent until send completes */
1126		refcount_inc(&mad_agent_priv->refcount);
1127		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1128		list_add_tail(&mad_send_wr->agent_list,
1129			      &mad_agent_priv->send_list);
1130		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1131
1132		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1133			ret = ib_send_rmpp_mad(mad_send_wr);
1134			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
1135				ret = ib_send_mad(mad_send_wr);
1136		} else
1137			ret = ib_send_mad(mad_send_wr);
1138		if (ret < 0) {
1139			/* Fail send request */
1140			spin_lock_irqsave(&mad_agent_priv->lock, flags);
1141			list_del(&mad_send_wr->agent_list);
1142			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1143			deref_mad_agent(mad_agent_priv);
1144			goto error;
1145		}
1146	}
1147	return 0;
1148error:
1149	if (bad_send_buf)
1150		*bad_send_buf = send_buf;
1151	return ret;
1152}
1153EXPORT_SYMBOL(ib_post_send_mad);
1154
1155/*
1156 * ib_free_recv_mad - Returns data buffers used to receive
1157 *  a MAD to the access layer
1158 */
1159void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
1160{
1161	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
1162	struct ib_mad_private_header *mad_priv_hdr;
1163	struct ib_mad_private *priv;
1164	struct list_head free_list;
1165
1166	INIT_LIST_HEAD(&free_list);
1167	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
1168
1169	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
1170					&free_list, list) {
1171		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
1172					   recv_buf);
1173		mad_priv_hdr = container_of(mad_recv_wc,
1174					    struct ib_mad_private_header,
1175					    recv_wc);
1176		priv = container_of(mad_priv_hdr, struct ib_mad_private,
1177				    header);
1178		kfree(priv);
1179	}
1180}
1181EXPORT_SYMBOL(ib_free_recv_mad);
1182
1183static int method_in_use(struct ib_mad_mgmt_method_table **method,
1184			 struct ib_mad_reg_req *mad_reg_req)
1185{
1186	int i;
1187
1188	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
1189		if ((*method)->agent[i]) {
1190			pr_err("Method %d already in use\n", i);
1191			return -EINVAL;
1192		}
1193	}
1194	return 0;
1195}
1196
1197static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1198{
1199	/* Allocate management method table */
1200	*method = kzalloc(sizeof **method, GFP_ATOMIC);
1201	return (*method) ? 0 : (-ENOMEM);
1202}
1203
1204/*
1205 * Check to see if there are any methods still in use
1206 */
1207static int check_method_table(struct ib_mad_mgmt_method_table *method)
1208{
1209	int i;
1210
1211	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1212		if (method->agent[i])
1213			return 1;
1214	return 0;
1215}
1216
1217/*
1218 * Check to see if there are any method tables for this class still in use
1219 */
1220static int check_class_table(struct ib_mad_mgmt_class_table *class)
1221{
1222	int i;
1223
1224	for (i = 0; i < MAX_MGMT_CLASS; i++)
1225		if (class->method_table[i])
1226			return 1;
1227	return 0;
1228}
1229
1230static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1231{
1232	int i;
1233
1234	for (i = 0; i < MAX_MGMT_OUI; i++)
1235		if (vendor_class->method_table[i])
1236			return 1;
1237	return 0;
1238}
1239
1240static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1241			   const char *oui)
1242{
1243	int i;
1244
1245	for (i = 0; i < MAX_MGMT_OUI; i++)
1246		/* Is there matching OUI for this vendor class ? */
1247		if (!memcmp(vendor_class->oui[i], oui, 3))
1248			return i;
1249
1250	return -1;
1251}
1252
1253static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1254{
1255	int i;
1256
1257	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1258		if (vendor->vendor_class[i])
1259			return 1;
1260
1261	return 0;
1262}
1263
1264static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1265				     struct ib_mad_agent_private *agent)
1266{
1267	int i;
1268
1269	/* Remove any methods for this mad agent */
1270	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1271		if (method->agent[i] == agent)
1272			method->agent[i] = NULL;
1273}
1274
1275static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1276			      struct ib_mad_agent_private *agent_priv,
1277			      u8 mgmt_class)
1278{
1279	struct ib_mad_port_private *port_priv;
1280	struct ib_mad_mgmt_class_table **class;
1281	struct ib_mad_mgmt_method_table **method;
1282	int i, ret;
1283
1284	port_priv = agent_priv->qp_info->port_priv;
1285	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1286	if (!*class) {
1287		/* Allocate management class table for "new" class version */
1288		*class = kzalloc(sizeof **class, GFP_ATOMIC);
1289		if (!*class) {
1290			ret = -ENOMEM;
1291			goto error1;
1292		}
1293
1294		/* Allocate method table for this management class */
1295		method = &(*class)->method_table[mgmt_class];
1296		if ((ret = allocate_method_table(method)))
1297			goto error2;
1298	} else {
1299		method = &(*class)->method_table[mgmt_class];
1300		if (!*method) {
1301			/* Allocate method table for this management class */
1302			if ((ret = allocate_method_table(method)))
1303				goto error1;
1304		}
1305	}
1306
1307	/* Now, make sure methods are not already in use */
1308	if (method_in_use(method, mad_reg_req))
1309		goto error3;
1310
1311	/* Finally, add in methods being registered */
1312	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1313		(*method)->agent[i] = agent_priv;
1314
1315	return 0;
1316
1317error3:
1318	/* Remove any methods for this mad agent */
1319	remove_methods_mad_agent(*method, agent_priv);
1320	/* Now, check to see if there are any methods in use */
1321	if (!check_method_table(*method)) {
1322		/* If not, release management method table */
1323		kfree(*method);
1324		*method = NULL;
1325	}
1326	ret = -EINVAL;
1327	goto error1;
1328error2:
1329	kfree(*class);
1330	*class = NULL;
1331error1:
1332	return ret;
1333}
1334
1335static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1336			   struct ib_mad_agent_private *agent_priv)
1337{
1338	struct ib_mad_port_private *port_priv;
1339	struct ib_mad_mgmt_vendor_class_table **vendor_table;
1340	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
1341	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
1342	struct ib_mad_mgmt_method_table **method;
1343	int i, ret = -ENOMEM;
1344	u8 vclass;
1345
1346	/* "New" vendor (with OUI) class */
1347	vclass = vendor_class_index(mad_reg_req->mgmt_class);
1348	port_priv = agent_priv->qp_info->port_priv;
1349	vendor_table = &port_priv->version[
1350				mad_reg_req->mgmt_class_version].vendor;
1351	if (!*vendor_table) {
1352		/* Allocate mgmt vendor class table for "new" class version */
1353		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
1354		if (!vendor)
1355			goto error1;
1356
1357		*vendor_table = vendor;
1358	}
1359	if (!(*vendor_table)->vendor_class[vclass]) {
1360		/* Allocate table for this management vendor class */
1361		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
1362		if (!vendor_class)
1363			goto error2;
1364
1365		(*vendor_table)->vendor_class[vclass] = vendor_class;
1366	}
1367	for (i = 0; i < MAX_MGMT_OUI; i++) {
1368		/* Is there matching OUI for this vendor class ? */
1369		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
1370			    mad_reg_req->oui, 3)) {
1371			method = &(*vendor_table)->vendor_class[
1372						vclass]->method_table[i];
1373			if (!*method)
1374				goto error3;
1375			goto check_in_use;
1376		}
1377	}
1378	for (i = 0; i < MAX_MGMT_OUI; i++) {
1379		/* OUI slot available ? */
1380		if (!is_vendor_oui((*vendor_table)->vendor_class[
1381				vclass]->oui[i])) {
1382			method = &(*vendor_table)->vendor_class[
1383				vclass]->method_table[i];
1384			/* Allocate method table for this OUI */
1385			if (!*method) {
1386				ret = allocate_method_table(method);
1387				if (ret)
1388					goto error3;
1389			}
1390			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
1391			       mad_reg_req->oui, 3);
1392			goto check_in_use;
1393		}
1394	}
1395	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
1396	goto error3;
1397
1398check_in_use:
1399	/* Now, make sure methods are not already in use */
1400	if (method_in_use(method, mad_reg_req))
1401		goto error4;
1402
1403	/* Finally, add in methods being registered */
1404	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1405		(*method)->agent[i] = agent_priv;
1406
1407	return 0;
1408
1409error4:
1410	/* Remove any methods for this mad agent */
1411	remove_methods_mad_agent(*method, agent_priv);
1412	/* Now, check to see if there are any methods in use */
1413	if (!check_method_table(*method)) {
1414		/* If not, release management method table */
1415		kfree(*method);
1416		*method = NULL;
1417	}
1418	ret = -EINVAL;
1419error3:
1420	if (vendor_class) {
1421		(*vendor_table)->vendor_class[vclass] = NULL;
1422		kfree(vendor_class);
1423	}
1424error2:
1425	if (vendor) {
1426		*vendor_table = NULL;
1427		kfree(vendor);
1428	}
1429error1:
1430	return ret;
1431}
1432
1433static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1434{
1435	struct ib_mad_port_private *port_priv;
1436	struct ib_mad_mgmt_class_table *class;
1437	struct ib_mad_mgmt_method_table *method;
1438	struct ib_mad_mgmt_vendor_class_table *vendor;
1439	struct ib_mad_mgmt_vendor_class *vendor_class;
1440	int index;
1441	u8 mgmt_class;
1442
1443	/*
1444	 * Was MAD registration request supplied
1445	 * with original registration ?
1446	 */
1447	if (!agent_priv->reg_req)
1448		goto out;
1449
1450	port_priv = agent_priv->qp_info->port_priv;
1451	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1452	class = port_priv->version[
1453			agent_priv->reg_req->mgmt_class_version].class;
1454	if (!class)
1455		goto vendor_check;
1456
1457	method = class->method_table[mgmt_class];
1458	if (method) {
1459		/* Remove any methods for this mad agent */
1460		remove_methods_mad_agent(method, agent_priv);
1461		/* Now, check to see if there are any methods still in use */
1462		if (!check_method_table(method)) {
1463			/* If not, release management method table */
1464			kfree(method);
1465			class->method_table[mgmt_class] = NULL;
1466			/* Any management classes left ? */
1467			if (!check_class_table(class)) {
1468				/* If not, release management class table */
1469				kfree(class);
1470				port_priv->version[
1471					agent_priv->reg_req->
1472					mgmt_class_version].class = NULL;
1473			}
1474		}
1475	}
1476
1477vendor_check:
1478	if (!is_vendor_class(mgmt_class))
1479		goto out;
1480
1481	/* normalize mgmt_class to vendor range 2 */
1482	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1483	vendor = port_priv->version[
1484			agent_priv->reg_req->mgmt_class_version].vendor;
1485
1486	if (!vendor)
1487		goto out;
1488
1489	vendor_class = vendor->vendor_class[mgmt_class];
1490	if (vendor_class) {
1491		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1492		if (index < 0)
1493			goto out;
1494		method = vendor_class->method_table[index];
1495		if (method) {
1496			/* Remove any methods for this mad agent */
1497			remove_methods_mad_agent(method, agent_priv);
1498			/*
1499			 * Now, check to see if there are
1500			 * any methods still in use
1501			 */
1502			if (!check_method_table(method)) {
1503				/* If not, release management method table */
1504				kfree(method);
1505				vendor_class->method_table[index] = NULL;
1506				memset(vendor_class->oui[index], 0, 3);
1507				/* Any OUIs left ? */
1508				if (!check_vendor_class(vendor_class)) {
1509					/* If not, release vendor class table */
1510					kfree(vendor_class);
1511					vendor->vendor_class[mgmt_class] = NULL;
1512					/* Any other vendor classes left ? */
1513					if (!check_vendor_table(vendor)) {
1514						kfree(vendor);
1515						port_priv->version[
1516							agent_priv->reg_req->
1517							mgmt_class_version].
1518							vendor = NULL;
1519					}
1520				}
1521			}
1522		}
1523	}
1524
1525out:
1526	return;
1527}
1528
1529static struct ib_mad_agent_private *
1530find_mad_agent(struct ib_mad_port_private *port_priv,
1531	       const struct ib_mad_hdr *mad_hdr)
1532{
1533	struct ib_mad_agent_private *mad_agent = NULL;
1534	unsigned long flags;
1535
1536	if (ib_response_mad(mad_hdr)) {
1537		u32 hi_tid;
1538
1539		/*
1540		 * Routing is based on high 32 bits of transaction ID
1541		 * of MAD.
1542		 */
1543		hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
1544		rcu_read_lock();
1545		mad_agent = xa_load(&ib_mad_clients, hi_tid);
1546		if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
1547			mad_agent = NULL;
1548		rcu_read_unlock();
1549	} else {
1550		struct ib_mad_mgmt_class_table *class;
1551		struct ib_mad_mgmt_method_table *method;
1552		struct ib_mad_mgmt_vendor_class_table *vendor;
1553		struct ib_mad_mgmt_vendor_class *vendor_class;
1554		const struct ib_vendor_mad *vendor_mad;
1555		int index;
1556
1557		spin_lock_irqsave(&port_priv->reg_lock, flags);
1558		/*
1559		 * Routing is based on version, class, and method
1560		 * For "newer" vendor MADs, also based on OUI
1561		 */
1562		if (mad_hdr->class_version >= MAX_MGMT_VERSION)
1563			goto out;
1564		if (!is_vendor_class(mad_hdr->mgmt_class)) {
1565			class = port_priv->version[
1566					mad_hdr->class_version].class;
1567			if (!class)
1568				goto out;
1569			if (convert_mgmt_class(mad_hdr->mgmt_class) >=
1570			    ARRAY_SIZE(class->method_table))
1571				goto out;
1572			method = class->method_table[convert_mgmt_class(
1573							mad_hdr->mgmt_class)];
1574			if (method)
1575				mad_agent = method->agent[mad_hdr->method &
1576							  ~IB_MGMT_METHOD_RESP];
1577		} else {
1578			vendor = port_priv->version[
1579					mad_hdr->class_version].vendor;
1580			if (!vendor)
1581				goto out;
1582			vendor_class = vendor->vendor_class[vendor_class_index(
1583						mad_hdr->mgmt_class)];
1584			if (!vendor_class)
1585				goto out;
1586			/* Find matching OUI */
1587			vendor_mad = (const struct ib_vendor_mad *)mad_hdr;
1588			index = find_vendor_oui(vendor_class, vendor_mad->oui);
1589			if (index == -1)
1590				goto out;
1591			method = vendor_class->method_table[index];
1592			if (method) {
1593				mad_agent = method->agent[mad_hdr->method &
1594							  ~IB_MGMT_METHOD_RESP];
1595			}
1596		}
1597		if (mad_agent)
1598			refcount_inc(&mad_agent->refcount);
1599out:
1600		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1601	}
1602
1603	if (mad_agent && !mad_agent->agent.recv_handler) {
1604		dev_notice(&port_priv->device->dev,
1605			   "No receive handler for client %p on port %u\n",
1606			   &mad_agent->agent, port_priv->port_num);
1607		deref_mad_agent(mad_agent);
1608		mad_agent = NULL;
1609	}
1610
1611	return mad_agent;
1612}
1613
1614static int validate_mad(const struct ib_mad_hdr *mad_hdr,
1615			const struct ib_mad_qp_info *qp_info,
1616			bool opa)
1617{
1618	int valid = 0;
1619	u32 qp_num = qp_info->qp->qp_num;
1620
1621	/* Make sure MAD base version is understood */
1622	if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
1623	    (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
1624		pr_err("MAD received with unsupported base version %u %s\n",
1625		       mad_hdr->base_version, opa ? "(opa)" : "");
1626		goto out;
1627	}
1628
1629	/* Filter SMI packets sent to other than QP0 */
1630	if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1631	    (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1632		if (qp_num == 0)
1633			valid = 1;
1634	} else {
1635		/* CM attributes other than ClassPortInfo only use Send method */
1636		if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
1637		    (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
1638		    (mad_hdr->method != IB_MGMT_METHOD_SEND))
1639			goto out;
1640		/* Filter GSI packets sent to QP0 */
1641		if (qp_num != 0)
1642			valid = 1;
1643	}
1644
1645out:
1646	return valid;
1647}
1648
1649static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv,
1650			    const struct ib_mad_hdr *mad_hdr)
1651{
1652	struct ib_rmpp_mad *rmpp_mad;
1653
1654	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1655	return !mad_agent_priv->agent.rmpp_version ||
1656		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
1657		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1658				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1659		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1660}
1661
1662static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
1663				     const struct ib_mad_recv_wc *rwc)
1664{
1665	return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class ==
1666		rwc->recv_buf.mad->mad_hdr.mgmt_class;
1667}
1668
1669static inline int
1670rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
1671		 const struct ib_mad_send_wr_private *wr,
1672		 const struct ib_mad_recv_wc *rwc)
1673{
1674	struct rdma_ah_attr attr;
1675	u8 send_resp, rcv_resp;
1676	union ib_gid sgid;
1677	struct ib_device *device = mad_agent_priv->agent.device;
1678	u32 port_num = mad_agent_priv->agent.port_num;
1679	u8 lmc;
1680	bool has_grh;
1681
1682	send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
1683	rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
1684
1685	if (send_resp == rcv_resp)
1686		/* both requests, or both responses. GIDs different */
1687		return 0;
1688
1689	if (rdma_query_ah(wr->send_buf.ah, &attr))
1690		/* Assume not equal, to avoid false positives. */
1691		return 0;
1692
1693	has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
1694	if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
1695		/* one has GID, other does not.  Assume different */
1696		return 0;
1697
1698	if (!send_resp && rcv_resp) {
1699		/* is request/response. */
1700		if (!has_grh) {
1701			if (ib_get_cached_lmc(device, port_num, &lmc))
1702				return 0;
1703			return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
1704					   rwc->wc->dlid_path_bits) &
1705					  ((1 << lmc) - 1)));
1706		} else {
1707			const struct ib_global_route *grh =
1708					rdma_ah_read_grh(&attr);
1709
1710			if (rdma_query_gid(device, port_num,
1711					   grh->sgid_index, &sgid))
1712				return 0;
1713			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1714				       16);
1715		}
1716	}
1717
1718	if (!has_grh)
1719		return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
1720	else
1721		return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
1722			       rwc->recv_buf.grh->sgid.raw,
1723			       16);
1724}
1725
1726static inline int is_direct(u8 class)
1727{
1728	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1729}
1730
1731struct ib_mad_send_wr_private*
1732ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
1733		 const struct ib_mad_recv_wc *wc)
1734{
1735	struct ib_mad_send_wr_private *wr;
1736	const struct ib_mad_hdr *mad_hdr;
1737
1738	mad_hdr = &wc->recv_buf.mad->mad_hdr;
1739
1740	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1741		if ((wr->tid == mad_hdr->tid) &&
1742		    rcv_has_same_class(wr, wc) &&
1743		    /*
1744		     * Don't check GID for direct routed MADs.
1745		     * These might have permissive LIDs.
1746		     */
1747		    (is_direct(mad_hdr->mgmt_class) ||
1748		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1749			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1750	}
1751
1752	/*
1753	 * It's possible to receive the response before we've
1754	 * been notified that the send has completed
1755	 */
1756	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1757		if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1758		    wr->tid == mad_hdr->tid &&
1759		    wr->timeout &&
1760		    rcv_has_same_class(wr, wc) &&
1761		    /*
1762		     * Don't check GID for direct routed MADs.
1763		     * These might have permissive LIDs.
1764		     */
1765		    (is_direct(mad_hdr->mgmt_class) ||
1766		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1767			/* Verify request has not been canceled */
1768			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1769	}
1770	return NULL;
1771}
1772
1773void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1774{
1775	mad_send_wr->timeout = 0;
1776	if (mad_send_wr->refcount == 1)
1777		list_move_tail(&mad_send_wr->agent_list,
1778			      &mad_send_wr->mad_agent_priv->done_list);
1779}
1780
1781static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1782				 struct ib_mad_recv_wc *mad_recv_wc)
1783{
1784	struct ib_mad_send_wr_private *mad_send_wr;
1785	struct ib_mad_send_wc mad_send_wc;
1786	unsigned long flags;
1787	int ret;
1788
1789	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1790	ret = ib_mad_enforce_security(mad_agent_priv,
1791				      mad_recv_wc->wc->pkey_index);
1792	if (ret) {
1793		ib_free_recv_mad(mad_recv_wc);
1794		deref_mad_agent(mad_agent_priv);
1795		return;
1796	}
1797
1798	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1799	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1800		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1801						      mad_recv_wc);
1802		if (!mad_recv_wc) {
1803			deref_mad_agent(mad_agent_priv);
1804			return;
1805		}
1806	}
1807
1808	/* Complete corresponding request */
1809	if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) {
1810		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1811		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1812		if (!mad_send_wr) {
1813			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1814			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
1815			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
1816			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
1817					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
1818				/* user rmpp is in effect
1819				 * and this is an active RMPP MAD
1820				 */
1821				mad_agent_priv->agent.recv_handler(
1822						&mad_agent_priv->agent, NULL,
1823						mad_recv_wc);
1824				deref_mad_agent(mad_agent_priv);
1825			} else {
1826				/* not user rmpp, revert to normal behavior and
1827				 * drop the mad
1828				 */
1829				ib_free_recv_mad(mad_recv_wc);
1830				deref_mad_agent(mad_agent_priv);
1831				return;
1832			}
1833		} else {
1834			ib_mark_mad_done(mad_send_wr);
1835			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1836
1837			/* Defined behavior is to complete response before request */
1838			mad_agent_priv->agent.recv_handler(
1839					&mad_agent_priv->agent,
1840					&mad_send_wr->send_buf,
1841					mad_recv_wc);
1842			deref_mad_agent(mad_agent_priv);
1843
1844			mad_send_wc.status = IB_WC_SUCCESS;
1845			mad_send_wc.vendor_err = 0;
1846			mad_send_wc.send_buf = &mad_send_wr->send_buf;
1847			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1848		}
1849	} else {
1850		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
1851						   mad_recv_wc);
1852		deref_mad_agent(mad_agent_priv);
1853	}
1854}
1855
1856static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
1857				     const struct ib_mad_qp_info *qp_info,
1858				     const struct ib_wc *wc,
1859				     u32 port_num,
1860				     struct ib_mad_private *recv,
1861				     struct ib_mad_private *response)
1862{
1863	enum smi_forward_action retsmi;
1864	struct ib_smp *smp = (struct ib_smp *)recv->mad;
1865
1866	trace_ib_mad_handle_ib_smi(smp);
1867
1868	if (smi_handle_dr_smp_recv(smp,
1869				   rdma_cap_ib_switch(port_priv->device),
1870				   port_num,
1871				   port_priv->device->phys_port_cnt) ==
1872				   IB_SMI_DISCARD)
1873		return IB_SMI_DISCARD;
1874
1875	retsmi = smi_check_forward_dr_smp(smp);
1876	if (retsmi == IB_SMI_LOCAL)
1877		return IB_SMI_HANDLE;
1878
1879	if (retsmi == IB_SMI_SEND) { /* don't forward */
1880		if (smi_handle_dr_smp_send(smp,
1881					   rdma_cap_ib_switch(port_priv->device),
1882					   port_num) == IB_SMI_DISCARD)
1883			return IB_SMI_DISCARD;
1884
1885		if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD)
1886			return IB_SMI_DISCARD;
1887	} else if (rdma_cap_ib_switch(port_priv->device)) {
1888		/* forward case for switches */
1889		memcpy(response, recv, mad_priv_size(response));
1890		response->header.recv_wc.wc = &response->header.wc;
1891		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
1892		response->header.recv_wc.recv_buf.grh = &response->grh;
1893
1894		agent_send_response((const struct ib_mad_hdr *)response->mad,
1895				    &response->grh, wc,
1896				    port_priv->device,
1897				    smi_get_fwd_port(smp),
1898				    qp_info->qp->qp_num,
1899				    response->mad_size,
1900				    false);
1901
1902		return IB_SMI_DISCARD;
1903	}
1904	return IB_SMI_HANDLE;
1905}
1906
1907static bool generate_unmatched_resp(const struct ib_mad_private *recv,
1908				    struct ib_mad_private *response,
1909				    size_t *resp_len, bool opa)
1910{
1911	const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad;
1912	struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad;
1913
1914	if (recv_hdr->method == IB_MGMT_METHOD_GET ||
1915	    recv_hdr->method == IB_MGMT_METHOD_SET) {
1916		memcpy(response, recv, mad_priv_size(response));
1917		response->header.recv_wc.wc = &response->header.wc;
1918		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
1919		response->header.recv_wc.recv_buf.grh = &response->grh;
1920		resp_hdr->method = IB_MGMT_METHOD_GET_RESP;
1921		resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
1922		if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1923			resp_hdr->status |= IB_SMP_DIRECTION;
1924
1925		if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) {
1926			if (recv_hdr->mgmt_class ==
1927			    IB_MGMT_CLASS_SUBN_LID_ROUTED ||
1928			    recv_hdr->mgmt_class ==
1929			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1930				*resp_len = opa_get_smp_header_size(
1931							(struct opa_smp *)recv->mad);
1932			else
1933				*resp_len = sizeof(struct ib_mad_hdr);
1934		}
1935
1936		return true;
1937	} else {
1938		return false;
1939	}
1940}
1941
1942static enum smi_action
1943handle_opa_smi(struct ib_mad_port_private *port_priv,
1944	       struct ib_mad_qp_info *qp_info,
1945	       struct ib_wc *wc,
1946	       u32 port_num,
1947	       struct ib_mad_private *recv,
1948	       struct ib_mad_private *response)
1949{
1950	enum smi_forward_action retsmi;
1951	struct opa_smp *smp = (struct opa_smp *)recv->mad;
1952
1953	trace_ib_mad_handle_opa_smi(smp);
1954
1955	if (opa_smi_handle_dr_smp_recv(smp,
1956				   rdma_cap_ib_switch(port_priv->device),
1957				   port_num,
1958				   port_priv->device->phys_port_cnt) ==
1959				   IB_SMI_DISCARD)
1960		return IB_SMI_DISCARD;
1961
1962	retsmi = opa_smi_check_forward_dr_smp(smp);
1963	if (retsmi == IB_SMI_LOCAL)
1964		return IB_SMI_HANDLE;
1965
1966	if (retsmi == IB_SMI_SEND) { /* don't forward */
1967		if (opa_smi_handle_dr_smp_send(smp,
1968					   rdma_cap_ib_switch(port_priv->device),
1969					   port_num) == IB_SMI_DISCARD)
1970			return IB_SMI_DISCARD;
1971
1972		if (opa_smi_check_local_smp(smp, port_priv->device) ==
1973		    IB_SMI_DISCARD)
1974			return IB_SMI_DISCARD;
1975
1976	} else if (rdma_cap_ib_switch(port_priv->device)) {
1977		/* forward case for switches */
1978		memcpy(response, recv, mad_priv_size(response));
1979		response->header.recv_wc.wc = &response->header.wc;
1980		response->header.recv_wc.recv_buf.opa_mad =
1981				(struct opa_mad *)response->mad;
1982		response->header.recv_wc.recv_buf.grh = &response->grh;
1983
1984		agent_send_response((const struct ib_mad_hdr *)response->mad,
1985				    &response->grh, wc,
1986				    port_priv->device,
1987				    opa_smi_get_fwd_port(smp),
1988				    qp_info->qp->qp_num,
1989				    recv->header.wc.byte_len,
1990				    true);
1991
1992		return IB_SMI_DISCARD;
1993	}
1994
1995	return IB_SMI_HANDLE;
1996}
1997
1998static enum smi_action
1999handle_smi(struct ib_mad_port_private *port_priv,
2000	   struct ib_mad_qp_info *qp_info,
2001	   struct ib_wc *wc,
2002	   u32 port_num,
2003	   struct ib_mad_private *recv,
2004	   struct ib_mad_private *response,
2005	   bool opa)
2006{
2007	struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
2008
2009	if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
2010	    mad_hdr->class_version == OPA_SM_CLASS_VERSION)
2011		return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
2012				      response);
2013
2014	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
2015}
2016
2017static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2018{
2019	struct ib_mad_port_private *port_priv = cq->cq_context;
2020	struct ib_mad_list_head *mad_list =
2021		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2022	struct ib_mad_qp_info *qp_info;
2023	struct ib_mad_private_header *mad_priv_hdr;
2024	struct ib_mad_private *recv, *response = NULL;
2025	struct ib_mad_agent_private *mad_agent;
2026	u32 port_num;
2027	int ret = IB_MAD_RESULT_SUCCESS;
2028	size_t mad_size;
2029	u16 resp_mad_pkey_index = 0;
2030	bool opa;
2031
2032	if (list_empty_careful(&port_priv->port_list))
2033		return;
2034
2035	if (wc->status != IB_WC_SUCCESS) {
2036		/*
2037		 * Receive errors indicate that the QP has entered the error
2038		 * state - error handling/shutdown code will cleanup
2039		 */
2040		return;
2041	}
2042
2043	qp_info = mad_list->mad_queue->qp_info;
2044	dequeue_mad(mad_list);
2045
2046	opa = rdma_cap_opa_mad(qp_info->port_priv->device,
2047			       qp_info->port_priv->port_num);
2048
2049	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
2050				    mad_list);
2051	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
2052	ib_dma_unmap_single(port_priv->device,
2053			    recv->header.mapping,
2054			    mad_priv_dma_size(recv),
2055			    DMA_FROM_DEVICE);
2056
2057	/* Setup MAD receive work completion from "normal" work completion */
2058	recv->header.wc = *wc;
2059	recv->header.recv_wc.wc = &recv->header.wc;
2060
2061	if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) {
2062		recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh);
2063		recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2064	} else {
2065		recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2066		recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2067	}
2068
2069	recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
2070	recv->header.recv_wc.recv_buf.grh = &recv->grh;
2071
2072	/* Validate MAD */
2073	if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
2074		goto out;
2075
2076	trace_ib_mad_recv_done_handler(qp_info, wc,
2077				       (struct ib_mad_hdr *)recv->mad);
2078
2079	mad_size = recv->mad_size;
2080	response = alloc_mad_private(mad_size, GFP_KERNEL);
2081	if (!response)
2082		goto out;
2083
2084	if (rdma_cap_ib_switch(port_priv->device))
2085		port_num = wc->port_num;
2086	else
2087		port_num = port_priv->port_num;
2088
2089	if (((struct ib_mad_hdr *)recv->mad)->mgmt_class ==
2090	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
2091		if (handle_smi(port_priv, qp_info, wc, port_num, recv,
2092			       response, opa)
2093		    == IB_SMI_DISCARD)
2094			goto out;
2095	}
2096
2097	/* Give driver "right of first refusal" on incoming MAD */
2098	if (port_priv->device->ops.process_mad) {
2099		ret = port_priv->device->ops.process_mad(
2100			port_priv->device, 0, port_priv->port_num, wc,
2101			&recv->grh, (const struct ib_mad *)recv->mad,
2102			(struct ib_mad *)response->mad, &mad_size,
2103			&resp_mad_pkey_index);
2104
2105		if (opa)
2106			wc->pkey_index = resp_mad_pkey_index;
2107
2108		if (ret & IB_MAD_RESULT_SUCCESS) {
2109			if (ret & IB_MAD_RESULT_CONSUMED)
2110				goto out;
2111			if (ret & IB_MAD_RESULT_REPLY) {
2112				agent_send_response((const struct ib_mad_hdr *)response->mad,
2113						    &recv->grh, wc,
2114						    port_priv->device,
2115						    port_num,
2116						    qp_info->qp->qp_num,
2117						    mad_size, opa);
2118				goto out;
2119			}
2120		}
2121	}
2122
2123	mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad);
2124	if (mad_agent) {
2125		trace_ib_mad_recv_done_agent(mad_agent);
2126		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
2127		/*
2128		 * recv is freed up in error cases in ib_mad_complete_recv
2129		 * or via recv_handler in ib_mad_complete_recv()
2130		 */
2131		recv = NULL;
2132	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
2133		   generate_unmatched_resp(recv, response, &mad_size, opa)) {
2134		agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc,
2135				    port_priv->device, port_num,
2136				    qp_info->qp->qp_num, mad_size, opa);
2137	}
2138
2139out:
2140	/* Post another receive request for this QP */
2141	if (response) {
2142		ib_mad_post_receive_mads(qp_info, response);
2143		kfree(recv);
2144	} else
2145		ib_mad_post_receive_mads(qp_info, recv);
2146}
2147
2148static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
2149{
2150	struct ib_mad_send_wr_private *mad_send_wr;
2151	unsigned long delay;
2152
2153	if (list_empty(&mad_agent_priv->wait_list)) {
2154		cancel_delayed_work(&mad_agent_priv->timed_work);
2155	} else {
2156		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2157					 struct ib_mad_send_wr_private,
2158					 agent_list);
2159
2160		if (time_after(mad_agent_priv->timeout,
2161			       mad_send_wr->timeout)) {
2162			mad_agent_priv->timeout = mad_send_wr->timeout;
2163			delay = mad_send_wr->timeout - jiffies;
2164			if ((long)delay <= 0)
2165				delay = 1;
2166			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2167					 &mad_agent_priv->timed_work, delay);
2168		}
2169	}
2170}
2171
2172static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2173{
2174	struct ib_mad_agent_private *mad_agent_priv;
2175	struct ib_mad_send_wr_private *temp_mad_send_wr;
2176	struct list_head *list_item;
2177	unsigned long delay;
2178
2179	mad_agent_priv = mad_send_wr->mad_agent_priv;
2180	list_del(&mad_send_wr->agent_list);
2181
2182	delay = mad_send_wr->timeout;
2183	mad_send_wr->timeout += jiffies;
2184
2185	if (delay) {
2186		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
2187			temp_mad_send_wr = list_entry(list_item,
2188						struct ib_mad_send_wr_private,
2189						agent_list);
2190			if (time_after(mad_send_wr->timeout,
2191				       temp_mad_send_wr->timeout))
2192				break;
2193		}
2194	} else {
2195		list_item = &mad_agent_priv->wait_list;
2196	}
2197
2198	list_add(&mad_send_wr->agent_list, list_item);
2199
2200	/* Reschedule a work item if we have a shorter timeout */
2201	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
2202		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2203				 &mad_agent_priv->timed_work, delay);
2204}
2205
2206void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2207			  unsigned long timeout_ms)
2208{
2209	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2210	wait_for_response(mad_send_wr);
2211}
2212
2213/*
2214 * Process a send work completion
2215 */
2216void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2217			     struct ib_mad_send_wc *mad_send_wc)
2218{
2219	struct ib_mad_agent_private	*mad_agent_priv;
2220	unsigned long			flags;
2221	int				ret;
2222
2223	mad_agent_priv = mad_send_wr->mad_agent_priv;
2224	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2225	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
2226		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2227		if (ret == IB_RMPP_RESULT_CONSUMED)
2228			goto done;
2229	} else
2230		ret = IB_RMPP_RESULT_UNHANDLED;
2231
2232	if (mad_send_wc->status != IB_WC_SUCCESS &&
2233	    mad_send_wr->status == IB_WC_SUCCESS) {
2234		mad_send_wr->status = mad_send_wc->status;
2235		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2236	}
2237
2238	if (--mad_send_wr->refcount > 0) {
2239		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
2240		    mad_send_wr->status == IB_WC_SUCCESS) {
2241			wait_for_response(mad_send_wr);
2242		}
2243		goto done;
2244	}
2245
2246	/* Remove send from MAD agent and notify client of completion */
2247	list_del(&mad_send_wr->agent_list);
2248	adjust_timeout(mad_agent_priv);
2249	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2250
2251	if (mad_send_wr->status != IB_WC_SUCCESS)
2252		mad_send_wc->status = mad_send_wr->status;
2253	if (ret == IB_RMPP_RESULT_INTERNAL)
2254		ib_rmpp_send_handler(mad_send_wc);
2255	else
2256		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2257						   mad_send_wc);
2258
2259	/* Release reference on agent taken when sending */
2260	deref_mad_agent(mad_agent_priv);
2261	return;
2262done:
2263	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2264}
2265
2266static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
2267{
2268	struct ib_mad_port_private *port_priv = cq->cq_context;
2269	struct ib_mad_list_head *mad_list =
2270		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2271	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
2272	struct ib_mad_qp_info		*qp_info;
2273	struct ib_mad_queue		*send_queue;
2274	struct ib_mad_send_wc		mad_send_wc;
2275	unsigned long flags;
2276	int ret;
2277
2278	if (list_empty_careful(&port_priv->port_list))
2279		return;
2280
2281	if (wc->status != IB_WC_SUCCESS) {
2282		if (!ib_mad_send_error(port_priv, wc))
2283			return;
2284	}
2285
2286	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2287				   mad_list);
2288	send_queue = mad_list->mad_queue;
2289	qp_info = send_queue->qp_info;
2290
2291	trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv);
2292	trace_ib_mad_send_done_handler(mad_send_wr, wc);
2293
2294retry:
2295	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2296			    mad_send_wr->header_mapping,
2297			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
2298	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2299			    mad_send_wr->payload_mapping,
2300			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
2301	queued_send_wr = NULL;
2302	spin_lock_irqsave(&send_queue->lock, flags);
2303	list_del(&mad_list->list);
2304
2305	/* Move queued send to the send queue */
2306	if (send_queue->count-- > send_queue->max_active) {
2307		mad_list = container_of(qp_info->overflow_list.next,
2308					struct ib_mad_list_head, list);
2309		queued_send_wr = container_of(mad_list,
2310					struct ib_mad_send_wr_private,
2311					mad_list);
2312		list_move_tail(&mad_list->list, &send_queue->list);
2313	}
2314	spin_unlock_irqrestore(&send_queue->lock, flags);
2315
2316	mad_send_wc.send_buf = &mad_send_wr->send_buf;
2317	mad_send_wc.status = wc->status;
2318	mad_send_wc.vendor_err = wc->vendor_err;
2319	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2320
2321	if (queued_send_wr) {
2322		trace_ib_mad_send_done_resend(queued_send_wr, qp_info);
2323		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
2324				   NULL);
2325		if (ret) {
2326			dev_err(&port_priv->device->dev,
2327				"ib_post_send failed: %d\n", ret);
2328			mad_send_wr = queued_send_wr;
2329			wc->status = IB_WC_LOC_QP_OP_ERR;
2330			goto retry;
2331		}
2332	}
2333}
2334
2335static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2336{
2337	struct ib_mad_send_wr_private *mad_send_wr;
2338	struct ib_mad_list_head *mad_list;
2339	unsigned long flags;
2340
2341	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
2342	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
2343		mad_send_wr = container_of(mad_list,
2344					   struct ib_mad_send_wr_private,
2345					   mad_list);
2346		mad_send_wr->retry = 1;
2347	}
2348	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2349}
2350
2351static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
2352		struct ib_wc *wc)
2353{
2354	struct ib_mad_list_head *mad_list =
2355		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2356	struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
2357	struct ib_mad_send_wr_private *mad_send_wr;
2358	int ret;
2359
2360	/*
2361	 * Send errors will transition the QP to SQE - move
2362	 * QP to RTS and repost flushed work requests
2363	 */
2364	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2365				   mad_list);
2366	if (wc->status == IB_WC_WR_FLUSH_ERR) {
2367		if (mad_send_wr->retry) {
2368			/* Repost send */
2369			mad_send_wr->retry = 0;
2370			trace_ib_mad_error_handler(mad_send_wr, qp_info);
2371			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
2372					   NULL);
2373			if (!ret)
2374				return false;
2375		}
2376	} else {
2377		struct ib_qp_attr *attr;
2378
2379		/* Transition QP to RTS and fail offending send */
2380		attr = kmalloc(sizeof *attr, GFP_KERNEL);
2381		if (attr) {
2382			attr->qp_state = IB_QPS_RTS;
2383			attr->cur_qp_state = IB_QPS_SQE;
2384			ret = ib_modify_qp(qp_info->qp, attr,
2385					   IB_QP_STATE | IB_QP_CUR_STATE);
2386			kfree(attr);
2387			if (ret)
2388				dev_err(&port_priv->device->dev,
2389					"%s - ib_modify_qp to RTS: %d\n",
2390					__func__, ret);
2391			else
2392				mark_sends_for_retry(qp_info);
2393		}
2394	}
2395
2396	return true;
2397}
2398
2399static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2400{
2401	unsigned long flags;
2402	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2403	struct ib_mad_send_wc mad_send_wc;
2404	struct list_head cancel_list;
2405
2406	INIT_LIST_HEAD(&cancel_list);
2407
2408	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2409	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2410				 &mad_agent_priv->send_list, agent_list) {
2411		if (mad_send_wr->status == IB_WC_SUCCESS) {
2412			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2413			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2414		}
2415	}
2416
2417	/* Empty wait list to prevent receives from finding a request */
2418	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2419	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2420
2421	/* Report all cancelled requests */
2422	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2423	mad_send_wc.vendor_err = 0;
2424
2425	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2426				 &cancel_list, agent_list) {
2427		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2428		list_del(&mad_send_wr->agent_list);
2429		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2430						   &mad_send_wc);
2431		deref_mad_agent(mad_agent_priv);
2432	}
2433}
2434
2435static struct ib_mad_send_wr_private*
2436find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2437	     struct ib_mad_send_buf *send_buf)
2438{
2439	struct ib_mad_send_wr_private *mad_send_wr;
2440
2441	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2442			    agent_list) {
2443		if (&mad_send_wr->send_buf == send_buf)
2444			return mad_send_wr;
2445	}
2446
2447	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2448			    agent_list) {
2449		if (is_rmpp_data_mad(mad_agent_priv,
2450				     mad_send_wr->send_buf.mad) &&
2451		    &mad_send_wr->send_buf == send_buf)
2452			return mad_send_wr;
2453	}
2454	return NULL;
2455}
2456
2457int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2458{
2459	struct ib_mad_agent_private *mad_agent_priv;
2460	struct ib_mad_send_wr_private *mad_send_wr;
2461	unsigned long flags;
2462	int active;
2463
2464	if (!send_buf)
2465		return -EINVAL;
2466
2467	mad_agent_priv = container_of(send_buf->mad_agent,
2468				      struct ib_mad_agent_private, agent);
2469	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2470	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2471	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2472		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2473		return -EINVAL;
2474	}
2475
2476	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2477	if (!timeout_ms) {
2478		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2479		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2480	}
2481
2482	mad_send_wr->send_buf.timeout_ms = timeout_ms;
2483	if (active)
2484		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2485	else
2486		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2487
2488	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2489	return 0;
2490}
2491EXPORT_SYMBOL(ib_modify_mad);
2492
2493static void local_completions(struct work_struct *work)
2494{
2495	struct ib_mad_agent_private *mad_agent_priv;
2496	struct ib_mad_local_private *local;
2497	struct ib_mad_agent_private *recv_mad_agent;
2498	unsigned long flags;
2499	int free_mad;
2500	struct ib_wc wc;
2501	struct ib_mad_send_wc mad_send_wc;
2502	bool opa;
2503
2504	mad_agent_priv =
2505		container_of(work, struct ib_mad_agent_private, local_work);
2506
2507	opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
2508			       mad_agent_priv->qp_info->port_priv->port_num);
2509
2510	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2511	while (!list_empty(&mad_agent_priv->local_list)) {
2512		local = list_entry(mad_agent_priv->local_list.next,
2513				   struct ib_mad_local_private,
2514				   completion_list);
2515		list_del(&local->completion_list);
2516		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2517		free_mad = 0;
2518		if (local->mad_priv) {
2519			u8 base_version;
2520			recv_mad_agent = local->recv_mad_agent;
2521			if (!recv_mad_agent) {
2522				dev_err(&mad_agent_priv->agent.device->dev,
2523					"No receive MAD agent for local completion\n");
2524				free_mad = 1;
2525				goto local_send_completion;
2526			}
2527
2528			/*
2529			 * Defined behavior is to complete response
2530			 * before request
2531			 */
2532			build_smp_wc(recv_mad_agent->agent.qp,
2533				     local->mad_send_wr->send_wr.wr.wr_cqe,
2534				     be16_to_cpu(IB_LID_PERMISSIVE),
2535				     local->mad_send_wr->send_wr.pkey_index,
2536				     recv_mad_agent->agent.port_num, &wc);
2537
2538			local->mad_priv->header.recv_wc.wc = &wc;
2539
2540			base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version;
2541			if (opa && base_version == OPA_MGMT_BASE_VERSION) {
2542				local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len;
2543				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2544			} else {
2545				local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2546				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2547			}
2548
2549			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2550			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2551				 &local->mad_priv->header.recv_wc.rmpp_list);
2552			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2553			local->mad_priv->header.recv_wc.recv_buf.mad =
2554						(struct ib_mad *)local->mad_priv->mad;
2555			recv_mad_agent->agent.recv_handler(
2556						&recv_mad_agent->agent,
2557						&local->mad_send_wr->send_buf,
2558						&local->mad_priv->header.recv_wc);
2559			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2560			deref_mad_agent(recv_mad_agent);
2561			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2562		}
2563
2564local_send_completion:
2565		/* Complete send */
2566		mad_send_wc.status = IB_WC_SUCCESS;
2567		mad_send_wc.vendor_err = 0;
2568		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2569		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2570						   &mad_send_wc);
2571
2572		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2573		deref_mad_agent(mad_agent_priv);
2574		if (free_mad)
2575			kfree(local->mad_priv);
2576		kfree(local);
2577	}
2578	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2579}
2580
2581static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2582{
2583	int ret;
2584
2585	if (!mad_send_wr->retries_left)
2586		return -ETIMEDOUT;
2587
2588	mad_send_wr->retries_left--;
2589	mad_send_wr->send_buf.retries++;
2590
2591	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2592
2593	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
2594		ret = ib_retry_rmpp(mad_send_wr);
2595		switch (ret) {
2596		case IB_RMPP_RESULT_UNHANDLED:
2597			ret = ib_send_mad(mad_send_wr);
2598			break;
2599		case IB_RMPP_RESULT_CONSUMED:
2600			ret = 0;
2601			break;
2602		default:
2603			ret = -ECOMM;
2604			break;
2605		}
2606	} else
2607		ret = ib_send_mad(mad_send_wr);
2608
2609	if (!ret) {
2610		mad_send_wr->refcount++;
2611		list_add_tail(&mad_send_wr->agent_list,
2612			      &mad_send_wr->mad_agent_priv->send_list);
2613	}
2614	return ret;
2615}
2616
2617static void timeout_sends(struct work_struct *work)
2618{
2619	struct ib_mad_agent_private *mad_agent_priv;
2620	struct ib_mad_send_wr_private *mad_send_wr;
2621	struct ib_mad_send_wc mad_send_wc;
2622	unsigned long flags, delay;
2623
2624	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
2625				      timed_work.work);
2626	mad_send_wc.vendor_err = 0;
2627
2628	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2629	while (!list_empty(&mad_agent_priv->wait_list)) {
2630		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2631					 struct ib_mad_send_wr_private,
2632					 agent_list);
2633
2634		if (time_after(mad_send_wr->timeout, jiffies)) {
2635			delay = mad_send_wr->timeout - jiffies;
2636			if ((long)delay <= 0)
2637				delay = 1;
2638			queue_delayed_work(mad_agent_priv->qp_info->
2639					   port_priv->wq,
2640					   &mad_agent_priv->timed_work, delay);
2641			break;
2642		}
2643
2644		list_del(&mad_send_wr->agent_list);
2645		if (mad_send_wr->status == IB_WC_SUCCESS &&
2646		    !retry_send(mad_send_wr))
2647			continue;
2648
2649		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2650
2651		if (mad_send_wr->status == IB_WC_SUCCESS)
2652			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2653		else
2654			mad_send_wc.status = mad_send_wr->status;
2655		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2656		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2657						   &mad_send_wc);
2658
2659		deref_mad_agent(mad_agent_priv);
2660		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2661	}
2662	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2663}
2664
2665/*
2666 * Allocate receive MADs and post receive WRs for them
2667 */
2668static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2669				    struct ib_mad_private *mad)
2670{
2671	unsigned long flags;
2672	int post, ret;
2673	struct ib_mad_private *mad_priv;
2674	struct ib_sge sg_list;
2675	struct ib_recv_wr recv_wr;
2676	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2677
2678	/* Initialize common scatter list fields */
2679	sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
2680
2681	/* Initialize common receive WR fields */
2682	recv_wr.next = NULL;
2683	recv_wr.sg_list = &sg_list;
2684	recv_wr.num_sge = 1;
2685
2686	do {
2687		/* Allocate and map receive buffer */
2688		if (mad) {
2689			mad_priv = mad;
2690			mad = NULL;
2691		} else {
2692			mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
2693						     GFP_ATOMIC);
2694			if (!mad_priv) {
2695				ret = -ENOMEM;
2696				break;
2697			}
2698		}
2699		sg_list.length = mad_priv_dma_size(mad_priv);
2700		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
2701						 &mad_priv->grh,
2702						 mad_priv_dma_size(mad_priv),
2703						 DMA_FROM_DEVICE);
2704		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
2705						  sg_list.addr))) {
2706			kfree(mad_priv);
2707			ret = -ENOMEM;
2708			break;
2709		}
2710		mad_priv->header.mapping = sg_list.addr;
2711		mad_priv->header.mad_list.mad_queue = recv_queue;
2712		mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
2713		recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
2714
2715		/* Post receive WR */
2716		spin_lock_irqsave(&recv_queue->lock, flags);
2717		post = (++recv_queue->count < recv_queue->max_active);
2718		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2719		spin_unlock_irqrestore(&recv_queue->lock, flags);
2720		ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
2721		if (ret) {
2722			spin_lock_irqsave(&recv_queue->lock, flags);
2723			list_del(&mad_priv->header.mad_list.list);
2724			recv_queue->count--;
2725			spin_unlock_irqrestore(&recv_queue->lock, flags);
2726			ib_dma_unmap_single(qp_info->port_priv->device,
2727					    mad_priv->header.mapping,
2728					    mad_priv_dma_size(mad_priv),
2729					    DMA_FROM_DEVICE);
2730			kfree(mad_priv);
2731			dev_err(&qp_info->port_priv->device->dev,
2732				"ib_post_recv failed: %d\n", ret);
2733			break;
2734		}
2735	} while (post);
2736
2737	return ret;
2738}
2739
2740/*
2741 * Return all the posted receive MADs
2742 */
2743static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2744{
2745	struct ib_mad_private_header *mad_priv_hdr;
2746	struct ib_mad_private *recv;
2747	struct ib_mad_list_head *mad_list;
2748
2749	if (!qp_info->qp)
2750		return;
2751
2752	while (!list_empty(&qp_info->recv_queue.list)) {
2753
2754		mad_list = list_entry(qp_info->recv_queue.list.next,
2755				      struct ib_mad_list_head, list);
2756		mad_priv_hdr = container_of(mad_list,
2757					    struct ib_mad_private_header,
2758					    mad_list);
2759		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2760				    header);
2761
2762		/* Remove from posted receive MAD list */
2763		list_del(&mad_list->list);
2764
2765		ib_dma_unmap_single(qp_info->port_priv->device,
2766				    recv->header.mapping,
2767				    mad_priv_dma_size(recv),
2768				    DMA_FROM_DEVICE);
2769		kfree(recv);
2770	}
2771
2772	qp_info->recv_queue.count = 0;
2773}
2774
2775/*
2776 * Start the port
2777 */
2778static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2779{
2780	int ret, i;
2781	struct ib_qp_attr *attr;
2782	struct ib_qp *qp;
2783	u16 pkey_index;
2784
2785	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2786	if (!attr)
2787		return -ENOMEM;
2788
2789	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
2790			   IB_DEFAULT_PKEY_FULL, &pkey_index);
2791	if (ret)
2792		pkey_index = 0;
2793
2794	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2795		qp = port_priv->qp_info[i].qp;
2796		if (!qp)
2797			continue;
2798
2799		/*
2800		 * PKey index for QP1 is irrelevant but
2801		 * one is needed for the Reset to Init transition
2802		 */
2803		attr->qp_state = IB_QPS_INIT;
2804		attr->pkey_index = pkey_index;
2805		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2806		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2807					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
2808		if (ret) {
2809			dev_err(&port_priv->device->dev,
2810				"Couldn't change QP%d state to INIT: %d\n",
2811				i, ret);
2812			goto out;
2813		}
2814
2815		attr->qp_state = IB_QPS_RTR;
2816		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2817		if (ret) {
2818			dev_err(&port_priv->device->dev,
2819				"Couldn't change QP%d state to RTR: %d\n",
2820				i, ret);
2821			goto out;
2822		}
2823
2824		attr->qp_state = IB_QPS_RTS;
2825		attr->sq_psn = IB_MAD_SEND_Q_PSN;
2826		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2827		if (ret) {
2828			dev_err(&port_priv->device->dev,
2829				"Couldn't change QP%d state to RTS: %d\n",
2830				i, ret);
2831			goto out;
2832		}
2833	}
2834
2835	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2836	if (ret) {
2837		dev_err(&port_priv->device->dev,
2838			"Failed to request completion notification: %d\n",
2839			ret);
2840		goto out;
2841	}
2842
2843	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2844		if (!port_priv->qp_info[i].qp)
2845			continue;
2846
2847		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2848		if (ret) {
2849			dev_err(&port_priv->device->dev,
2850				"Couldn't post receive WRs\n");
2851			goto out;
2852		}
2853	}
2854out:
2855	kfree(attr);
2856	return ret;
2857}
2858
2859static void qp_event_handler(struct ib_event *event, void *qp_context)
2860{
2861	struct ib_mad_qp_info	*qp_info = qp_context;
2862
2863	/* It's worse than that! He's dead, Jim! */
2864	dev_err(&qp_info->port_priv->device->dev,
2865		"Fatal error (%d) on MAD QP (%u)\n",
2866		event->event, qp_info->qp->qp_num);
2867}
2868
2869static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2870			   struct ib_mad_queue *mad_queue)
2871{
2872	mad_queue->qp_info = qp_info;
2873	mad_queue->count = 0;
2874	spin_lock_init(&mad_queue->lock);
2875	INIT_LIST_HEAD(&mad_queue->list);
2876}
2877
2878static void init_mad_qp(struct ib_mad_port_private *port_priv,
2879			struct ib_mad_qp_info *qp_info)
2880{
2881	qp_info->port_priv = port_priv;
2882	init_mad_queue(qp_info, &qp_info->send_queue);
2883	init_mad_queue(qp_info, &qp_info->recv_queue);
2884	INIT_LIST_HEAD(&qp_info->overflow_list);
2885}
2886
2887static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2888			 enum ib_qp_type qp_type)
2889{
2890	struct ib_qp_init_attr	qp_init_attr;
2891	int ret;
2892
2893	memset(&qp_init_attr, 0, sizeof qp_init_attr);
2894	qp_init_attr.send_cq = qp_info->port_priv->cq;
2895	qp_init_attr.recv_cq = qp_info->port_priv->cq;
2896	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2897	qp_init_attr.cap.max_send_wr = mad_sendq_size;
2898	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
2899	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2900	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2901	qp_init_attr.qp_type = qp_type;
2902	qp_init_attr.port_num = qp_info->port_priv->port_num;
2903	qp_init_attr.qp_context = qp_info;
2904	qp_init_attr.event_handler = qp_event_handler;
2905	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2906	if (IS_ERR(qp_info->qp)) {
2907		dev_err(&qp_info->port_priv->device->dev,
2908			"Couldn't create ib_mad QP%d\n",
2909			get_spl_qp_index(qp_type));
2910		ret = PTR_ERR(qp_info->qp);
2911		goto error;
2912	}
2913	/* Use minimum queue sizes unless the CQ is resized */
2914	qp_info->send_queue.max_active = mad_sendq_size;
2915	qp_info->recv_queue.max_active = mad_recvq_size;
2916	return 0;
2917
2918error:
2919	return ret;
2920}
2921
2922static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2923{
2924	if (!qp_info->qp)
2925		return;
2926
2927	ib_destroy_qp(qp_info->qp);
2928}
2929
2930/*
2931 * Open the port
2932 * Create the QP, PD, MR, and CQ if needed
2933 */
2934static int ib_mad_port_open(struct ib_device *device,
2935			    u32 port_num)
2936{
2937	int ret, cq_size;
2938	struct ib_mad_port_private *port_priv;
2939	unsigned long flags;
2940	char name[sizeof "ib_mad123"];
2941	int has_smi;
2942
2943	if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
2944		return -EFAULT;
2945
2946	if (WARN_ON(rdma_cap_opa_mad(device, port_num) &&
2947		    rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE))
2948		return -EFAULT;
2949
2950	/* Create new device info */
2951	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
2952	if (!port_priv)
2953		return -ENOMEM;
2954
2955	port_priv->device = device;
2956	port_priv->port_num = port_num;
2957	spin_lock_init(&port_priv->reg_lock);
2958	init_mad_qp(port_priv, &port_priv->qp_info[0]);
2959	init_mad_qp(port_priv, &port_priv->qp_info[1]);
2960
2961	cq_size = mad_sendq_size + mad_recvq_size;
2962	has_smi = rdma_cap_ib_smi(device, port_num);
2963	if (has_smi)
2964		cq_size *= 2;
2965
2966	port_priv->pd = ib_alloc_pd(device, 0);
2967	if (IS_ERR(port_priv->pd)) {
2968		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
2969		ret = PTR_ERR(port_priv->pd);
2970		goto error3;
2971	}
2972
2973	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
2974			IB_POLL_UNBOUND_WORKQUEUE);
2975	if (IS_ERR(port_priv->cq)) {
2976		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
2977		ret = PTR_ERR(port_priv->cq);
2978		goto error4;
2979	}
2980
2981	if (has_smi) {
2982		ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2983		if (ret)
2984			goto error6;
2985	}
2986	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
2987	if (ret)
2988		goto error7;
2989
2990	snprintf(name, sizeof(name), "ib_mad%u", port_num);
2991	port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
2992	if (!port_priv->wq) {
2993		ret = -ENOMEM;
2994		goto error8;
2995	}
2996
2997	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2998	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
2999	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3000
3001	ret = ib_mad_port_start(port_priv);
3002	if (ret) {
3003		dev_err(&device->dev, "Couldn't start port\n");
3004		goto error9;
3005	}
3006
3007	return 0;
3008
3009error9:
3010	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3011	list_del_init(&port_priv->port_list);
3012	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3013
3014	destroy_workqueue(port_priv->wq);
3015error8:
3016	destroy_mad_qp(&port_priv->qp_info[1]);
3017error7:
3018	destroy_mad_qp(&port_priv->qp_info[0]);
3019error6:
3020	ib_free_cq(port_priv->cq);
3021	cleanup_recv_queue(&port_priv->qp_info[1]);
3022	cleanup_recv_queue(&port_priv->qp_info[0]);
3023error4:
3024	ib_dealloc_pd(port_priv->pd);
3025error3:
3026	kfree(port_priv);
3027
3028	return ret;
3029}
3030
3031/*
3032 * Close the port
3033 * If there are no classes using the port, free the port
3034 * resources (CQ, MR, PD, QP) and remove the port's info structure
3035 */
3036static int ib_mad_port_close(struct ib_device *device, u32 port_num)
3037{
3038	struct ib_mad_port_private *port_priv;
3039	unsigned long flags;
3040
3041	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3042	port_priv = __ib_get_mad_port(device, port_num);
3043	if (port_priv == NULL) {
3044		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3045		dev_err(&device->dev, "Port %u not found\n", port_num);
3046		return -ENODEV;
3047	}
3048	list_del_init(&port_priv->port_list);
3049	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3050
3051	destroy_workqueue(port_priv->wq);
3052	destroy_mad_qp(&port_priv->qp_info[1]);
3053	destroy_mad_qp(&port_priv->qp_info[0]);
3054	ib_free_cq(port_priv->cq);
3055	ib_dealloc_pd(port_priv->pd);
3056	cleanup_recv_queue(&port_priv->qp_info[1]);
3057	cleanup_recv_queue(&port_priv->qp_info[0]);
3058	/* XXX: Handle deallocation of MAD registration tables */
3059
3060	kfree(port_priv);
3061
3062	return 0;
3063}
3064
3065static int ib_mad_init_device(struct ib_device *device)
3066{
3067	int start, i;
3068	unsigned int count = 0;
3069	int ret;
3070
3071	start = rdma_start_port(device);
3072
3073	for (i = start; i <= rdma_end_port(device); i++) {
3074		if (!rdma_cap_ib_mad(device, i))
3075			continue;
3076
3077		ret = ib_mad_port_open(device, i);
3078		if (ret) {
3079			dev_err(&device->dev, "Couldn't open port %d\n", i);
3080			goto error;
3081		}
3082		ret = ib_agent_port_open(device, i);
3083		if (ret) {
3084			dev_err(&device->dev,
3085				"Couldn't open port %d for agents\n", i);
3086			goto error_agent;
3087		}
3088		count++;
3089	}
3090	if (!count)
3091		return -EOPNOTSUPP;
3092
3093	return 0;
3094
3095error_agent:
3096	if (ib_mad_port_close(device, i))
3097		dev_err(&device->dev, "Couldn't close port %d\n", i);
3098
3099error:
3100	while (--i >= start) {
3101		if (!rdma_cap_ib_mad(device, i))
3102			continue;
3103
3104		if (ib_agent_port_close(device, i))
3105			dev_err(&device->dev,
3106				"Couldn't close port %d for agents\n", i);
3107		if (ib_mad_port_close(device, i))
3108			dev_err(&device->dev, "Couldn't close port %d\n", i);
3109	}
3110	return ret;
3111}
3112
3113static void ib_mad_remove_device(struct ib_device *device, void *client_data)
3114{
3115	unsigned int i;
3116
3117	rdma_for_each_port (device, i) {
3118		if (!rdma_cap_ib_mad(device, i))
3119			continue;
3120
3121		if (ib_agent_port_close(device, i))
3122			dev_err(&device->dev,
3123				"Couldn't close port %u for agents\n", i);
3124		if (ib_mad_port_close(device, i))
3125			dev_err(&device->dev, "Couldn't close port %u\n", i);
3126	}
3127}
3128
3129static struct ib_client mad_client = {
3130	.name   = "mad",
3131	.add = ib_mad_init_device,
3132	.remove = ib_mad_remove_device
3133};
3134
3135int ib_mad_init(void)
3136{
3137	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
3138	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
3139
3140	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
3141	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
3142
3143	INIT_LIST_HEAD(&ib_mad_port_list);
3144
3145	if (ib_register_client(&mad_client)) {
3146		pr_err("Couldn't register ib_mad client\n");
3147		return -EINVAL;
3148	}
3149
3150	return 0;
3151}
3152
3153void ib_mad_cleanup(void)
3154{
3155	ib_unregister_client(&mad_client);
3156}
3157