eq.c revision 318536
/*
 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>

#include <linux/mlx4/cmd.h>

#include "mlx4.h"
#include "fw.h"

enum {
	MLX4_IRQNAME_SIZE	= 32
};

enum {
	MLX4_NUM_ASYNC_EQE	= 0x100,
	MLX4_NUM_SPARE_EQE	= 0x80,
	MLX4_EQ_ENTRY_SIZE	= 0x20
};

#define MLX4_EQ_STATUS_OK	   ( 0 << 28)
#define MLX4_EQ_STATUS_WRITE_FAIL  (10 << 28)
#define MLX4_EQ_OWNER_SW	   ( 0 << 24)
#define MLX4_EQ_OWNER_HW	   ( 1 << 24)
#define MLX4_EQ_FLAG_EC		   ( 1 << 18)
#define MLX4_EQ_FLAG_OI		   ( 1 << 17)
#define MLX4_EQ_STATE_ARMED	   ( 9 <<  8)
#define MLX4_EQ_STATE_FIRED	   (10 <<  8)
#define MLX4_EQ_STATE_ALWAYS_ARMED (11 <<  8)

#define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG)	    | \
			       (1ull << MLX4_EVENT_TYPE_COMM_EST)	    | \
			       (1ull << MLX4_EVENT_TYPE_SQ_DRAINED)	    | \
			       (1ull << MLX4_EVENT_TYPE_CQ_ERROR)	    | \
			       (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR)	    | \
			       (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR)    | \
			       (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED)    | \
			       (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
			       (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
			       (1ull << MLX4_EVENT_TYPE_PORT_CHANGE)	    | \
			       (1ull << MLX4_EVENT_TYPE_ECC_DETECT)	    | \
			       (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
			       (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
			       (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)	    | \
			       (1ull << MLX4_EVENT_TYPE_CMD)		    | \
			       (1ull << MLX4_EVENT_TYPE_OP_REQUIRED)	    | \
			       (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL)       | \
			       (1ull << MLX4_EVENT_TYPE_FLR_EVENT)	    | \
			       (1ull << MLX4_EVENT_TYPE_FATAL_WARNING))

static u64 get_async_ev_mask(struct mlx4_dev *dev)
{
	u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK;
	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
		async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT);
	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT)
		async_ev_mask |= (1ull << MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT);

	return async_ev_mask;
}

static void eq_set_ci(struct mlx4_eq *eq, int req_not)
{
	__raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |
					       req_not << 31),
		     eq->doorbell);
	/* We still want ordering, just not swabbing, so add a barrier */
	mb();
}
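
/*
 * Doorbell layout note (derived from eq_set_ci() above): the low 24 bits
 * of the doorbell word carry the consumer index and bit 31 is the
 * request-notification flag, so eq_set_ci(eq, 1) both updates the CI and
 * re-arms the EQ in a single write.
 */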

static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
{
	/* (entry & (eq->nent - 1)) gives us a cyclic array */
	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
	 * When this feature is enabled, the first (in the lower addresses)
	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
	 * contain the legacy EQE information.
	 */
	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
}
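
/*
 * Worked example (illustrative only): with 64-byte EQEs (eqe_factor == 1),
 * nent == 256 and PAGE_SIZE == 4096, entry 261 wraps to slot 5, giving
 * offset = 5 * 64 = 320; get_eqe() then returns
 * page_list[0].buf + 320 + 32, i.e. the legacy 32-byte payload sitting in
 * the upper half of the extended slot.
 */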

static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
{
	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
}
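
/*
 * Ownership note (a sketch of the convention above): the polarity of the
 * owner bit flips on every pass around the ring.  On even passes
 * ((cons_index & nent) == 0) an entry is software-owned when the 0x80 bit
 * is clear; on odd passes, when it is set.  The XOR therefore returns the
 * EQE only once hardware has finished writing the next slot.
 */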

static struct mlx4_eqe *next_slave_event_eqe(struct mlx4_slave_event_eq *slave_eq)
{
	struct mlx4_eqe *eqe =
		&slave_eq->event_eqe[slave_eq->cons & (SLAVE_EVENT_EQ_SIZE - 1)];
	return (!!(eqe->owner & 0x80) ^
		!!(slave_eq->cons & SLAVE_EVENT_EQ_SIZE)) ?
		eqe : NULL;
}

void mlx4_gen_slave_eqe(struct work_struct *work)
{
	struct mlx4_mfunc_master_ctx *master =
		container_of(work, struct mlx4_mfunc_master_ctx,
			     slave_event_work);
	struct mlx4_mfunc *mfunc =
		container_of(master, struct mlx4_mfunc, master);
	struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc);
	struct mlx4_dev *dev = &priv->dev;
	struct mlx4_slave_event_eq *slave_eq = &mfunc->master.slave_eq;
	struct mlx4_eqe *eqe;
	u8 slave;
	int i;

	for (eqe = next_slave_event_eqe(slave_eq); eqe;
	      eqe = next_slave_event_eqe(slave_eq)) {
		slave = eqe->slave_id;

		/* All active slaves need to receive the event */
		if (slave == ALL_SLAVES) {
			for (i = 0; i < dev->num_slaves; i++) {
				if (mlx4_GEN_EQE(dev, i, eqe))
					mlx4_warn(dev, "Failed to generate "
						  "event for slave %d\n", i);
			}
		} else {
			if (mlx4_GEN_EQE(dev, slave, eqe))
				mlx4_warn(dev, "Failed to generate event "
					       "for slave %d\n", slave);
		}
		++slave_eq->cons;
	}
}

static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq;
	struct mlx4_eqe *s_eqe;
	unsigned long flags;

	spin_lock_irqsave(&slave_eq->event_lock, flags);
	s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
	if ((!!(s_eqe->owner & 0x80)) ^
	    (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) {
		mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. "
			  "No free EQE on slave events queue\n", slave);
		spin_unlock_irqrestore(&slave_eq->event_lock, flags);
		return;
	}

	memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
	s_eqe->slave_id = slave;
	/* ensure all information is written before setting the ownership bit */
	wmb();
	s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80;
	++slave_eq->prod;

	queue_work(priv->mfunc.master.comm_wq,
		   &priv->mfunc.master.slave_event_work);
	spin_unlock_irqrestore(&slave_eq->event_lock, flags);
}
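
/*
 * Ring bookkeeping sketch: slave_event() is the producer and
 * mlx4_gen_slave_eqe() the consumer of the SLAVE_EVENT_EQ_SIZE-entry ring.
 * The producer stamps owner = 0x80 on even passes and 0x0 on odd ones;
 * next_slave_event_eqe() treats an entry as valid when that stamp disagrees
 * with the consumer's pass parity, i.e. while the producer is at most one
 * full pass ahead.
 */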

static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
			     struct mlx4_eqe *eqe)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	if (slave < 0 || slave >= dev->num_slaves ||
	    slave == dev->caps.function)
		return;

	if (!priv->mfunc.master.slave_state[slave].active)
		return;

	slave_event(dev, slave, eqe);
}

int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
{
	struct mlx4_eqe eqe;
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave];

	if (!s_slave->active)
		return 0;

	memset(&eqe, 0, sizeof eqe);

	eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
	eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE;
	eqe.event.port_mgmt_change.port = port;

	return mlx4_GEN_EQE(dev, slave, &eqe);
}
EXPORT_SYMBOL(mlx4_gen_pkey_eqe);

int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
{
	struct mlx4_eqe eqe;

	/* don't send if we don't have that slave */
	if (dev->num_vfs < slave)
		return 0;
	memset(&eqe, 0, sizeof eqe);

	eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
	eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO;
	eqe.event.port_mgmt_change.port = port;

	return mlx4_GEN_EQE(dev, slave, &eqe);
}
EXPORT_SYMBOL(mlx4_gen_guid_change_eqe);

int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
				   u8 port_subtype_change)
{
	struct mlx4_eqe eqe;

	/* don't send if we don't have that slave */
	if (dev->num_vfs < slave)
		return 0;
	memset(&eqe, 0, sizeof eqe);

	eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE;
	eqe.subtype = port_subtype_change;
	eqe.event.port_change.port = cpu_to_be32(port << 28);

	mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__,
		 port_subtype_change, slave, port);
	return mlx4_GEN_EQE(dev, slave, &eqe);
}
EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe);

enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
	if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) {
		pr_err("%s: Error: asking for slave:%d, port:%d\n",
		       __func__, slave, port);
		return SLAVE_PORT_DOWN;
	}
	return s_state[slave].port_state[port];
}
EXPORT_SYMBOL(mlx4_get_slave_port_state);

static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port,
				     enum slave_port_state state)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;

	if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
		pr_err("%s: Error: asking for slave:%d, port:%d\n",
		       __func__, slave, port);
		return -1;
	}
	s_state[slave].port_state[port] = state;

	return 0;
}

static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
{
	int i;
	enum slave_port_gen_event gen_event;

	for (i = 0; i < dev->num_slaves; i++)
		set_and_calc_slave_port_state(dev, i, port, event, &gen_event);
}
/**************************************************************************
	This function takes as input the new event for the port and,
	according to the previous state, changes the slave's port state.
	The events are:
		MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
		MLX4_PORT_STATE_DEV_EVENT_PORT_UP
		MLX4_PORT_STATE_IB_EVENT_GID_VALID
		MLX4_PORT_STATE_IB_EVENT_GID_INVALID
***************************************************************************/
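/*
 * Transition sketch (derived from the switch statement below):
 *
 *	SLAVE_PORT_DOWN	 + PORT_DOWN event -> SLAVE_PENDING_UP
 *	SLAVE_PENDING_UP + PORT_DOWN event -> SLAVE_PORT_DOWN
 *	SLAVE_PENDING_UP + GID_VALID event -> SLAVE_PORT_UP   (gen EVENT_UP)
 *	SLAVE_PORT_UP	 + PORT_DOWN event -> SLAVE_PORT_DOWN (gen EVENT_DOWN)
 *	SLAVE_PORT_UP	 + GID_INVALID	   -> SLAVE_PENDING_UP (gen EVENT_DOWN)
 *
 * where the first line is triggered by PORT_UP; any other combination
 * leaves the state unchanged and generates no event.
 */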
int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
				  u8 port, int event,
				  enum slave_port_gen_event *gen_event)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *ctx = NULL;
	unsigned long flags;
	int ret = -1;
	enum slave_port_state cur_state =
		mlx4_get_slave_port_state(dev, slave, port);

	*gen_event = SLAVE_PORT_GEN_EVENT_NONE;

	if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
		pr_err("%s: Error: asking for slave:%d, port:%d\n",
		       __func__, slave, port);
		return ret;
	}

	ctx = &priv->mfunc.master.slave_state[slave];
	spin_lock_irqsave(&ctx->lock, flags);

	switch (cur_state) {
	case SLAVE_PORT_DOWN:
		if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event)
			mlx4_set_slave_port_state(dev, slave, port,
						  SLAVE_PENDING_UP);
		break;
	case SLAVE_PENDING_UP:
		if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event)
			mlx4_set_slave_port_state(dev, slave, port,
						  SLAVE_PORT_DOWN);
		else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) {
			mlx4_set_slave_port_state(dev, slave, port,
						  SLAVE_PORT_UP);
			*gen_event = SLAVE_PORT_GEN_EVENT_UP;
		}
		break;
	case SLAVE_PORT_UP:
		if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) {
			mlx4_set_slave_port_state(dev, slave, port,
						  SLAVE_PORT_DOWN);
			*gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
		} else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID ==
				event) {
			mlx4_set_slave_port_state(dev, slave, port,
						  SLAVE_PENDING_UP);
			*gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
		}
		break;
	default:
		pr_err("%s: BUG!!! UNKNOWN state: "
		       "slave:%d, port:%d\n", __func__, slave, port);
		goto out;
	}
	ret = mlx4_get_slave_port_state(dev, slave, port);

out:
	spin_unlock_irqrestore(&ctx->lock, flags);
	return ret;
}

EXPORT_SYMBOL(set_and_calc_slave_port_state);

int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr, u16 sm_lid, u8 sm_sl)
{
	struct mlx4_eqe eqe;

	memset(&eqe, 0, sizeof eqe);

	eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
	eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO;
	eqe.event.port_mgmt_change.port = port;
	eqe.event.port_mgmt_change.params.port_info.changed_attr =
		cpu_to_be32((u32) attr);
	if (attr & MSTR_SM_CHANGE_MASK) {
		eqe.event.port_mgmt_change.params.port_info.mstr_sm_lid =
			cpu_to_be16(sm_lid);
		eqe.event.port_mgmt_change.params.port_info.mstr_sm_sl =
			sm_sl;
	}

	slave_event(dev, ALL_SLAVES, &eqe);
	return 0;
}
EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev);

void mlx4_master_handle_slave_flr(struct work_struct *work)
{
	struct mlx4_mfunc_master_ctx *master =
		container_of(work, struct mlx4_mfunc_master_ctx,
			     slave_flr_event_work);
	struct mlx4_mfunc *mfunc =
		container_of(master, struct mlx4_mfunc, master);
	struct mlx4_priv *priv =
		container_of(mfunc, struct mlx4_priv, mfunc);
	struct mlx4_dev *dev = &priv->dev;
	struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
	int i;
	int err;
	unsigned long flags;

	mlx4_dbg(dev, "mlx4_handle_slave_flr\n");

	for (i = 0; i < dev->num_slaves; i++) {
		if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
			mlx4_dbg(dev, "mlx4_handle_slave_flr: "
				 "clean slave: %d\n", i);

			mlx4_delete_all_resources_for_slave(dev, i);
			/* return the slave to running mode */
			spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
			slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
			slave_state[i].is_slave_going_down = 0;
			spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
			/* notify the FW */
			err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
				       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
			if (err)
				mlx4_warn(dev, "Failed to notify FW on "
					  "FLR done (slave:%d)\n", i);
		}
	}
}

static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_eqe *eqe;
	int cqn;
	int eqes_found = 0;
	int set_ci = 0;
	int port;
	int slave = 0;
	int ret;
	u32 flr_slave;
	u8 update_slave_state;
	int i;
	enum slave_port_gen_event gen_event;
	unsigned long flags;
	struct mlx4_vport_state *s_info;

	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		rmb();

		switch (eqe->type) {
		case MLX4_EVENT_TYPE_COMP:
			cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
			mlx4_cq_completion(dev, cqn);
			break;

		case MLX4_EVENT_TYPE_PATH_MIG:
		case MLX4_EVENT_TYPE_COMM_EST:
		case MLX4_EVENT_TYPE_SQ_DRAINED:
		case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
		case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
		case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
		case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
			mlx4_dbg(dev, "event %d arrived\n", eqe->type);
			if (mlx4_is_master(dev)) {
				/* forward only to slave owning the QP */
				ret = mlx4_get_slave_from_resource_id(dev,
						RES_QP,
						be32_to_cpu(eqe->event.qp.qpn)
						& 0xffffff, &slave);
				if (ret && ret != -ENOENT) {
					mlx4_dbg(dev, "QP event %02x(%02x) on "
						 "EQ %d at index %u: could "
						 "not get slave id (%d)\n",
						 eqe->type, eqe->subtype,
						 eq->eqn, eq->cons_index, ret);
					break;
				}

				if (!ret && slave != dev->caps.function) {
					mlx4_slave_event(dev, slave, eqe);
					break;
				}

			}
			mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) &
				      0xffffff, eqe->type);
			break;

		case MLX4_EVENT_TYPE_SRQ_LIMIT:
			mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n",
				 __func__);
		/* fall through */
		case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
			if (mlx4_is_master(dev)) {
				/* forward only to slave owning the SRQ */
				ret = mlx4_get_slave_from_resource_id(dev,
						RES_SRQ,
						be32_to_cpu(eqe->event.srq.srqn)
						& 0xffffff,
						&slave);
				if (ret && ret != -ENOENT) {
					mlx4_warn(dev, "SRQ event %02x(%02x) "
						  "on EQ %d at index %u: could"
						  " not get slave id (%d)\n",
						  eqe->type, eqe->subtype,
						  eq->eqn, eq->cons_index, ret);
					break;
				}
				mlx4_dbg(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n",
					 __func__, slave,
					 be32_to_cpu(eqe->event.srq.srqn),
					 eqe->type, eqe->subtype);

				if (!ret && slave != dev->caps.function) {
					mlx4_dbg(dev, "%s: sending event %02x(%02x) to slave:%d\n",
						 __func__, eqe->type,
						 eqe->subtype, slave);
					mlx4_slave_event(dev, slave, eqe);
					break;
				}
			}
			mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) &
				       0xffffff, eqe->type);
			break;

		case MLX4_EVENT_TYPE_CMD:
			mlx4_cmd_event(dev,
				       be16_to_cpu(eqe->event.cmd.token),
				       eqe->event.cmd.status,
				       be64_to_cpu(eqe->event.cmd.out_param));
			break;

		case MLX4_EVENT_TYPE_PORT_CHANGE:
			port = be32_to_cpu(eqe->event.port_change.port) >> 28;
			if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) {
				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
						    port);
				mlx4_priv(dev)->sense.do_sense_port[port] = 1;
				if (!mlx4_is_master(dev))
					break;
				for (i = 0; i < dev->num_slaves; i++) {
					if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
						if (i == mlx4_master_func_num(dev))
							continue;
						mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN"
							 " to slave: %d, port:%d\n",
							 __func__, i, port);
						s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
						if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state)
							mlx4_slave_event(dev, i, eqe);
					} else {  /* IB port */
						set_and_calc_slave_port_state(dev, i, port,
									      MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
									      &gen_event);
						/* we may be in pending state; if so, do not send a port_down event */
						if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) {
							if (i == mlx4_master_func_num(dev))
								continue;
							mlx4_slave_event(dev, i, eqe);
						}
					}
				}
			} else {
				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port);

				mlx4_priv(dev)->sense.do_sense_port[port] = 0;

				if (!mlx4_is_master(dev))
					break;
				if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
					for (i = 0; i < dev->num_slaves; i++) {
						if (i == mlx4_master_func_num(dev))
							continue;
						s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
						if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state)
							mlx4_slave_event(dev, i, eqe);
					}
				else /* IB port */
					/* port-up event will be sent to a slave when the
					 * slave's alias-guid is set. This is done in alias_GUID.c
					 */
					set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP);
			}
			break;

		case MLX4_EVENT_TYPE_CQ_ERROR:
			mlx4_warn(dev, "CQ %s on CQN %06x\n",
				  eqe->event.cq_err.syndrome == 1 ?
				  "overrun" : "access violation",
				  be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
			if (mlx4_is_master(dev)) {
				ret = mlx4_get_slave_from_resource_id(dev,
					RES_CQ,
					be32_to_cpu(eqe->event.cq_err.cqn)
					& 0xffffff, &slave);
				if (ret && ret != -ENOENT) {
					mlx4_dbg(dev, "CQ event %02x(%02x) on "
						 "EQ %d at index %u: could "
						  "not get slave id (%d)\n",
						  eqe->type, eqe->subtype,
						  eq->eqn, eq->cons_index, ret);
					break;
				}

				if (!ret && slave != dev->caps.function) {
					mlx4_slave_event(dev, slave, eqe);
					break;
				}
			}
			mlx4_cq_event(dev,
				      be32_to_cpu(eqe->event.cq_err.cqn)
				      & 0xffffff,
				      eqe->type);
			break;

		case MLX4_EVENT_TYPE_EQ_OVERFLOW:
			mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
			break;

		case MLX4_EVENT_TYPE_OP_REQUIRED:
			atomic_inc(&priv->opreq_count);
			/* FW commands can't be executed from interrupt
			 * context; run them from a deferred task */
			queue_work(mlx4_wq, &priv->opreq_task);
			break;

		case MLX4_EVENT_TYPE_COMM_CHANNEL:
			if (!mlx4_is_master(dev)) {
				mlx4_warn(dev, "Received comm channel event "
					       "for non-master device\n");
				break;
			}

			memcpy(&priv->mfunc.master.comm_arm_bit_vector,
			       eqe->event.comm_channel_arm.bit_vec,
			       sizeof eqe->event.comm_channel_arm.bit_vec);

			if (!queue_work(priv->mfunc.master.comm_wq,
				   &priv->mfunc.master.comm_work))
				mlx4_warn(dev, "Failed to queue comm channel work\n");

			if (!queue_work(priv->mfunc.master.comm_wq,
				   &priv->mfunc.master.arm_comm_work))
				mlx4_warn(dev, "Failed to queue arm comm channel work\n");
			break;

		case MLX4_EVENT_TYPE_FLR_EVENT:
			flr_slave = be32_to_cpu(eqe->event.flr_event.slave_id);
			if (!mlx4_is_master(dev)) {
				mlx4_warn(dev, "Non-master function received "
					       "FLR event\n");
				break;
			}

			mlx4_dbg(dev, "FLR event for slave: %d\n", flr_slave);

			if (flr_slave >= dev->num_slaves) {
				mlx4_warn(dev,
					  "Got FLR for unknown function: %d\n",
					  flr_slave);
				update_slave_state = 0;
			} else
				update_slave_state = 1;

			spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
			if (update_slave_state) {
				priv->mfunc.master.slave_state[flr_slave].active = false;
				priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR;
				priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
			}
			spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
			queue_work(priv->mfunc.master.comm_wq,
				   &priv->mfunc.master.slave_flr_event_work);
			break;

		case MLX4_EVENT_TYPE_FATAL_WARNING:
			if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) {
				if (mlx4_is_master(dev))
					for (i = 0; i < dev->num_slaves; i++) {
						mlx4_dbg(dev, "%s: Sending "
							"MLX4_FATAL_WARNING_SUBTYPE_WARMING"
							" to slave: %d\n", __func__, i);
						if (i == dev->caps.function)
							continue;
						mlx4_slave_event(dev, i, eqe);
					}
				mlx4_err(dev, "Temperature threshold was reached! "
					"Threshold: %d degrees Celsius; "
					"current temperature: %d\n",
					be16_to_cpu(eqe->event.warming.warning_threshold),
					be16_to_cpu(eqe->event.warming.current_temperature));
			} else
				mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), "
					  "subtype %02x on EQ %d at index %u. owner=%x, "
					  "nent=0x%x, slave=%x, ownership=%s\n",
					  eqe->type, eqe->subtype, eq->eqn,
					  eq->cons_index, eqe->owner, eq->nent,
					  eqe->slave_id,
					  !!(eqe->owner & 0x80) ^
					  !!(eq->cons_index & eq->nent) ? "HW" : "SW");

			break;

		case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT:
			mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
					    (unsigned long) eqe);
			break;

		case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT:
			switch (eqe->subtype) {
			case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE:
				mlx4_warn(dev, "Bad cable detected on port %u\n",
					  eqe->event.bad_cable.port);
				break;
			case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE:
				mlx4_warn(dev, "Unsupported cable detected\n");
				break;
			default:
				mlx4_dbg(dev, "Unhandled recoverable error event "
					 "detected: %02x(%02x) on EQ %d at index %u. "
					 "owner=%x, nent=0x%x, ownership=%s\n",
					 eqe->type, eqe->subtype, eq->eqn,
					 eq->cons_index, eqe->owner, eq->nent,
					 !!(eqe->owner & 0x80) ^
					 !!(eq->cons_index & eq->nent) ? "HW" : "SW");
				break;
			}
			break;

		case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
		case MLX4_EVENT_TYPE_ECC_DETECT:
		default:
			mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at "
				  "index %u. owner=%x, nent=0x%x, slave=%x, "
				  "ownership=%s\n",
				  eqe->type, eqe->subtype, eq->eqn,
				  eq->cons_index, eqe->owner, eq->nent,
				  eqe->slave_id,
				  !!(eqe->owner & 0x80) ^
				  !!(eq->cons_index & eq->nent) ? "HW" : "SW");
			break;
		}

		++eq->cons_index;
		eqes_found = 1;
		++set_ci;

		/*
		 * The HCA will think the queue has overflowed if we
		 * don't tell it we've been processing events.  We
		 * create our EQs with MLX4_NUM_SPARE_EQE extra
		 * entries, so we must update our consumer index at
		 * least that often.
		 */
		if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
			eq_set_ci(eq, 0);
			set_ci = 0;
		}
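
		/*
		 * Cadence sketch: with MLX4_NUM_SPARE_EQE == 0x80, the
		 * consumer index is pushed to hardware at least once
		 * every 128 handled EQEs, which matches exactly the
		 * slack the EQs are over-allocated by at creation time.
		 */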
	}

	eq_set_ci(eq, 1);

	return eqes_found;
}

static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
{
	struct mlx4_dev *dev = dev_ptr;
	struct mlx4_priv *priv = mlx4_priv(dev);
	int work = 0;
	int i;

	writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);

	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
		work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);

	return IRQ_RETVAL(work);
}

static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr)
{
	struct mlx4_eq  *eq  = eq_ptr;
	struct mlx4_dev *dev = eq->dev;

	mlx4_eq_int(dev, eq);

	/* MSI-X vectors always belong to us */
	return IRQ_HANDLED;
}

int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
			struct mlx4_vhcr *vhcr,
			struct mlx4_cmd_mailbox *inbox,
			struct mlx4_cmd_mailbox *outbox,
			struct mlx4_cmd_info *cmd)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_event_eq_info *event_eq =
		priv->mfunc.master.slave_state[slave].event_eq;
	u32 in_modifier = vhcr->in_modifier;
	u32 eqn = in_modifier & 0x3FF;
	u64 in_param = vhcr->in_param;
	int err = 0;
	int i;

	if (slave == dev->caps.function)
		err = mlx4_cmd(dev, in_param, (in_modifier & 0x80000000) | eqn,
			       0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
			       MLX4_CMD_NATIVE);
	if (!err)
		for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i)
			if (in_param & (1LL << i))
				event_eq[i].eqn = in_modifier >> 31 ? -1 : eqn;

	return err;
}

static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
			int eq_num)
{
	return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
			0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
			MLX4_CMD_WRAPPED);
}

static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
			 int eq_num)
{
	return mlx4_cmd(dev, mailbox->dma, eq_num, 0,
			MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A,
			MLX4_CMD_WRAPPED);
}

static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
			 int eq_num)
{
	return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num,
			    0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A,
			    MLX4_CMD_WRAPPED);
}

static int mlx4_num_eq_uar(struct mlx4_dev *dev)
{
	/*
	 * Each UAR holds 4 EQ doorbells.  To figure out how many UARs
	 * we need to map, take the difference of highest index and
	 * the lowest index we'll use and add 1.
	 */
	return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs +
		 dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1;
}
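
/*
 * Worked example (illustrative figures only): with 16 reserved EQs,
 * 8 completion vectors and a pool of 8, the formula above yields
 * (8 + 1 + 16 + 8)/4 - 16/4 + 1 = 8 - 4 + 1 = 5 UARs to map.
 */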

static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int index;

	index = eq->eqn / 4 - dev->caps.reserved_eqs / 4;

	if (!priv->eq_table.uar_map[index]) {
		priv->eq_table.uar_map[index] =
			ioremap(pci_resource_start(dev->pdev, 2) +
				((eq->eqn / 4) << PAGE_SHIFT),
				PAGE_SIZE);
		if (!priv->eq_table.uar_map[index]) {
			mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
				 eq->eqn);
			return NULL;
		}
	}

	return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
}
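
/*
 * Layout note (derived from the return statement above): each UAR page
 * holds the doorbells of four consecutive EQNs, starting at page offset
 * 0x800 and spaced 8 bytes apart, so e.g. EQN 18 lands at offset
 * 0x800 + 8 * (18 % 4) = 0x810 within its page.
 */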

static void mlx4_unmap_uar(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int i;

	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
		if (priv->eq_table.uar_map[i]) {
			iounmap(priv->eq_table.uar_map[i]);
			priv->eq_table.uar_map[i] = NULL;
		}
}

static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
			  u8 intr, struct mlx4_eq *eq)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_cmd_mailbox *mailbox;
	struct mlx4_eq_context *eq_context;
	int npages;
	u64 *dma_list = NULL;
	dma_addr_t t;
	u64 mtt_addr;
	int err = -ENOMEM;
	int i;

	eq->dev   = dev;
	eq->nent  = roundup_pow_of_two(max(nent, 2));
	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;

	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
				GFP_KERNEL);
	if (!eq->page_list)
		goto err_out;

	for (i = 0; i < npages; ++i)
		eq->page_list[i].buf = NULL;

	dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
	if (!dma_list)
		goto err_out_free;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox))
		goto err_out_free;
	eq_context = mailbox->buf;

	for (i = 0; i < npages; ++i) {
		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
							  PAGE_SIZE, &t, GFP_KERNEL);
		if (!eq->page_list[i].buf)
			goto err_out_free_pages;

		dma_list[i] = t;
		eq->page_list[i].map = t;

		memset(eq->page_list[i].buf, 0, PAGE_SIZE);
	}

	eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap);
	if (eq->eqn == -1)
		goto err_out_free_pages;

	eq->doorbell = mlx4_get_eq_uar(dev, eq);
	if (!eq->doorbell) {
		err = -ENOMEM;
		goto err_out_free_eq;
	}

	err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt);
	if (err)
		goto err_out_free_eq;

	err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list);
	if (err)
		goto err_out_free_mtt;

	memset(eq_context, 0, sizeof *eq_context);
	eq_context->flags	  = cpu_to_be32(MLX4_EQ_STATUS_OK   |
						MLX4_EQ_STATE_ARMED);
	eq_context->log_eq_size	  = ilog2(eq->nent);
	eq_context->intr	  = intr;
	eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT;

	mtt_addr = mlx4_mtt_addr(dev, &eq->mtt);
	eq_context->mtt_base_addr_h = mtt_addr >> 32;
	eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);

	err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn);
	if (err) {
		mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err);
		goto err_out_free_mtt;
	}

	kfree(dma_list);
	mlx4_free_cmd_mailbox(dev, mailbox);

	eq->cons_index = 0;

	return err;

err_out_free_mtt:
	mlx4_mtt_cleanup(dev, &eq->mtt);

err_out_free_eq:
	mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);

err_out_free_pages:
	for (i = 0; i < npages; ++i)
		if (eq->page_list[i].buf)
			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
					  eq->page_list[i].buf,
					  eq->page_list[i].map);

	mlx4_free_cmd_mailbox(dev, mailbox);

err_out_free:
	kfree(eq->page_list);
	kfree(dma_list);

err_out:
	return err;
}

static void mlx4_free_eq(struct mlx4_dev *dev,
			 struct mlx4_eq *eq)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_cmd_mailbox *mailbox;
	int err;
	int i;
	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox))
		return;

	err = mlx4_HW2SW_EQ(dev, mailbox, eq->eqn);
	if (err)
		mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err);

	if (0) {
		mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
		for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) {
			if (i % 4 == 0)
				pr_cont("[%02x] ", i * 4);
			pr_cont(" %08x", be32_to_cpup(mailbox->buf + i * 4));
			if ((i + 1) % 4 == 0)
				pr_cont("\n");
		}
	}

	mlx4_mtt_cleanup(dev, &eq->mtt);
	for (i = 0; i < npages; ++i)
		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
				    eq->page_list[i].buf,
				    eq->page_list[i].map);

	kfree(eq->page_list);
	mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
	mlx4_free_cmd_mailbox(dev, mailbox);
}

static void mlx4_free_irqs(struct mlx4_dev *dev)
{
	struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
	struct mlx4_priv *priv = mlx4_priv(dev);
	int i, vec;

	if (eq_table->have_irq)
		free_irq(dev->pdev->irq, dev);

	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
		if (eq_table->eq[i].have_irq) {
			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
			eq_table->eq[i].have_irq = 0;
		}

	for (i = 0; i < dev->caps.comp_pool; i++) {
		/*
		 * Free the assigned IRQs.  All bits should already be
		 * zero, but we need to validate.
		 */
		if (priv->msix_ctl.pool_bm & 1ULL << i) {
			/* no need for locking */
			vec = dev->caps.num_comp_vectors + 1 + i;
			free_irq(priv->eq_table.eq[vec].irq,
				 &priv->eq_table.eq[vec]);
		}
	}

	kfree(eq_table->irq_names);
}

static int mlx4_map_clr_int(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
				 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
	if (!priv->clr_base) {
		mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n");
		return -ENOMEM;
	}

	return 0;
}

static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	iounmap(priv->clr_base);
}

int mlx4_alloc_eq_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs,
				    sizeof *priv->eq_table.eq, GFP_KERNEL);
	if (!priv->eq_table.eq)
		return -ENOMEM;

	return 0;
}

void mlx4_free_eq_table(struct mlx4_dev *dev)
{
	kfree(mlx4_priv(dev)->eq_table.eq);
}

int mlx4_init_eq_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;
	int i;

	priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev),
					 sizeof *priv->eq_table.uar_map,
					 GFP_KERNEL);
	if (!priv->eq_table.uar_map) {
		err = -ENOMEM;
		goto err_out_free;
	}

	err = mlx4_bitmap_init(&priv->eq_table.bitmap,
				roundup_pow_of_two(dev->caps.num_eqs),
				dev->caps.num_eqs - 1,
				dev->caps.reserved_eqs,
				roundup_pow_of_two(dev->caps.num_eqs) -
					dev->caps.num_eqs);
	if (err)
		goto err_out_free;

	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
		priv->eq_table.uar_map[i] = NULL;

	if (!mlx4_is_slave(dev)) {
		err = mlx4_map_clr_int(dev);
		if (err)
			goto err_out_bitmap;

		priv->eq_table.clr_mask =
			swab32(1 << (priv->eq_table.inta_pin & 31));
		priv->eq_table.clr_int  = priv->clr_base +
			(priv->eq_table.inta_pin < 32 ? 4 : 0);
	}

	priv->eq_table.irq_names =
		kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 +
					     dev->caps.comp_pool),
			GFP_KERNEL);
	if (!priv->eq_table.irq_names) {
		err = -ENOMEM;
		goto err_out_clr_int;
	}

	for (i = 0; i < dev->caps.num_comp_vectors; ++i) {
		err = mlx4_create_eq(dev, dev->caps.num_cqs -
					  dev->caps.reserved_cqs +
					  MLX4_NUM_SPARE_EQE,
				     (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
				     &priv->eq_table.eq[i]);
		if (err) {
			--i;
			goto err_out_unmap;
		}
	}

	err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
			     (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0,
			     &priv->eq_table.eq[dev->caps.num_comp_vectors]);
	if (err)
		goto err_out_comp;

	/* if the additional completion vector pool size is 0, this loop will not run */
	for (i = dev->caps.num_comp_vectors + 1;
	      i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) {

		err = mlx4_create_eq(dev, dev->caps.num_cqs -
					  dev->caps.reserved_cqs +
					  MLX4_NUM_SPARE_EQE,
				     (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
				     &priv->eq_table.eq[i]);
		if (err) {
			--i;
			goto err_out_unmap;
		}
	}

	if (dev->flags & MLX4_FLAG_MSI_X) {
		const char *eq_name;

		for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
			if (i < dev->caps.num_comp_vectors) {
				snprintf(priv->eq_table.irq_names +
					 i * MLX4_IRQNAME_SIZE,
					 MLX4_IRQNAME_SIZE,
					 "mlx4-comp-%d@pci:%s", i,
					 pci_name(dev->pdev));
			} else {
				snprintf(priv->eq_table.irq_names +
					 i * MLX4_IRQNAME_SIZE,
					 MLX4_IRQNAME_SIZE,
					 "mlx4-async@pci:%s",
					 pci_name(dev->pdev));
			}

			eq_name = priv->eq_table.irq_names +
				  i * MLX4_IRQNAME_SIZE;
			err = request_irq(priv->eq_table.eq[i].irq,
					  mlx4_msi_x_interrupt, 0, eq_name,
					  priv->eq_table.eq + i);
			if (err)
				goto err_out_async;

			priv->eq_table.eq[i].have_irq = 1;
		}
	} else {
		snprintf(priv->eq_table.irq_names,
			 MLX4_IRQNAME_SIZE,
			 DRV_NAME "@pci:%s",
			 pci_name(dev->pdev));
		err = request_irq(dev->pdev->irq, mlx4_interrupt,
				  IRQF_SHARED, priv->eq_table.irq_names, dev);
		if (err)
			goto err_out_async;

		priv->eq_table.have_irq = 1;
	}

	err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
			  priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
	if (err)
		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
			   priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err);

	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
		eq_set_ci(&priv->eq_table.eq[i], 1);

	return 0;

err_out_async:
	mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]);

err_out_comp:
	i = dev->caps.num_comp_vectors - 1;

err_out_unmap:
	while (i >= 0) {
		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
		--i;
	}
	mlx4_free_irqs(dev);

err_out_clr_int:
	if (!mlx4_is_slave(dev))
		mlx4_unmap_clr_int(dev);

err_out_bitmap:
	mlx4_unmap_uar(dev);
	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);

err_out_free:
	kfree(priv->eq_table.uar_map);

	return err;
}

void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int i;

	mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1,
		    priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);

	mlx4_free_irqs(dev);

	for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i)
		mlx4_free_eq(dev, &priv->eq_table.eq[i]);

	if (!mlx4_is_slave(dev))
		mlx4_unmap_clr_int(dev);

	mlx4_unmap_uar(dev);
	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);

	kfree(priv->eq_table.uar_map);
}

/* A test that verifies that we can accept interrupts on all
 * the irq vectors of the device.
 * Interrupts are checked using the NOP command.
 */
int mlx4_test_interrupts(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int i;
	int err;

	err = mlx4_NOP(dev);
	/* When not in MSI_X, there is only one irq to check */
	if (!(dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(dev))
		return err;

	/* A loop over all completion vectors: for each vector we check
	 * whether it works by mapping command completions to that vector
	 * and performing a NOP command
	 */
	for (i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) {
		/* Temporarily use polling for command completions */
		mlx4_cmd_use_polling(dev);

		/* Map the new eq to handle all asynchronous events */
		err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
				  priv->eq_table.eq[i].eqn);
		if (err) {
			mlx4_warn(dev, "Failed mapping eq for interrupt test\n");
			mlx4_cmd_use_events(dev);
			break;
		}

		/* Go back to using events */
		mlx4_cmd_use_events(dev);
		err = mlx4_NOP(dev);
	}

	/* Return to default */
	mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
		    priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
	return err;
}
EXPORT_SYMBOL(mlx4_test_interrupts);
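
/*
 * Illustrative usage (hypothetical caller, not part of this file): a
 * diagnostic or self-test path would simply check the return value, e.g.
 *
 *	if (mlx4_test_interrupts(dev))
 *		mlx4_err(dev, "interrupt self-test failed\n");
 */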

int mlx4_assign_eq(struct mlx4_dev *dev, char *name, int *vector)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int vec = 0, err = 0, i;

	mutex_lock(&priv->msix_ctl.pool_lock);
	for (i = 0; !vec && i < dev->caps.comp_pool; i++) {
		if (~priv->msix_ctl.pool_bm & 1ULL << i) {
			priv->msix_ctl.pool_bm |= 1ULL << i;
			vec = dev->caps.num_comp_vectors + 1 + i;
			snprintf(priv->eq_table.irq_names +
					vec * MLX4_IRQNAME_SIZE,
					MLX4_IRQNAME_SIZE, "%s", name);
			err = request_irq(priv->eq_table.eq[vec].irq,
					  mlx4_msi_x_interrupt, 0,
					  &priv->eq_table.irq_names[vec<<5],
					  priv->eq_table.eq + vec);
			if (err) {
				/* zero out the bit by flipping it */
				priv->msix_ctl.pool_bm ^= 1ULL << i;
				vec = 0;
				continue;
				/* we don't want to break here */
			}
			eq_set_ci(&priv->eq_table.eq[vec], 1);
		}
	}
	mutex_unlock(&priv->msix_ctl.pool_lock);

	if (vec) {
		*vector = vec;
	} else {
		*vector = 0;
		err = (i == dev->caps.comp_pool) ? -ENOSPC : err;
	}
	return err;
}
EXPORT_SYMBOL(mlx4_assign_eq);
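
/*
 * Illustrative usage (hypothetical caller, not part of this file): a
 * consumer can grab a dedicated completion vector from the pool and
 * release it when done:
 *
 *	int vector;
 *
 *	if (!mlx4_assign_eq(dev, "my-irq", &vector)) {
 *		... use vector, e.g. when creating a CQ ...
 *		mlx4_release_eq(dev, vector);
 *	}
 */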

void mlx4_release_eq(struct mlx4_dev *dev, int vec)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	/* bitmap index */
	int i = vec - dev->caps.num_comp_vectors - 1;

	if (likely(i >= 0)) {
		/* sanity check: make sure we're not trying to free an IRQ
		 * belonging to a legacy EQ */
		mutex_lock(&priv->msix_ctl.pool_lock);
		if (priv->msix_ctl.pool_bm & 1ULL << i) {
			free_irq(priv->eq_table.eq[vec].irq,
				 &priv->eq_table.eq[vec]);
			priv->msix_ctl.pool_bm &= ~(1ULL << i);
		}
		mutex_unlock(&priv->msix_ctl.pool_lock);
	}
}
EXPORT_SYMBOL(mlx4_release_eq);