mps.c revision 273736
1/*-
2 * Copyright (c) 2009 Yahoo! Inc.
3 * Copyright (c) 2012 LSI Corp.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * LSI MPT-Fusion Host Adapter FreeBSD
28 *
29 * $FreeBSD: stable/10/sys/dev/mps/mps.c 273736 2014-10-27 14:38:00Z hselasky $
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/10/sys/dev/mps/mps.c 273736 2014-10-27 14:38:00Z hselasky $");
34
35/* Communications core for LSI MPT2 */
36
37/* TODO Move headers to mpsvar */
38#include <sys/types.h>
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/selinfo.h>
43#include <sys/lock.h>
44#include <sys/mutex.h>
45#include <sys/module.h>
46#include <sys/bus.h>
47#include <sys/conf.h>
48#include <sys/bio.h>
49#include <sys/malloc.h>
50#include <sys/uio.h>
51#include <sys/sysctl.h>
52#include <sys/queue.h>
53#include <sys/kthread.h>
54#include <sys/taskqueue.h>
55#include <sys/endian.h>
56#include <sys/eventhandler.h>
57
58#include <machine/bus.h>
59#include <machine/resource.h>
60#include <sys/rman.h>
61#include <sys/proc.h>
62
63#include <dev/pci/pcivar.h>
64
65#include <cam/cam.h>
66#include <cam/scsi/scsi_all.h>
67
68#include <dev/mps/mpi/mpi2_type.h>
69#include <dev/mps/mpi/mpi2.h>
70#include <dev/mps/mpi/mpi2_ioc.h>
71#include <dev/mps/mpi/mpi2_sas.h>
72#include <dev/mps/mpi/mpi2_cnfg.h>
73#include <dev/mps/mpi/mpi2_init.h>
74#include <dev/mps/mpi/mpi2_tool.h>
75#include <dev/mps/mps_ioctl.h>
76#include <dev/mps/mpsvar.h>
77#include <dev/mps/mps_table.h>
78
79static int mps_diag_reset(struct mps_softc *sc, int sleep_flag);
80static int mps_init_queues(struct mps_softc *sc);
81static int mps_message_unit_reset(struct mps_softc *sc, int sleep_flag);
82static int mps_transition_operational(struct mps_softc *sc);
83static int mps_iocfacts_allocate(struct mps_softc *sc, uint8_t attaching);
84static void mps_iocfacts_free(struct mps_softc *sc);
85static void mps_startup(void *arg);
86static int mps_send_iocinit(struct mps_softc *sc);
87static int mps_alloc_queues(struct mps_softc *sc);
88static int mps_alloc_replies(struct mps_softc *sc);
89static int mps_alloc_requests(struct mps_softc *sc);
90static int mps_attach_log(struct mps_softc *sc);
91static __inline void mps_complete_command(struct mps_softc *sc,
92    struct mps_command *cm);
93static void mps_dispatch_event(struct mps_softc *sc, uintptr_t data,
94    MPI2_EVENT_NOTIFICATION_REPLY *reply);
95static void mps_config_complete(struct mps_softc *sc, struct mps_command *cm);
96static void mps_periodic(void *);
97static int mps_reregister_events(struct mps_softc *sc);
98static void mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm);
99static int mps_get_iocfacts(struct mps_softc *sc, MPI2_IOC_FACTS_REPLY *facts);
100static int mps_wait_db_ack(struct mps_softc *sc, int timeout, int sleep_flag);
101SYSCTL_NODE(_hw, OID_AUTO, mps, CTLFLAG_RD, 0, "MPS Driver Parameters");
102
103MALLOC_DEFINE(M_MPT2, "mps", "mpt2 driver memory");
104
/*
 * Key sequence used by mps_diag_reset().  The bytes are written one at a
 * time, in order, to the Write Sequence register; mps_diag_reset() then
 * checks the Host Diagnostic register for the write-enable bit before
 * requesting a "Diagnostic Reset" (aka a hard reset), which should get the
 * chip out of any state and back to its initialization state machine.
 *
 * The table is read-only, so it is declared const.
 */
static const char mpt2_reset_magic[] = {
	0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d
};
110
111/* Added this union to smoothly convert le64toh cm->cm_desc.Words.
112 * Compiler only support unint64_t to be passed as argument.
113 * Otherwise it will through below error
114 * "aggregate value used where an integer was expected"
115 */
116
117typedef union _reply_descriptor {
118        u64 word;
119        struct {
120                u32 low;
121                u32 high;
122        } u;
123}reply_descriptor,address_descriptor;
124
/* Chain-fail messages are rate limited to one per minute. */
static struct timeval mps_chainfail_interval = {
	.tv_sec = 60,
	.tv_usec = 0
};
127
128/*
129 * sleep_flag can be either CAN_SLEEP or NO_SLEEP.
130 * If this function is called from process context, it can sleep
131 * and there is no harm to sleep, in case if this fuction is called
132 * from Interrupt handler, we can not sleep and need NO_SLEEP flag set.
133 * based on sleep flags driver will call either msleep, pause or DELAY.
134 * msleep and pause are of same variant, but pause is used when mps_mtx
135 * is not hold by driver.
136 *
137 */
138static int
139mps_diag_reset(struct mps_softc *sc,int sleep_flag)
140{
141	uint32_t reg;
142	int i, error, tries = 0;
143	uint8_t first_wait_done = FALSE;
144
145	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);
146
147	/* Clear any pending interrupts */
148	mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
149
150	/*Force NO_SLEEP for threads prohibited to sleep
151 	* e.a Thread from interrupt handler are prohibited to sleep.
152 	*/
153	if (curthread->td_no_sleeping != 0)
154		sleep_flag = NO_SLEEP;
155
156	/* Push the magic sequence */
157	error = ETIMEDOUT;
158	while (tries++ < 20) {
159		for (i = 0; i < sizeof(mpt2_reset_magic); i++)
160			mps_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET,
161			    mpt2_reset_magic[i]);
162		/* wait 100 msec */
163		if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP)
164			msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0,
165			    "mpsdiag", hz/10);
166		else if (sleep_flag == CAN_SLEEP)
167			pause("mpsdiag", hz/10);
168		else
169			DELAY(100 * 1000);
170
171		reg = mps_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET);
172		if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) {
173			error = 0;
174			break;
175		}
176	}
177	if (error)
178		return (error);
179
180	/* Send the actual reset.  XXX need to refresh the reg? */
181	mps_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET,
182	    reg | MPI2_DIAG_RESET_ADAPTER);
183
184	/* Wait up to 300 seconds in 50ms intervals */
185	error = ETIMEDOUT;
186	for (i = 0; i < 6000; i++) {
187		/*
188		 * Wait 50 msec. If this is the first time through, wait 256
189		 * msec to satisfy Diag Reset timing requirements.
190		 */
191		if (first_wait_done) {
192			if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP)
193				msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0,
194				    "mpsdiag", hz/20);
195			else if (sleep_flag == CAN_SLEEP)
196				pause("mpsdiag", hz/20);
197			else
198				DELAY(50 * 1000);
199		} else {
200			DELAY(256 * 1000);
201			first_wait_done = TRUE;
202		}
203		/*
204		 * Check for the RESET_ADAPTER bit to be cleared first, then
205		 * wait for the RESET state to be cleared, which takes a little
206		 * longer.
207		 */
208		reg = mps_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET);
209		if (reg & MPI2_DIAG_RESET_ADAPTER) {
210			continue;
211		}
212		reg = mps_regread(sc, MPI2_DOORBELL_OFFSET);
213		if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) {
214			error = 0;
215			break;
216		}
217	}
218	if (error)
219		return (error);
220
221	mps_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0);
222
223	return (0);
224}
225
226static int
227mps_message_unit_reset(struct mps_softc *sc, int sleep_flag)
228{
229
230	MPS_FUNCTRACE(sc);
231
232	mps_regwrite(sc, MPI2_DOORBELL_OFFSET,
233	    MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET <<
234	    MPI2_DOORBELL_FUNCTION_SHIFT);
235
236	if (mps_wait_db_ack(sc, 5, sleep_flag) != 0) {
237		mps_dprint(sc, MPS_FAULT, "Doorbell handshake failed : <%s>\n",
238				__func__);
239		return (ETIMEDOUT);
240	}
241
242	return (0);
243}
244
245static int
246mps_transition_ready(struct mps_softc *sc)
247{
248	uint32_t reg, state;
249	int error, tries = 0;
250	int sleep_flags;
251
252	MPS_FUNCTRACE(sc);
253	/* If we are in attach call, do not sleep */
254	sleep_flags = (sc->mps_flags & MPS_FLAGS_ATTACH_DONE)
255					? CAN_SLEEP:NO_SLEEP;
256	error = 0;
257	while (tries++ < 1200) {
258		reg = mps_regread(sc, MPI2_DOORBELL_OFFSET);
259		mps_dprint(sc, MPS_INIT, "Doorbell= 0x%x\n", reg);
260
261		/*
262		 * Ensure the IOC is ready to talk.  If it's not, try
263		 * resetting it.
264		 */
265		if (reg & MPI2_DOORBELL_USED) {
266			mps_diag_reset(sc, sleep_flags);
267			DELAY(50000);
268			continue;
269		}
270
271		/* Is the adapter owned by another peer? */
272		if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) ==
273		    (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) {
274			device_printf(sc->mps_dev, "IOC is under the control "
275			    "of another peer host, aborting initialization.\n");
276			return (ENXIO);
277		}
278
279		state = reg & MPI2_IOC_STATE_MASK;
280		if (state == MPI2_IOC_STATE_READY) {
281			/* Ready to go! */
282			error = 0;
283			break;
284		} else if (state == MPI2_IOC_STATE_FAULT) {
285			mps_dprint(sc, MPS_FAULT, "IOC in fault state 0x%x, resetting\n",
286			    state & MPI2_DOORBELL_FAULT_CODE_MASK);
287			mps_diag_reset(sc, sleep_flags);
288		} else if (state == MPI2_IOC_STATE_OPERATIONAL) {
289			/* Need to take ownership */
290			mps_message_unit_reset(sc, sleep_flags);
291		} else if (state == MPI2_IOC_STATE_RESET) {
292			/* Wait a bit, IOC might be in transition */
293			mps_dprint(sc, MPS_FAULT,
294			    "IOC in unexpected reset state\n");
295		} else {
296			mps_dprint(sc, MPS_FAULT,
297			    "IOC in unknown state 0x%x\n", state);
298			error = EINVAL;
299			break;
300		}
301
302		/* Wait 50ms for things to settle down. */
303		DELAY(50000);
304	}
305
306	if (error)
307		device_printf(sc->mps_dev, "Cannot transition IOC to ready\n");
308
309	return (error);
310}
311
312static int
313mps_transition_operational(struct mps_softc *sc)
314{
315	uint32_t reg, state;
316	int error;
317
318	MPS_FUNCTRACE(sc);
319
320	error = 0;
321	reg = mps_regread(sc, MPI2_DOORBELL_OFFSET);
322	mps_dprint(sc, MPS_INIT, "Doorbell= 0x%x\n", reg);
323
324	state = reg & MPI2_IOC_STATE_MASK;
325	if (state != MPI2_IOC_STATE_READY) {
326		if ((error = mps_transition_ready(sc)) != 0) {
327			mps_dprint(sc, MPS_FAULT,
328			    "%s failed to transition ready\n", __func__);
329			return (error);
330		}
331	}
332
333	error = mps_send_iocinit(sc);
334	return (error);
335}
336
337/*
338 * This is called during attach and when re-initializing due to a Diag Reset.
339 * IOC Facts is used to allocate many of the structures needed by the driver.
340 * If called from attach, de-allocation is not required because the driver has
341 * not allocated any structures yet, but if called from a Diag Reset, previously
342 * allocated structures based on IOC Facts will need to be freed and re-
343 * allocated bases on the latest IOC Facts.
344 */
345static int
346mps_iocfacts_allocate(struct mps_softc *sc, uint8_t attaching)
347{
348	int error;
349	Mpi2IOCFactsReply_t saved_facts;
350	uint8_t saved_mode, reallocating;
351
352	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);
353
354	/* Save old IOC Facts and then only reallocate if Facts have changed */
355	if (!attaching) {
356		bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY));
357	}
358
359	/*
360	 * Get IOC Facts.  In all cases throughout this function, panic if doing
361	 * a re-initialization and only return the error if attaching so the OS
362	 * can handle it.
363	 */
364	if ((error = mps_get_iocfacts(sc, sc->facts)) != 0) {
365		if (attaching) {
366			mps_dprint(sc, MPS_FAULT, "%s failed to get IOC Facts "
367			    "with error %d\n", __func__, error);
368			return (error);
369		} else {
370			panic("%s failed to get IOC Facts with error %d\n",
371			    __func__, error);
372		}
373	}
374
375	mps_print_iocfacts(sc, sc->facts);
376
377	snprintf(sc->fw_version, sizeof(sc->fw_version),
378	    "%02d.%02d.%02d.%02d",
379	    sc->facts->FWVersion.Struct.Major,
380	    sc->facts->FWVersion.Struct.Minor,
381	    sc->facts->FWVersion.Struct.Unit,
382	    sc->facts->FWVersion.Struct.Dev);
383
384	mps_printf(sc, "Firmware: %s, Driver: %s\n", sc->fw_version,
385	    MPS_DRIVER_VERSION);
386	mps_printf(sc, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities,
387	    "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf"
388	    "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR"
389	    "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc");
390
391	/*
392	 * If the chip doesn't support event replay then a hard reset will be
393	 * required to trigger a full discovery.  Do the reset here then
394	 * retransition to Ready.  A hard reset might have already been done,
395	 * but it doesn't hurt to do it again.  Only do this if attaching, not
396	 * for a Diag Reset.
397	 */
398	if (attaching) {
399		if ((sc->facts->IOCCapabilities &
400		    MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0) {
401			mps_diag_reset(sc, NO_SLEEP);
402			if ((error = mps_transition_ready(sc)) != 0) {
403				mps_dprint(sc, MPS_FAULT, "%s failed to "
404				    "transition to ready with error %d\n",
405				    __func__, error);
406				return (error);
407			}
408		}
409	}
410
411	/*
412	 * Set flag if IR Firmware is loaded.  If the RAID Capability has
413	 * changed from the previous IOC Facts, log a warning, but only if
414	 * checking this after a Diag Reset and not during attach.
415	 */
416	saved_mode = sc->ir_firmware;
417	if (sc->facts->IOCCapabilities &
418	    MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID)
419		sc->ir_firmware = 1;
420	if (!attaching) {
421		if (sc->ir_firmware != saved_mode) {
422			mps_dprint(sc, MPS_FAULT, "%s new IR/IT mode in IOC "
423			    "Facts does not match previous mode\n", __func__);
424		}
425	}
426
427	/* Only deallocate and reallocate if relevant IOC Facts have changed */
428	reallocating = FALSE;
429	if ((!attaching) &&
430	    ((saved_facts.MsgVersion != sc->facts->MsgVersion) ||
431	    (saved_facts.HeaderVersion != sc->facts->HeaderVersion) ||
432	    (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) ||
433	    (saved_facts.RequestCredit != sc->facts->RequestCredit) ||
434	    (saved_facts.ProductID != sc->facts->ProductID) ||
435	    (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) ||
436	    (saved_facts.IOCRequestFrameSize !=
437	    sc->facts->IOCRequestFrameSize) ||
438	    (saved_facts.MaxTargets != sc->facts->MaxTargets) ||
439	    (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) ||
440	    (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) ||
441	    (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) ||
442	    (saved_facts.MaxReplyDescriptorPostQueueDepth !=
443	    sc->facts->MaxReplyDescriptorPostQueueDepth) ||
444	    (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) ||
445	    (saved_facts.MaxVolumes != sc->facts->MaxVolumes) ||
446	    (saved_facts.MaxPersistentEntries !=
447	    sc->facts->MaxPersistentEntries))) {
448		reallocating = TRUE;
449	}
450
451	/*
452	 * Some things should be done if attaching or re-allocating after a Diag
453	 * Reset, but are not needed after a Diag Reset if the FW has not
454	 * changed.
455	 */
456	if (attaching || reallocating) {
457		/*
458		 * Check if controller supports FW diag buffers and set flag to
459		 * enable each type.
460		 */
461		if (sc->facts->IOCCapabilities &
462		    MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER)
463			sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE].
464			    enabled = TRUE;
465		if (sc->facts->IOCCapabilities &
466		    MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER)
467			sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT].
468			    enabled = TRUE;
469		if (sc->facts->IOCCapabilities &
470		    MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER)
471			sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED].
472			    enabled = TRUE;
473
474		/*
475		 * Set flag if EEDP is supported and if TLR is supported.
476		 */
477		if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP)
478			sc->eedp_enabled = TRUE;
479		if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR)
480			sc->control_TLR = TRUE;
481
482		/*
483		 * Size the queues. Since the reply queues always need one free
484		 * entry, we'll just deduct one reply message here.
485		 */
486		sc->num_reqs = MIN(MPS_REQ_FRAMES, sc->facts->RequestCredit);
487		sc->num_replies = MIN(MPS_REPLY_FRAMES + MPS_EVT_REPLY_FRAMES,
488		    sc->facts->MaxReplyDescriptorPostQueueDepth) - 1;
489
490		/*
491		 * Initialize all Tail Queues
492		 */
493		TAILQ_INIT(&sc->req_list);
494		TAILQ_INIT(&sc->high_priority_req_list);
495		TAILQ_INIT(&sc->chain_list);
496		TAILQ_INIT(&sc->tm_list);
497	}
498
499	/*
500	 * If doing a Diag Reset and the FW is significantly different
501	 * (reallocating will be set above in IOC Facts comparison), then all
502	 * buffers based on the IOC Facts will need to be freed before they are
503	 * reallocated.
504	 */
505	if (reallocating) {
506		mps_iocfacts_free(sc);
507		mpssas_realloc_targets(sc, saved_facts.MaxTargets);
508	}
509
510	/*
511	 * Any deallocation has been completed.  Now start reallocating
512	 * if needed.  Will only need to reallocate if attaching or if the new
513	 * IOC Facts are different from the previous IOC Facts after a Diag
514	 * Reset. Targets have already been allocated above if needed.
515	 */
516	if (attaching || reallocating) {
517		if (((error = mps_alloc_queues(sc)) != 0) ||
518		    ((error = mps_alloc_replies(sc)) != 0) ||
519		    ((error = mps_alloc_requests(sc)) != 0)) {
520			if (attaching ) {
521				mps_dprint(sc, MPS_FAULT, "%s failed to alloc "
522				    "queues with error %d\n", __func__, error);
523				mps_free(sc);
524				return (error);
525			} else {
526				panic("%s failed to alloc queues with error "
527				    "%d\n", __func__, error);
528			}
529		}
530	}
531
532	/* Always initialize the queues */
533	bzero(sc->free_queue, sc->fqdepth * 4);
534	mps_init_queues(sc);
535
536	/*
537	 * Always get the chip out of the reset state, but only panic if not
538	 * attaching.  If attaching and there is an error, that is handled by
539	 * the OS.
540	 */
541	error = mps_transition_operational(sc);
542	if (error != 0) {
543		if (attaching) {
544			mps_printf(sc, "%s failed to transition to operational "
545			    "with error %d\n", __func__, error);
546			mps_free(sc);
547			return (error);
548		} else {
549			panic("%s failed to transition to operational with "
550			    "error %d\n", __func__, error);
551		}
552	}
553
554	/*
555	 * Finish the queue initialization.
556	 * These are set here instead of in mps_init_queues() because the
557	 * IOC resets these values during the state transition in
558	 * mps_transition_operational().  The free index is set to 1
559	 * because the corresponding index in the IOC is set to 0, and the
560	 * IOC treats the queues as full if both are set to the same value.
561	 * Hence the reason that the queue can't hold all of the possible
562	 * replies.
563	 */
564	sc->replypostindex = 0;
565	mps_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex);
566	mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0);
567
568	/*
569	 * Attach the subsystems so they can prepare their event masks.
570	 */
571	/* XXX Should be dynamic so that IM/IR and user modules can attach */
572	if (attaching) {
573		if (((error = mps_attach_log(sc)) != 0) ||
574		    ((error = mps_attach_sas(sc)) != 0) ||
575		    ((error = mps_attach_user(sc)) != 0)) {
576			mps_printf(sc, "%s failed to attach all subsystems: "
577			    "error %d\n", __func__, error);
578			mps_free(sc);
579			return (error);
580		}
581
582		if ((error = mps_pci_setup_interrupts(sc)) != 0) {
583			mps_printf(sc, "%s failed to setup interrupts\n",
584			    __func__);
585			mps_free(sc);
586			return (error);
587		}
588	}
589
590	/*
591	 * Set flag if this is a WD controller.  This shouldn't ever change, but
592	 * reset it after a Diag Reset, just in case.
593	 */
594	sc->WD_available = FALSE;
595	if (pci_get_device(sc->mps_dev) == MPI2_MFGPAGE_DEVID_SSS6200)
596		sc->WD_available = TRUE;
597
598	return (error);
599}
600
601/*
602 * This is called if memory is being free (during detach for example) and when
603 * buffers need to be reallocated due to a Diag Reset.
604 */
605static void
606mps_iocfacts_free(struct mps_softc *sc)
607{
608	struct mps_command *cm;
609	int i;
610
611	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);
612
613	if (sc->free_busaddr != 0)
614		bus_dmamap_unload(sc->queues_dmat, sc->queues_map);
615	if (sc->free_queue != NULL)
616		bus_dmamem_free(sc->queues_dmat, sc->free_queue,
617		    sc->queues_map);
618	if (sc->queues_dmat != NULL)
619		bus_dma_tag_destroy(sc->queues_dmat);
620
621	if (sc->chain_busaddr != 0)
622		bus_dmamap_unload(sc->chain_dmat, sc->chain_map);
623	if (sc->chain_frames != NULL)
624		bus_dmamem_free(sc->chain_dmat, sc->chain_frames,
625		    sc->chain_map);
626	if (sc->chain_dmat != NULL)
627		bus_dma_tag_destroy(sc->chain_dmat);
628
629	if (sc->sense_busaddr != 0)
630		bus_dmamap_unload(sc->sense_dmat, sc->sense_map);
631	if (sc->sense_frames != NULL)
632		bus_dmamem_free(sc->sense_dmat, sc->sense_frames,
633		    sc->sense_map);
634	if (sc->sense_dmat != NULL)
635		bus_dma_tag_destroy(sc->sense_dmat);
636
637	if (sc->reply_busaddr != 0)
638		bus_dmamap_unload(sc->reply_dmat, sc->reply_map);
639	if (sc->reply_frames != NULL)
640		bus_dmamem_free(sc->reply_dmat, sc->reply_frames,
641		    sc->reply_map);
642	if (sc->reply_dmat != NULL)
643		bus_dma_tag_destroy(sc->reply_dmat);
644
645	if (sc->req_busaddr != 0)
646		bus_dmamap_unload(sc->req_dmat, sc->req_map);
647	if (sc->req_frames != NULL)
648		bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map);
649	if (sc->req_dmat != NULL)
650		bus_dma_tag_destroy(sc->req_dmat);
651
652	if (sc->chains != NULL)
653		free(sc->chains, M_MPT2);
654	if (sc->commands != NULL) {
655		for (i = 1; i < sc->num_reqs; i++) {
656			cm = &sc->commands[i];
657			bus_dmamap_destroy(sc->buffer_dmat, cm->cm_dmamap);
658		}
659		free(sc->commands, M_MPT2);
660	}
661	if (sc->buffer_dmat != NULL)
662		bus_dma_tag_destroy(sc->buffer_dmat);
663}
664
665/*
666 * The terms diag reset and hard reset are used interchangeably in the MPI
667 * docs to mean resetting the controller chip.  In this code diag reset
668 * cleans everything up, and the hard reset function just sends the reset
669 * sequence to the chip.  This should probably be refactored so that every
670 * subsystem gets a reset notification of some sort, and can clean up
671 * appropriately.
672 */
673int
674mps_reinit(struct mps_softc *sc)
675{
676	int error;
677	struct mpssas_softc *sassc;
678
679	sassc = sc->sassc;
680
681	MPS_FUNCTRACE(sc);
682
683	mtx_assert(&sc->mps_mtx, MA_OWNED);
684
685	if (sc->mps_flags & MPS_FLAGS_DIAGRESET) {
686		mps_dprint(sc, MPS_INIT, "%s reset already in progress\n",
687			   __func__);
688		return 0;
689	}
690
691	mps_dprint(sc, MPS_INFO, "Reinitializing controller,\n");
692	/* make sure the completion callbacks can recognize they're getting
693	 * a NULL cm_reply due to a reset.
694	 */
695	sc->mps_flags |= MPS_FLAGS_DIAGRESET;
696
697	/*
698	 * Mask interrupts here.
699	 */
700	mps_dprint(sc, MPS_INIT, "%s mask interrupts\n", __func__);
701	mps_mask_intr(sc);
702
703	error = mps_diag_reset(sc, CAN_SLEEP);
704	if (error != 0) {
705		/* XXXSL No need to panic here */
706		panic("%s hard reset failed with error %d\n",
707		    __func__, error);
708	}
709
710	/* Restore the PCI state, including the MSI-X registers */
711	mps_pci_restore(sc);
712
713	/* Give the I/O subsystem special priority to get itself prepared */
714	mpssas_handle_reinit(sc);
715
716	/*
717	 * Get IOC Facts and allocate all structures based on this information.
718	 * The attach function will also call mps_iocfacts_allocate at startup.
719	 * If relevant values have changed in IOC Facts, this function will free
720	 * all of the memory based on IOC Facts and reallocate that memory.
721	 */
722	if ((error = mps_iocfacts_allocate(sc, FALSE)) != 0) {
723		panic("%s IOC Facts based allocation failed with error %d\n",
724		    __func__, error);
725	}
726
727	/*
728	 * Mapping structures will be re-allocated after getting IOC Page8, so
729	 * free these structures here.
730	 */
731	mps_mapping_exit(sc);
732
733	/*
734	 * The static page function currently read is IOC Page8.  Others can be
735	 * added in future.  It's possible that the values in IOC Page8 have
736	 * changed after a Diag Reset due to user modification, so always read
737	 * these.  Interrupts are masked, so unmask them before getting config
738	 * pages.
739	 */
740	mps_unmask_intr(sc);
741	sc->mps_flags &= ~MPS_FLAGS_DIAGRESET;
742	mps_base_static_config_pages(sc);
743
744	/*
745	 * Some mapping info is based in IOC Page8 data, so re-initialize the
746	 * mapping tables.
747	 */
748	mps_mapping_initialize(sc);
749
750	/*
751	 * Restart will reload the event masks clobbered by the reset, and
752	 * then enable the port.
753	 */
754	mps_reregister_events(sc);
755
756	/* the end of discovery will release the simq, so we're done. */
757	mps_dprint(sc, MPS_INFO, "%s finished sc %p post %u free %u\n",
758	    __func__, sc, sc->replypostindex, sc->replyfreeindex);
759
760	mpssas_release_simq_reinit(sassc);
761
762	return 0;
763}
764
765/* Wait for the chip to ACK a word that we've put into its FIFO
766 * Wait for <timeout> seconds. In single loop wait for busy loop
767 * for 500 microseconds.
768 * Total is [ 0.5 * (2000 * <timeout>) ] in miliseconds.
769 * */
770static int
771mps_wait_db_ack(struct mps_softc *sc, int timeout, int sleep_flag)
772{
773
774	u32 cntdn, count;
775	u32 int_status;
776	u32 doorbell;
777
778	count = 0;
779	cntdn = (sleep_flag == CAN_SLEEP) ? 1000*timeout : 2000*timeout;
780	do {
781		int_status = mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
782		if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) {
783			mps_dprint(sc, MPS_INIT,
784			"%s: successfull count(%d), timeout(%d)\n",
785			__func__, count, timeout);
786		return 0;
787		} else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
788			doorbell = mps_regread(sc, MPI2_DOORBELL_OFFSET);
789			if ((doorbell & MPI2_IOC_STATE_MASK) ==
790				MPI2_IOC_STATE_FAULT) {
791				mps_dprint(sc, MPS_FAULT,
792					"fault_state(0x%04x)!\n", doorbell);
793				return (EFAULT);
794			}
795		} else if (int_status == 0xFFFFFFFF)
796			goto out;
797
798		/* If it can sleep, sleep for 1 milisecond, else busy loop for
799		* 0.5 milisecond */
800		if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP)
801			msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0,
802			"mpsdba", hz/1000);
803		else if (sleep_flag == CAN_SLEEP)
804			pause("mpsdba", hz/1000);
805		else
806			DELAY(500);
807		count++;
808	} while (--cntdn);
809
810	out:
811	mps_dprint(sc, MPS_FAULT, "%s: failed due to timeout count(%d), "
812		"int_status(%x)!\n", __func__, count, int_status);
813	return (ETIMEDOUT);
814
815}
816
817/* Wait for the chip to signal that the next word in its FIFO can be fetched */
818static int
819mps_wait_db_int(struct mps_softc *sc)
820{
821	int retry;
822
823	for (retry = 0; retry < MPS_DB_MAX_WAIT; retry++) {
824		if ((mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) &
825		    MPI2_HIS_IOC2SYS_DB_STATUS) != 0)
826			return (0);
827		DELAY(2000);
828	}
829	return (ETIMEDOUT);
830}
831
832/* Step through the synchronous command state machine, i.e. "Doorbell mode" */
833static int
834mps_request_sync(struct mps_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply,
835    int req_sz, int reply_sz, int timeout)
836{
837	uint32_t *data32;
838	uint16_t *data16;
839	int i, count, ioc_sz, residual;
840	int sleep_flags = CAN_SLEEP;
841
842	if (curthread->td_no_sleeping != 0)
843		sleep_flags = NO_SLEEP;
844
845	/* Step 1 */
846	mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
847
848	/* Step 2 */
849	if (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
850		return (EBUSY);
851
852	/* Step 3
853	 * Announce that a message is coming through the doorbell.  Messages
854	 * are pushed at 32bit words, so round up if needed.
855	 */
856	count = (req_sz + 3) / 4;
857	mps_regwrite(sc, MPI2_DOORBELL_OFFSET,
858	    (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) |
859	    (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT));
860
861	/* Step 4 */
862	if (mps_wait_db_int(sc) ||
863	    (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) {
864		mps_dprint(sc, MPS_FAULT, "Doorbell failed to activate\n");
865		return (ENXIO);
866	}
867	mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
868	if (mps_wait_db_ack(sc, 5, sleep_flags) != 0) {
869		mps_dprint(sc, MPS_FAULT, "Doorbell handshake failed\n");
870		return (ENXIO);
871	}
872
873	/* Step 5 */
874	/* Clock out the message data synchronously in 32-bit dwords*/
875	data32 = (uint32_t *)req;
876	for (i = 0; i < count; i++) {
877		mps_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i]));
878		if (mps_wait_db_ack(sc, 5, sleep_flags) != 0) {
879			mps_dprint(sc, MPS_FAULT,
880			    "Timeout while writing doorbell\n");
881			return (ENXIO);
882		}
883	}
884
885	/* Step 6 */
886	/* Clock in the reply in 16-bit words.  The total length of the
887	 * message is always in the 4th byte, so clock out the first 2 words
888	 * manually, then loop the rest.
889	 */
890	data16 = (uint16_t *)reply;
891	if (mps_wait_db_int(sc) != 0) {
892		mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell 0\n");
893		return (ENXIO);
894	}
895	data16[0] =
896	    mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK;
897	mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
898	if (mps_wait_db_int(sc) != 0) {
899		mps_dprint(sc, MPS_FAULT, "Timeout reading doorbell 1\n");
900		return (ENXIO);
901	}
902	data16[1] =
903	    mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK;
904	mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
905
906	/* Number of 32bit words in the message */
907	ioc_sz = reply->MsgLength;
908
909	/*
910	 * Figure out how many 16bit words to clock in without overrunning.
911	 * The precision loss with dividing reply_sz can safely be
912	 * ignored because the messages can only be multiples of 32bits.
913	 */
914	residual = 0;
915	count = MIN((reply_sz / 4), ioc_sz) * 2;
916	if (count < ioc_sz * 2) {
917		residual = ioc_sz * 2 - count;
918		mps_dprint(sc, MPS_ERROR, "Driver error, throwing away %d "
919		    "residual message words\n", residual);
920	}
921
922	for (i = 2; i < count; i++) {
923		if (mps_wait_db_int(sc) != 0) {
924			mps_dprint(sc, MPS_FAULT,
925			    "Timeout reading doorbell %d\n", i);
926			return (ENXIO);
927		}
928		data16[i] = mps_regread(sc, MPI2_DOORBELL_OFFSET) &
929		    MPI2_DOORBELL_DATA_MASK;
930		mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
931	}
932
933	/*
934	 * Pull out residual words that won't fit into the provided buffer.
935	 * This keeps the chip from hanging due to a driver programming
936	 * error.
937	 */
938	while (residual--) {
939		if (mps_wait_db_int(sc) != 0) {
940			mps_dprint(sc, MPS_FAULT,
941			    "Timeout reading doorbell\n");
942			return (ENXIO);
943		}
944		(void)mps_regread(sc, MPI2_DOORBELL_OFFSET);
945		mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
946	}
947
948	/* Step 7 */
949	if (mps_wait_db_int(sc) != 0) {
950		mps_dprint(sc, MPS_FAULT, "Timeout waiting to exit doorbell\n");
951		return (ENXIO);
952	}
953	if (mps_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
954		mps_dprint(sc, MPS_FAULT, "Warning, doorbell still active\n");
955	mps_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
956
957	return (0);
958}
959
960static void
961mps_enqueue_request(struct mps_softc *sc, struct mps_command *cm)
962{
963	reply_descriptor rd;
964	MPS_FUNCTRACE(sc);
965	mps_dprint(sc, MPS_TRACE, "SMID %u cm %p ccb %p\n",
966	    cm->cm_desc.Default.SMID, cm, cm->cm_ccb);
967
968	if (sc->mps_flags & MPS_FLAGS_ATTACH_DONE && !(sc->mps_flags & MPS_FLAGS_SHUTDOWN))
969		mtx_assert(&sc->mps_mtx, MA_OWNED);
970
971	if (++sc->io_cmds_active > sc->io_cmds_highwater)
972		sc->io_cmds_highwater++;
973	rd.u.low = cm->cm_desc.Words.Low;
974	rd.u.high = cm->cm_desc.Words.High;
975	rd.word = htole64(rd.word);
976	/* TODO-We may need to make below regwrite atomic */
977	mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET,
978	    rd.u.low);
979	mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET,
980	    rd.u.high);
981}
982
983/*
984 * Just the FACTS, ma'am.
985 */
986static int
987mps_get_iocfacts(struct mps_softc *sc, MPI2_IOC_FACTS_REPLY *facts)
988{
989	MPI2_DEFAULT_REPLY *reply;
990	MPI2_IOC_FACTS_REQUEST request;
991	int error, req_sz, reply_sz;
992
993	MPS_FUNCTRACE(sc);
994
995	req_sz = sizeof(MPI2_IOC_FACTS_REQUEST);
996	reply_sz = sizeof(MPI2_IOC_FACTS_REPLY);
997	reply = (MPI2_DEFAULT_REPLY *)facts;
998
999	bzero(&request, req_sz);
1000	request.Function = MPI2_FUNCTION_IOC_FACTS;
1001	error = mps_request_sync(sc, &request, reply, req_sz, reply_sz, 5);
1002
1003	return (error);
1004}
1005
1006static int
1007mps_send_iocinit(struct mps_softc *sc)
1008{
1009	MPI2_IOC_INIT_REQUEST	init;
1010	MPI2_DEFAULT_REPLY	reply;
1011	int req_sz, reply_sz, error;
1012	struct timeval now;
1013	uint64_t time_in_msec;
1014
1015	MPS_FUNCTRACE(sc);
1016
1017	req_sz = sizeof(MPI2_IOC_INIT_REQUEST);
1018	reply_sz = sizeof(MPI2_IOC_INIT_REPLY);
1019	bzero(&init, req_sz);
1020	bzero(&reply, reply_sz);
1021
1022	/*
1023	 * Fill in the init block.  Note that most addresses are
1024	 * deliberately in the lower 32bits of memory.  This is a micro-
1025	 * optimzation for PCI/PCIX, though it's not clear if it helps PCIe.
1026	 */
1027	init.Function = MPI2_FUNCTION_IOC_INIT;
1028	init.WhoInit = MPI2_WHOINIT_HOST_DRIVER;
1029	init.MsgVersion = htole16(MPI2_VERSION);
1030	init.HeaderVersion = htole16(MPI2_HEADER_VERSION);
1031	init.SystemRequestFrameSize = htole16(sc->facts->IOCRequestFrameSize);
1032	init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth);
1033	init.ReplyFreeQueueDepth = htole16(sc->fqdepth);
1034	init.SenseBufferAddressHigh = 0;
1035	init.SystemReplyAddressHigh = 0;
1036	init.SystemRequestFrameBaseAddress.High = 0;
1037	init.SystemRequestFrameBaseAddress.Low = htole32((uint32_t)sc->req_busaddr);
1038	init.ReplyDescriptorPostQueueAddress.High = 0;
1039	init.ReplyDescriptorPostQueueAddress.Low = htole32((uint32_t)sc->post_busaddr);
1040	init.ReplyFreeQueueAddress.High = 0;
1041	init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr);
1042	getmicrotime(&now);
1043	time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000);
1044	init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF);
1045	init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF);
1046
1047	error = mps_request_sync(sc, &init, &reply, req_sz, reply_sz, 5);
1048	if ((reply.IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS)
1049		error = ENXIO;
1050
1051	mps_dprint(sc, MPS_INIT, "IOCInit status= 0x%x\n", reply.IOCStatus);
1052	return (error);
1053}
1054
1055void
1056mps_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
1057{
1058	bus_addr_t *addr;
1059
1060	addr = arg;
1061	*addr = segs[0].ds_addr;
1062}
1063
1064static int
1065mps_alloc_queues(struct mps_softc *sc)
1066{
1067	bus_addr_t queues_busaddr;
1068	uint8_t *queues;
1069	int qsize, fqsize, pqsize;
1070
1071	/*
1072	 * The reply free queue contains 4 byte entries in multiples of 16 and
1073	 * aligned on a 16 byte boundary. There must always be an unused entry.
1074	 * This queue supplies fresh reply frames for the firmware to use.
1075	 *
1076	 * The reply descriptor post queue contains 8 byte entries in
1077	 * multiples of 16 and aligned on a 16 byte boundary.  This queue
1078	 * contains filled-in reply frames sent from the firmware to the host.
1079	 *
1080	 * These two queues are allocated together for simplicity.
1081	 */
1082	sc->fqdepth = roundup2((sc->num_replies + 1), 16);
1083	sc->pqdepth = roundup2((sc->num_replies + 1), 16);
1084	fqsize= sc->fqdepth * 4;
1085	pqsize = sc->pqdepth * 8;
1086	qsize = fqsize + pqsize;
1087
1088        if (bus_dma_tag_create( sc->mps_parent_dmat,    /* parent */
1089				16, 0,			/* algnmnt, boundary */
1090				BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
1091				BUS_SPACE_MAXADDR,	/* highaddr */
1092				NULL, NULL,		/* filter, filterarg */
1093                                qsize,			/* maxsize */
1094                                1,			/* nsegments */
1095                                qsize,			/* maxsegsize */
1096                                0,			/* flags */
1097                                NULL, NULL,		/* lockfunc, lockarg */
1098                                &sc->queues_dmat)) {
1099		device_printf(sc->mps_dev, "Cannot allocate queues DMA tag\n");
1100		return (ENOMEM);
1101        }
1102        if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT,
1103	    &sc->queues_map)) {
1104		device_printf(sc->mps_dev, "Cannot allocate queues memory\n");
1105		return (ENOMEM);
1106        }
1107        bzero(queues, qsize);
1108        bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize,
1109	    mps_memaddr_cb, &queues_busaddr, 0);
1110
1111	sc->free_queue = (uint32_t *)queues;
1112	sc->free_busaddr = queues_busaddr;
1113	sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize);
1114	sc->post_busaddr = queues_busaddr + fqsize;
1115
1116	return (0);
1117}
1118
1119static int
1120mps_alloc_replies(struct mps_softc *sc)
1121{
1122	int rsize, num_replies;
1123
1124	/*
1125	 * sc->num_replies should be one less than sc->fqdepth.  We need to
1126	 * allocate space for sc->fqdepth replies, but only sc->num_replies
1127	 * replies can be used at once.
1128	 */
1129	num_replies = max(sc->fqdepth, sc->num_replies);
1130
1131	rsize = sc->facts->ReplyFrameSize * num_replies * 4;
1132        if (bus_dma_tag_create( sc->mps_parent_dmat,    /* parent */
1133				4, 0,			/* algnmnt, boundary */
1134				BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
1135				BUS_SPACE_MAXADDR,	/* highaddr */
1136				NULL, NULL,		/* filter, filterarg */
1137                                rsize,			/* maxsize */
1138                                1,			/* nsegments */
1139                                rsize,			/* maxsegsize */
1140                                0,			/* flags */
1141                                NULL, NULL,		/* lockfunc, lockarg */
1142                                &sc->reply_dmat)) {
1143		device_printf(sc->mps_dev, "Cannot allocate replies DMA tag\n");
1144		return (ENOMEM);
1145        }
1146        if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames,
1147	    BUS_DMA_NOWAIT, &sc->reply_map)) {
1148		device_printf(sc->mps_dev, "Cannot allocate replies memory\n");
1149		return (ENOMEM);
1150        }
1151        bzero(sc->reply_frames, rsize);
1152        bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize,
1153	    mps_memaddr_cb, &sc->reply_busaddr, 0);
1154
1155	return (0);
1156}
1157
1158static int
1159mps_alloc_requests(struct mps_softc *sc)
1160{
1161	struct mps_command *cm;
1162	struct mps_chain *chain;
1163	int i, rsize, nsegs;
1164
1165	rsize = sc->facts->IOCRequestFrameSize * sc->num_reqs * 4;
1166        if (bus_dma_tag_create( sc->mps_parent_dmat,    /* parent */
1167				16, 0,			/* algnmnt, boundary */
1168				BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
1169				BUS_SPACE_MAXADDR,	/* highaddr */
1170				NULL, NULL,		/* filter, filterarg */
1171                                rsize,			/* maxsize */
1172                                1,			/* nsegments */
1173                                rsize,			/* maxsegsize */
1174                                0,			/* flags */
1175                                NULL, NULL,		/* lockfunc, lockarg */
1176                                &sc->req_dmat)) {
1177		device_printf(sc->mps_dev, "Cannot allocate request DMA tag\n");
1178		return (ENOMEM);
1179        }
1180        if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames,
1181	    BUS_DMA_NOWAIT, &sc->req_map)) {
1182		device_printf(sc->mps_dev, "Cannot allocate request memory\n");
1183		return (ENOMEM);
1184        }
1185        bzero(sc->req_frames, rsize);
1186        bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize,
1187	    mps_memaddr_cb, &sc->req_busaddr, 0);
1188
1189	rsize = sc->facts->IOCRequestFrameSize * sc->max_chains * 4;
1190        if (bus_dma_tag_create( sc->mps_parent_dmat,    /* parent */
1191				16, 0,			/* algnmnt, boundary */
1192				BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
1193				BUS_SPACE_MAXADDR,	/* highaddr */
1194				NULL, NULL,		/* filter, filterarg */
1195                                rsize,			/* maxsize */
1196                                1,			/* nsegments */
1197                                rsize,			/* maxsegsize */
1198                                0,			/* flags */
1199                                NULL, NULL,		/* lockfunc, lockarg */
1200                                &sc->chain_dmat)) {
1201		device_printf(sc->mps_dev, "Cannot allocate chain DMA tag\n");
1202		return (ENOMEM);
1203        }
1204        if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames,
1205	    BUS_DMA_NOWAIT, &sc->chain_map)) {
1206		device_printf(sc->mps_dev, "Cannot allocate chain memory\n");
1207		return (ENOMEM);
1208        }
1209        bzero(sc->chain_frames, rsize);
1210        bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize,
1211	    mps_memaddr_cb, &sc->chain_busaddr, 0);
1212
1213	rsize = MPS_SENSE_LEN * sc->num_reqs;
1214        if (bus_dma_tag_create( sc->mps_parent_dmat,    /* parent */
1215				1, 0,			/* algnmnt, boundary */
1216				BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
1217				BUS_SPACE_MAXADDR,	/* highaddr */
1218				NULL, NULL,		/* filter, filterarg */
1219                                rsize,			/* maxsize */
1220                                1,			/* nsegments */
1221                                rsize,			/* maxsegsize */
1222                                0,			/* flags */
1223                                NULL, NULL,		/* lockfunc, lockarg */
1224                                &sc->sense_dmat)) {
1225		device_printf(sc->mps_dev, "Cannot allocate sense DMA tag\n");
1226		return (ENOMEM);
1227        }
1228        if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames,
1229	    BUS_DMA_NOWAIT, &sc->sense_map)) {
1230		device_printf(sc->mps_dev, "Cannot allocate sense memory\n");
1231		return (ENOMEM);
1232        }
1233        bzero(sc->sense_frames, rsize);
1234        bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize,
1235	    mps_memaddr_cb, &sc->sense_busaddr, 0);
1236
1237	sc->chains = malloc(sizeof(struct mps_chain) * sc->max_chains, M_MPT2,
1238	    M_WAITOK | M_ZERO);
1239	if(!sc->chains) {
1240		device_printf(sc->mps_dev,
1241		"Cannot allocate chains memory %s %d\n",
1242		 __func__, __LINE__);
1243		return (ENOMEM);
1244	}
1245	for (i = 0; i < sc->max_chains; i++) {
1246		chain = &sc->chains[i];
1247		chain->chain = (MPI2_SGE_IO_UNION *)(sc->chain_frames +
1248		    i * sc->facts->IOCRequestFrameSize * 4);
1249		chain->chain_busaddr = sc->chain_busaddr +
1250		    i * sc->facts->IOCRequestFrameSize * 4;
1251		mps_free_chain(sc, chain);
1252		sc->chain_free_lowwater++;
1253	}
1254
1255	/* XXX Need to pick a more precise value */
1256	nsegs = (MAXPHYS / PAGE_SIZE) + 1;
1257        if (bus_dma_tag_create( sc->mps_parent_dmat,    /* parent */
1258				1, 0,			/* algnmnt, boundary */
1259				BUS_SPACE_MAXADDR,	/* lowaddr */
1260				BUS_SPACE_MAXADDR,	/* highaddr */
1261				NULL, NULL,		/* filter, filterarg */
1262                                BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
1263                                nsegs,			/* nsegments */
1264                                BUS_SPACE_MAXSIZE_24BIT,/* maxsegsize */
1265                                BUS_DMA_ALLOCNOW,	/* flags */
1266                                busdma_lock_mutex,	/* lockfunc */
1267				&sc->mps_mtx,		/* lockarg */
1268                                &sc->buffer_dmat)) {
1269		device_printf(sc->mps_dev, "Cannot allocate buffer DMA tag\n");
1270		return (ENOMEM);
1271        }
1272
1273	/*
1274	 * SMID 0 cannot be used as a free command per the firmware spec.
1275	 * Just drop that command instead of risking accounting bugs.
1276	 */
1277	sc->commands = malloc(sizeof(struct mps_command) * sc->num_reqs,
1278	    M_MPT2, M_WAITOK | M_ZERO);
1279	if(!sc->commands) {
1280		device_printf(sc->mps_dev, "Cannot allocate memory %s %d\n",
1281		 __func__, __LINE__);
1282		return (ENOMEM);
1283	}
1284	for (i = 1; i < sc->num_reqs; i++) {
1285		cm = &sc->commands[i];
1286		cm->cm_req = sc->req_frames +
1287		    i * sc->facts->IOCRequestFrameSize * 4;
1288		cm->cm_req_busaddr = sc->req_busaddr +
1289		    i * sc->facts->IOCRequestFrameSize * 4;
1290		cm->cm_sense = &sc->sense_frames[i];
1291		cm->cm_sense_busaddr = sc->sense_busaddr + i * MPS_SENSE_LEN;
1292		cm->cm_desc.Default.SMID = i;
1293		cm->cm_sc = sc;
1294		TAILQ_INIT(&cm->cm_chain_list);
1295		callout_init_mtx(&cm->cm_callout, &sc->mps_mtx, 0);
1296
1297		/* XXX Is a failure here a critical problem? */
1298		if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap) == 0)
1299			if (i <= sc->facts->HighPriorityCredit)
1300				mps_free_high_priority_command(sc, cm);
1301			else
1302				mps_free_command(sc, cm);
1303		else {
1304			panic("failed to allocate command %d\n", i);
1305			sc->num_reqs = i;
1306			break;
1307		}
1308	}
1309
1310	return (0);
1311}
1312
1313static int
1314mps_init_queues(struct mps_softc *sc)
1315{
1316	int i;
1317
1318	memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8);
1319
1320	/*
1321	 * According to the spec, we need to use one less reply than we
1322	 * have space for on the queue.  So sc->num_replies (the number we
1323	 * use) should be less than sc->fqdepth (allocated size).
1324	 */
1325	if (sc->num_replies >= sc->fqdepth)
1326		return (EINVAL);
1327
1328	/*
1329	 * Initialize all of the free queue entries.
1330	 */
1331	for (i = 0; i < sc->fqdepth; i++)
1332		sc->free_queue[i] = sc->reply_busaddr + (i * sc->facts->ReplyFrameSize * 4);
1333	sc->replyfreeindex = sc->num_replies;
1334
1335	return (0);
1336}
1337
1338/* Get the driver parameter tunables.  Lowest priority are the driver defaults.
1339 * Next are the global settings, if they exist.  Highest are the per-unit
1340 * settings, if they exist.
1341 */
1342static void
1343mps_get_tunables(struct mps_softc *sc)
1344{
1345	char tmpstr[80];
1346
1347	/* XXX default to some debugging for now */
1348	sc->mps_debug = MPS_INFO|MPS_FAULT;
1349	sc->disable_msix = 0;
1350	sc->disable_msi = 0;
1351	sc->max_chains = MPS_CHAIN_FRAMES;
1352
1353	/*
1354	 * Grab the global variables.
1355	 */
1356	TUNABLE_INT_FETCH("hw.mps.debug_level", &sc->mps_debug);
1357	TUNABLE_INT_FETCH("hw.mps.disable_msix", &sc->disable_msix);
1358	TUNABLE_INT_FETCH("hw.mps.disable_msi", &sc->disable_msi);
1359	TUNABLE_INT_FETCH("hw.mps.max_chains", &sc->max_chains);
1360
1361	/* Grab the unit-instance variables */
1362	snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.debug_level",
1363	    device_get_unit(sc->mps_dev));
1364	TUNABLE_INT_FETCH(tmpstr, &sc->mps_debug);
1365
1366	snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.disable_msix",
1367	    device_get_unit(sc->mps_dev));
1368	TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix);
1369
1370	snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.disable_msi",
1371	    device_get_unit(sc->mps_dev));
1372	TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi);
1373
1374	snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.max_chains",
1375	    device_get_unit(sc->mps_dev));
1376	TUNABLE_INT_FETCH(tmpstr, &sc->max_chains);
1377
1378	bzero(sc->exclude_ids, sizeof(sc->exclude_ids));
1379	snprintf(tmpstr, sizeof(tmpstr), "dev.mps.%d.exclude_ids",
1380	    device_get_unit(sc->mps_dev));
1381	TUNABLE_STR_FETCH(tmpstr, sc->exclude_ids, sizeof(sc->exclude_ids));
1382}
1383
1384static void
1385mps_setup_sysctl(struct mps_softc *sc)
1386{
1387	struct sysctl_ctx_list	*sysctl_ctx = NULL;
1388	struct sysctl_oid	*sysctl_tree = NULL;
1389	char tmpstr[80], tmpstr2[80];
1390
1391	/*
1392	 * Setup the sysctl variable so the user can change the debug level
1393	 * on the fly.
1394	 */
1395	snprintf(tmpstr, sizeof(tmpstr), "MPS controller %d",
1396	    device_get_unit(sc->mps_dev));
1397	snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mps_dev));
1398
1399	sysctl_ctx = device_get_sysctl_ctx(sc->mps_dev);
1400	if (sysctl_ctx != NULL)
1401		sysctl_tree = device_get_sysctl_tree(sc->mps_dev);
1402
1403	if (sysctl_tree == NULL) {
1404		sysctl_ctx_init(&sc->sysctl_ctx);
1405		sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
1406		    SYSCTL_STATIC_CHILDREN(_hw_mps), OID_AUTO, tmpstr2,
1407		    CTLFLAG_RD, 0, tmpstr);
1408		if (sc->sysctl_tree == NULL)
1409			return;
1410		sysctl_ctx = &sc->sysctl_ctx;
1411		sysctl_tree = sc->sysctl_tree;
1412	}
1413
1414	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1415	    OID_AUTO, "debug_level", CTLFLAG_RW, &sc->mps_debug, 0,
1416	    "mps debug level");
1417
1418	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1419	    OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0,
1420	    "Disable the use of MSI-X interrupts");
1421
1422	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1423	    OID_AUTO, "disable_msi", CTLFLAG_RD, &sc->disable_msi, 0,
1424	    "Disable the use of MSI interrupts");
1425
1426	SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1427	    OID_AUTO, "firmware_version", CTLFLAG_RW, sc->fw_version,
1428	    strlen(sc->fw_version), "firmware version");
1429
1430	SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1431	    OID_AUTO, "driver_version", CTLFLAG_RW, MPS_DRIVER_VERSION,
1432	    strlen(MPS_DRIVER_VERSION), "driver version");
1433
1434	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1435	    OID_AUTO, "io_cmds_active", CTLFLAG_RD,
1436	    &sc->io_cmds_active, 0, "number of currently active commands");
1437
1438	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1439	    OID_AUTO, "io_cmds_highwater", CTLFLAG_RD,
1440	    &sc->io_cmds_highwater, 0, "maximum active commands seen");
1441
1442	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1443	    OID_AUTO, "chain_free", CTLFLAG_RD,
1444	    &sc->chain_free, 0, "number of free chain elements");
1445
1446	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1447	    OID_AUTO, "chain_free_lowwater", CTLFLAG_RD,
1448	    &sc->chain_free_lowwater, 0,"lowest number of free chain elements");
1449
1450	SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1451	    OID_AUTO, "max_chains", CTLFLAG_RD,
1452	    &sc->max_chains, 0,"maximum chain frames that will be allocated");
1453
1454#if __FreeBSD_version >= 900030
1455	SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
1456	    OID_AUTO, "chain_alloc_fail", CTLFLAG_RD,
1457	    &sc->chain_alloc_fail, "chain allocation failures");
1458#endif //FreeBSD_version >= 900030
1459}
1460
1461int
1462mps_attach(struct mps_softc *sc)
1463{
1464	int error;
1465
1466	mps_get_tunables(sc);
1467
1468	MPS_FUNCTRACE(sc);
1469
1470	mtx_init(&sc->mps_mtx, "MPT2SAS lock", NULL, MTX_DEF);
1471	callout_init_mtx(&sc->periodic, &sc->mps_mtx, 0);
1472	TAILQ_INIT(&sc->event_list);
1473	timevalclear(&sc->lastfail);
1474
1475	if ((error = mps_transition_ready(sc)) != 0) {
1476		mps_printf(sc, "%s failed to transition ready\n", __func__);
1477		return (error);
1478	}
1479
1480	sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPT2,
1481	    M_ZERO|M_NOWAIT);
1482	if(!sc->facts) {
1483		device_printf(sc->mps_dev, "Cannot allocate memory %s %d\n",
1484		 __func__, __LINE__);
1485		return (ENOMEM);
1486	}
1487
1488	/*
1489	 * Get IOC Facts and allocate all structures based on this information.
1490	 * A Diag Reset will also call mps_iocfacts_allocate and re-read the IOC
1491	 * Facts. If relevant values have changed in IOC Facts, this function
1492	 * will free all of the memory based on IOC Facts and reallocate that
1493	 * memory.  If this fails, any allocated memory should already be freed.
1494	 */
1495	if ((error = mps_iocfacts_allocate(sc, TRUE)) != 0) {
1496		mps_dprint(sc, MPS_FAULT, "%s IOC Facts based allocation "
1497		    "failed with error %d\n", __func__, error);
1498		return (error);
1499	}
1500
1501	/* Start the periodic watchdog check on the IOC Doorbell */
1502	mps_periodic(sc);
1503
1504	/*
1505	 * The portenable will kick off discovery events that will drive the
1506	 * rest of the initialization process.  The CAM/SAS module will
1507	 * hold up the boot sequence until discovery is complete.
1508	 */
1509	sc->mps_ich.ich_func = mps_startup;
1510	sc->mps_ich.ich_arg = sc;
1511	if (config_intrhook_establish(&sc->mps_ich) != 0) {
1512		mps_dprint(sc, MPS_ERROR, "Cannot establish MPS config hook\n");
1513		error = EINVAL;
1514	}
1515
1516	/*
1517	 * Allow IR to shutdown gracefully when shutdown occurs.
1518	 */
1519	sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final,
1520	    mpssas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT);
1521
1522	if (sc->shutdown_eh == NULL)
1523		mps_dprint(sc, MPS_ERROR, "shutdown event registration "
1524		    "failed\n");
1525
1526	mps_setup_sysctl(sc);
1527
1528	sc->mps_flags |= MPS_FLAGS_ATTACH_DONE;
1529
1530	return (error);
1531}
1532
/*
 * Run through any late-start handlers.  Installed by mps_attach() as the
 * config intrhook function; 'arg' is the softc.  All work is done with the
 * driver lock held.
 */
static void
mps_startup(void *arg)
{
	struct mps_softc *sc = (struct mps_softc *)arg;

	mps_lock(sc);

	mps_unmask_intr(sc);

	/* initialize device mapping tables */
	mps_base_static_config_pages(sc);
	mps_mapping_initialize(sc);

	mpssas_startup(sc);

	mps_unlock(sc);
}
1550
1551/* Periodic watchdog.  Is called with the driver lock already held. */
1552static void
1553mps_periodic(void *arg)
1554{
1555	struct mps_softc *sc;
1556	uint32_t db;
1557
1558	sc = (struct mps_softc *)arg;
1559	if (sc->mps_flags & MPS_FLAGS_SHUTDOWN)
1560		return;
1561
1562	db = mps_regread(sc, MPI2_DOORBELL_OFFSET);
1563	if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
1564		mps_dprint(sc, MPS_FAULT, "IOC Fault 0x%08x, Resetting\n", db);
1565		mps_reinit(sc);
1566	}
1567
1568	callout_reset(&sc->periodic, MPS_PERIODIC_DELAY * hz, mps_periodic, sc);
1569}
1570
1571static void
1572mps_log_evt_handler(struct mps_softc *sc, uintptr_t data,
1573    MPI2_EVENT_NOTIFICATION_REPLY *event)
1574{
1575	MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry;
1576
1577	mps_print_event(sc, event);
1578
1579	switch (event->Event) {
1580	case MPI2_EVENT_LOG_DATA:
1581		mps_dprint(sc, MPS_EVENT, "MPI2_EVENT_LOG_DATA:\n");
1582		if (sc->mps_debug & MPS_EVENT)
1583			hexdump(event->EventData, event->EventDataLength, NULL, 0);
1584		break;
1585	case MPI2_EVENT_LOG_ENTRY_ADDED:
1586		entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData;
1587		mps_dprint(sc, MPS_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event "
1588		    "0x%x Sequence %d:\n", entry->LogEntryQualifier,
1589		     entry->LogSequence);
1590		break;
1591	default:
1592		break;
1593	}
1594	return;
1595}
1596
1597static int
1598mps_attach_log(struct mps_softc *sc)
1599{
1600	u32 events[MPI2_EVENT_NOTIFY_EVENTMASK_WORDS];
1601
1602	bzero(events, 16);
1603	setbit(events, MPI2_EVENT_LOG_DATA);
1604	setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED);
1605
1606	mps_register_events(sc, events, mps_log_evt_handler, NULL,
1607	    &sc->mps_log_eh);
1608
1609	return (0);
1610}
1611
1612static int
1613mps_detach_log(struct mps_softc *sc)
1614{
1615
1616	if (sc->mps_log_eh != NULL)
1617		mps_deregister_events(sc, sc->mps_log_eh);
1618	return (0);
1619}
1620
1621/*
1622 * Free all of the driver resources and detach submodules.  Should be called
1623 * without the lock held.
1624 */
1625int
1626mps_free(struct mps_softc *sc)
1627{
1628	int error;
1629
1630	/* Turn off the watchdog */
1631	mps_lock(sc);
1632	sc->mps_flags |= MPS_FLAGS_SHUTDOWN;
1633	mps_unlock(sc);
1634	/* Lock must not be held for this */
1635	callout_drain(&sc->periodic);
1636
1637	if (((error = mps_detach_log(sc)) != 0) ||
1638	    ((error = mps_detach_sas(sc)) != 0))
1639		return (error);
1640
1641	mps_detach_user(sc);
1642
1643	/* Put the IOC back in the READY state. */
1644	mps_lock(sc);
1645	if ((error = mps_transition_ready(sc)) != 0) {
1646		mps_unlock(sc);
1647		return (error);
1648	}
1649	mps_unlock(sc);
1650
1651	if (sc->facts != NULL)
1652		free(sc->facts, M_MPT2);
1653
1654	/*
1655	 * Free all buffers that are based on IOC Facts.  A Diag Reset may need
1656	 * to free these buffers too.
1657	 */
1658	mps_iocfacts_free(sc);
1659
1660	if (sc->sysctl_tree != NULL)
1661		sysctl_ctx_free(&sc->sysctl_ctx);
1662
1663	/* Deregister the shutdown function */
1664	if (sc->shutdown_eh != NULL)
1665		EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh);
1666
1667	mtx_destroy(&sc->mps_mtx);
1668
1669	return (0);
1670}
1671
1672static __inline void
1673mps_complete_command(struct mps_softc *sc, struct mps_command *cm)
1674{
1675	MPS_FUNCTRACE(sc);
1676
1677	if (cm == NULL) {
1678		mps_dprint(sc, MPS_ERROR, "Completing NULL command\n");
1679		return;
1680	}
1681
1682	if (cm->cm_flags & MPS_CM_FLAGS_POLLED)
1683		cm->cm_flags |= MPS_CM_FLAGS_COMPLETE;
1684
1685	if (cm->cm_complete != NULL) {
1686		mps_dprint(sc, MPS_TRACE,
1687			   "%s cm %p calling cm_complete %p data %p reply %p\n",
1688			   __func__, cm, cm->cm_complete, cm->cm_complete_data,
1689			   cm->cm_reply);
1690		cm->cm_complete(sc, cm);
1691	}
1692
1693	if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) {
1694		mps_dprint(sc, MPS_TRACE, "waking up %p\n", cm);
1695		wakeup(cm);
1696	}
1697
1698	if (cm->cm_sc->io_cmds_active != 0) {
1699		cm->cm_sc->io_cmds_active--;
1700	} else {
1701		mps_dprint(sc, MPS_ERROR, "Warning: io_cmds_active is "
1702		    "out of sync - resynching to 0\n");
1703	}
1704}
1705
1706
1707static void
1708mps_sas_log_info(struct mps_softc *sc , u32 log_info)
1709{
1710	union loginfo_type {
1711		u32     loginfo;
1712		struct {
1713			u32     subcode:16;
1714			u32     code:8;
1715			u32     originator:4;
1716			u32     bus_type:4;
1717		} dw;
1718	};
1719	union loginfo_type sas_loginfo;
1720	char *originator_str = NULL;
1721
1722	sas_loginfo.loginfo = log_info;
1723	if (sas_loginfo.dw.bus_type != 3 /*SAS*/)
1724		return;
1725
1726	/* each nexus loss loginfo */
1727	if (log_info == 0x31170000)
1728		return;
1729
1730	/* eat the loginfos associated with task aborts */
1731	if ((log_info == 30050000 || log_info ==
1732	    0x31140000 || log_info == 0x31130000))
1733		return;
1734
1735	switch (sas_loginfo.dw.originator) {
1736	case 0:
1737		originator_str = "IOP";
1738		break;
1739	case 1:
1740		originator_str = "PL";
1741		break;
1742	case 2:
1743		originator_str = "IR";
1744		break;
1745}
1746
1747	mps_dprint(sc, MPS_LOG, "log_info(0x%08x): originator(%s), "
1748	"code(0x%02x), sub_code(0x%04x)\n", log_info,
1749	originator_str, sas_loginfo.dw.code,
1750	sas_loginfo.dw.subcode);
1751}
1752
1753static void
1754mps_display_reply_info(struct mps_softc *sc, uint8_t *reply)
1755{
1756	MPI2DefaultReply_t *mpi_reply;
1757	u16 sc_status;
1758
1759	mpi_reply = (MPI2DefaultReply_t*)reply;
1760	sc_status = le16toh(mpi_reply->IOCStatus);
1761	if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
1762		mps_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo));
1763}
1764void
1765mps_intr(void *data)
1766{
1767	struct mps_softc *sc;
1768	uint32_t status;
1769
1770	sc = (struct mps_softc *)data;
1771	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);
1772
1773	/*
1774	 * Check interrupt status register to flush the bus.  This is
1775	 * needed for both INTx interrupts and driver-driven polling
1776	 */
1777	status = mps_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
1778	if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0)
1779		return;
1780
1781	mps_lock(sc);
1782	mps_intr_locked(data);
1783	mps_unlock(sc);
1784	return;
1785}
1786
1787/*
1788 * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the
1789 * chip.  Hopefully this theory is correct.
1790 */
1791void
1792mps_intr_msi(void *data)
1793{
1794	struct mps_softc *sc;
1795
1796	sc = (struct mps_softc *)data;
1797	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);
1798	mps_lock(sc);
1799	mps_intr_locked(data);
1800	mps_unlock(sc);
1801	return;
1802}
1803
1804/*
1805 * The locking is overly broad and simplistic, but easy to deal with for now.
1806 */
1807void
1808mps_intr_locked(void *data)
1809{
1810	MPI2_REPLY_DESCRIPTORS_UNION *desc;
1811	struct mps_softc *sc;
1812	struct mps_command *cm = NULL;
1813	uint8_t flags;
1814	u_int pq;
1815	MPI2_DIAG_RELEASE_REPLY *rel_rep;
1816	mps_fw_diagnostic_buffer_t *pBuffer;
1817
1818	sc = (struct mps_softc *)data;
1819
1820	pq = sc->replypostindex;
1821	mps_dprint(sc, MPS_TRACE,
1822	    "%s sc %p starting with replypostindex %u\n",
1823	    __func__, sc, sc->replypostindex);
1824
1825	for ( ;; ) {
1826		cm = NULL;
1827		desc = &sc->post_queue[sc->replypostindex];
1828		flags = desc->Default.ReplyFlags &
1829		    MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
1830		if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED)
1831		 || (le32toh(desc->Words.High) == 0xffffffff))
1832			break;
1833
1834		/* increment the replypostindex now, so that event handlers
1835		 * and cm completion handlers which decide to do a diag
1836		 * reset can zero it without it getting incremented again
1837		 * afterwards, and we break out of this loop on the next
1838		 * iteration since the reply post queue has been cleared to
1839		 * 0xFF and all descriptors look unused (which they are).
1840		 */
1841		if (++sc->replypostindex >= sc->pqdepth)
1842			sc->replypostindex = 0;
1843
1844		switch (flags) {
1845		case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS:
1846			cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)];
1847			cm->cm_reply = NULL;
1848			break;
1849		case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY:
1850		{
1851			uint32_t baddr;
1852			uint8_t *reply;
1853
1854			/*
1855			 * Re-compose the reply address from the address
1856			 * sent back from the chip.  The ReplyFrameAddress
1857			 * is the lower 32 bits of the physical address of
1858			 * particular reply frame.  Convert that address to
1859			 * host format, and then use that to provide the
1860			 * offset against the virtual address base
1861			 * (sc->reply_frames).
1862			 */
1863			baddr = le32toh(desc->AddressReply.ReplyFrameAddress);
1864			reply = sc->reply_frames +
1865				(baddr - ((uint32_t)sc->reply_busaddr));
1866			/*
1867			 * Make sure the reply we got back is in a valid
1868			 * range.  If not, go ahead and panic here, since
1869			 * we'll probably panic as soon as we deference the
1870			 * reply pointer anyway.
1871			 */
1872			if ((reply < sc->reply_frames)
1873			 || (reply > (sc->reply_frames +
1874			     (sc->fqdepth * sc->facts->ReplyFrameSize * 4)))) {
1875				printf("%s: WARNING: reply %p out of range!\n",
1876				       __func__, reply);
1877				printf("%s: reply_frames %p, fqdepth %d, "
1878				       "frame size %d\n", __func__,
1879				       sc->reply_frames, sc->fqdepth,
1880				       sc->facts->ReplyFrameSize * 4);
1881				printf("%s: baddr %#x,\n", __func__, baddr);
1882				/* LSI-TODO. See Linux Code. Need Gracefull exit*/
1883				panic("Reply address out of range");
1884			}
1885			if (le16toh(desc->AddressReply.SMID) == 0) {
1886				if (((MPI2_DEFAULT_REPLY *)reply)->Function ==
1887				    MPI2_FUNCTION_DIAG_BUFFER_POST) {
1888					/*
1889					 * If SMID is 0 for Diag Buffer Post,
1890					 * this implies that the reply is due to
1891					 * a release function with a status that
1892					 * the buffer has been released.  Set
1893					 * the buffer flags accordingly.
1894					 */
1895					rel_rep =
1896					    (MPI2_DIAG_RELEASE_REPLY *)reply;
1897					if (le16toh(rel_rep->IOCStatus) ==
1898					    MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED)
1899					    {
1900						pBuffer =
1901						    &sc->fw_diag_buffer_list[
1902						    rel_rep->BufferType];
1903						pBuffer->valid_data = TRUE;
1904						pBuffer->owned_by_firmware =
1905						    FALSE;
1906						pBuffer->immediate = FALSE;
1907					}
1908				} else
1909					mps_dispatch_event(sc, baddr,
1910					    (MPI2_EVENT_NOTIFICATION_REPLY *)
1911					    reply);
1912			} else {
1913				cm = &sc->commands[le16toh(desc->AddressReply.SMID)];
1914				cm->cm_reply = reply;
1915				cm->cm_reply_data =
1916				    le32toh(desc->AddressReply.ReplyFrameAddress);
1917			}
1918			break;
1919		}
1920		case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS:
1921		case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER:
1922		case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS:
1923		default:
1924			/* Unhandled */
1925			mps_dprint(sc, MPS_ERROR, "Unhandled reply 0x%x\n",
1926			    desc->Default.ReplyFlags);
1927			cm = NULL;
1928			break;
1929		}
1930
1931
1932		if (cm != NULL) {
1933			// Print Error reply frame
1934			if (cm->cm_reply)
1935				mps_display_reply_info(sc,cm->cm_reply);
1936			mps_complete_command(sc, cm);
1937		}
1938
1939		desc->Words.Low = 0xffffffff;
1940		desc->Words.High = 0xffffffff;
1941	}
1942
1943	if (pq != sc->replypostindex) {
1944		mps_dprint(sc, MPS_TRACE,
1945		    "%s sc %p writing postindex %d\n",
1946		    __func__, sc, sc->replypostindex);
1947		mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex);
1948	}
1949
1950	return;
1951}
1952
1953static void
1954mps_dispatch_event(struct mps_softc *sc, uintptr_t data,
1955    MPI2_EVENT_NOTIFICATION_REPLY *reply)
1956{
1957	struct mps_event_handle *eh;
1958	int event, handled = 0;
1959
1960	event = le16toh(reply->Event);
1961	TAILQ_FOREACH(eh, &sc->event_list, eh_list) {
1962		if (isset(eh->mask, event)) {
1963			eh->callback(sc, data, reply);
1964			handled++;
1965		}
1966	}
1967
1968	if (handled == 0)
1969		mps_dprint(sc, MPS_EVENT, "Unhandled event 0x%x\n", le16toh(event));
1970
1971	/*
1972	 * This is the only place that the event/reply should be freed.
1973	 * Anything wanting to hold onto the event data should have
1974	 * already copied it into their own storage.
1975	 */
1976	mps_free_reply(sc, data);
1977}
1978
1979static void
1980mps_reregister_events_complete(struct mps_softc *sc, struct mps_command *cm)
1981{
1982	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);
1983
1984	if (cm->cm_reply)
1985		mps_print_event(sc,
1986			(MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply);
1987
1988	mps_free_command(sc, cm);
1989
1990	/* next, send a port enable */
1991	mpssas_startup(sc);
1992}
1993
1994/*
1995 * For both register_events and update_events, the caller supplies a bitmap
1996 * of events that it _wants_.  These functions then turn that into a bitmask
1997 * suitable for the controller.
1998 */
1999int
2000mps_register_events(struct mps_softc *sc, u32 *mask,
2001    mps_evt_callback_t *cb, void *data, struct mps_event_handle **handle)
2002{
2003	struct mps_event_handle *eh;
2004	int error = 0;
2005
2006	eh = malloc(sizeof(struct mps_event_handle), M_MPT2, M_WAITOK|M_ZERO);
2007	if(!eh) {
2008		device_printf(sc->mps_dev, "Cannot allocate memory %s %d\n",
2009		 __func__, __LINE__);
2010		return (ENOMEM);
2011	}
2012	eh->callback = cb;
2013	eh->data = data;
2014	TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list);
2015	if (mask != NULL)
2016		error = mps_update_events(sc, eh, mask);
2017	*handle = eh;
2018
2019	return (error);
2020}
2021
2022int
2023mps_update_events(struct mps_softc *sc, struct mps_event_handle *handle,
2024    u32 *mask)
2025{
2026	MPI2_EVENT_NOTIFICATION_REQUEST *evtreq;
2027	MPI2_EVENT_NOTIFICATION_REPLY *reply;
2028	struct mps_command *cm;
2029	int error, i;
2030
2031	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);
2032
2033	if ((mask != NULL) && (handle != NULL))
2034		bcopy(mask, &handle->mask[0], sizeof(u32) *
2035				MPI2_EVENT_NOTIFY_EVENTMASK_WORDS);
2036
2037	for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++)
2038		sc->event_mask[i] = -1;
2039
2040	for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++)
2041		sc->event_mask[i] &= ~handle->mask[i];
2042
2043
2044	if ((cm = mps_alloc_command(sc)) == NULL)
2045		return (EBUSY);
2046	evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req;
2047	evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION;
2048	evtreq->MsgFlags = 0;
2049	evtreq->SASBroadcastPrimitiveMasks = 0;
2050#ifdef MPS_DEBUG_ALL_EVENTS
2051	{
2052		u_char fullmask[16];
2053		memset(fullmask, 0x00, 16);
2054		bcopy(fullmask, &evtreq->EventMasks[0], sizeof(u32) *
2055				MPI2_EVENT_NOTIFY_EVENTMASK_WORDS);
2056	}
2057#else
2058        for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++)
2059                evtreq->EventMasks[i] =
2060                    htole32(sc->event_mask[i]);
2061#endif
2062	cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
2063	cm->cm_data = NULL;
2064
2065	error = mps_request_polled(sc, cm);
2066	reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply;
2067	if ((reply == NULL) ||
2068	    (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS)
2069		error = ENXIO;
2070	mps_print_event(sc, reply);
2071	mps_dprint(sc, MPS_TRACE, "%s finished error %d\n", __func__, error);
2072
2073	mps_free_command(sc, cm);
2074	return (error);
2075}
2076
2077static int
2078mps_reregister_events(struct mps_softc *sc)
2079{
2080	MPI2_EVENT_NOTIFICATION_REQUEST *evtreq;
2081	struct mps_command *cm;
2082	struct mps_event_handle *eh;
2083	int error, i;
2084
2085	mps_dprint(sc, MPS_TRACE, "%s\n", __func__);
2086
2087	/* first, reregister events */
2088
2089	for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++)
2090		sc->event_mask[i] = -1;
2091
2092	TAILQ_FOREACH(eh, &sc->event_list, eh_list) {
2093		for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++)
2094			sc->event_mask[i] &= ~eh->mask[i];
2095	}
2096
2097	if ((cm = mps_alloc_command(sc)) == NULL)
2098		return (EBUSY);
2099	evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req;
2100	evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION;
2101	evtreq->MsgFlags = 0;
2102	evtreq->SASBroadcastPrimitiveMasks = 0;
2103#ifdef MPS_DEBUG_ALL_EVENTS
2104	{
2105		u_char fullmask[16];
2106		memset(fullmask, 0x00, 16);
2107		bcopy(fullmask, &evtreq->EventMasks[0], sizeof(u32) *
2108			MPI2_EVENT_NOTIFY_EVENTMASK_WORDS);
2109	}
2110#else
2111        for (i = 0; i < MPI2_EVENT_NOTIFY_EVENTMASK_WORDS; i++)
2112                evtreq->EventMasks[i] =
2113                    htole32(sc->event_mask[i]);
2114#endif
2115	cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
2116	cm->cm_data = NULL;
2117	cm->cm_complete = mps_reregister_events_complete;
2118
2119	error = mps_map_command(sc, cm);
2120
2121	mps_dprint(sc, MPS_TRACE, "%s finished with error %d\n", __func__,
2122	    error);
2123	return (error);
2124}
2125
2126void
2127mps_deregister_events(struct mps_softc *sc, struct mps_event_handle *handle)
2128{
2129
2130	TAILQ_REMOVE(&sc->event_list, handle, eh_list);
2131	free(handle, M_MPT2);
2132}
2133
2134/*
2135 * Add a chain element as the next SGE for the specified command.
2136 * Reset cm_sge and cm_sgesize to indicate all the available space.
2137 */
2138static int
2139mps_add_chain(struct mps_command *cm)
2140{
2141	MPI2_SGE_CHAIN32 *sgc;
2142	struct mps_chain *chain;
2143	int space;
2144
2145	if (cm->cm_sglsize < MPS_SGC_SIZE)
2146		panic("MPS: Need SGE Error Code\n");
2147
2148	chain = mps_alloc_chain(cm->cm_sc);
2149	if (chain == NULL)
2150		return (ENOBUFS);
2151
2152	space = (int)cm->cm_sc->facts->IOCRequestFrameSize * 4;
2153
2154	/*
2155	 * Note: a double-linked list is used to make it easier to
2156	 * walk for debugging.
2157	 */
2158	TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link);
2159
2160	sgc = (MPI2_SGE_CHAIN32 *)&cm->cm_sge->MpiChain;
2161	sgc->Length = htole16(space);
2162	sgc->NextChainOffset = 0;
2163	/* TODO Looks like bug in Setting sgc->Flags.
2164	 *	sgc->Flags = ( MPI2_SGE_FLAGS_CHAIN_ELEMENT | MPI2_SGE_FLAGS_64_BIT_ADDRESSING |
2165	 *	            MPI2_SGE_FLAGS_SYSTEM_ADDRESS) << MPI2_SGE_FLAGS_SHIFT
2166	 *	This is fine.. because we are not using simple element. In case of
2167	 *	MPI2_SGE_CHAIN32, we have seperate Length and Flags feild.
2168 	 */
2169	sgc->Flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT;
2170	sgc->Address = htole32(chain->chain_busaddr);
2171
2172	cm->cm_sge = (MPI2_SGE_IO_UNION *)&chain->chain->MpiSimple;
2173	cm->cm_sglsize = space;
2174	return (0);
2175}
2176
2177/*
2178 * Add one scatter-gather element (chain, simple, transaction context)
2179 * to the scatter-gather list for a command.  Maintain cm_sglsize and
2180 * cm_sge as the remaining size and pointer to the next SGE to fill
2181 * in, respectively.
2182 */
2183int
2184mps_push_sge(struct mps_command *cm, void *sgep, size_t len, int segsleft)
2185{
2186	MPI2_SGE_TRANSACTION_UNION *tc = sgep;
2187	MPI2_SGE_SIMPLE64 *sge = sgep;
2188	int error, type;
2189	uint32_t saved_buf_len, saved_address_low, saved_address_high;
2190
2191	type = (tc->Flags & MPI2_SGE_FLAGS_ELEMENT_MASK);
2192
2193#ifdef INVARIANTS
2194	switch (type) {
2195	case MPI2_SGE_FLAGS_TRANSACTION_ELEMENT: {
2196		if (len != tc->DetailsLength + 4)
2197			panic("TC %p length %u or %zu?", tc,
2198			    tc->DetailsLength + 4, len);
2199		}
2200		break;
2201	case MPI2_SGE_FLAGS_CHAIN_ELEMENT:
2202		/* Driver only uses 32-bit chain elements */
2203		if (len != MPS_SGC_SIZE)
2204			panic("CHAIN %p length %u or %zu?", sgep,
2205			    MPS_SGC_SIZE, len);
2206		break;
2207	case MPI2_SGE_FLAGS_SIMPLE_ELEMENT:
2208		/* Driver only uses 64-bit SGE simple elements */
2209		if (len != MPS_SGE64_SIZE)
2210			panic("SGE simple %p length %u or %zu?", sge,
2211			    MPS_SGE64_SIZE, len);
2212		if (((le32toh(sge->FlagsLength) >> MPI2_SGE_FLAGS_SHIFT) &
2213		    MPI2_SGE_FLAGS_ADDRESS_SIZE) == 0)
2214			panic("SGE simple %p not marked 64-bit?", sge);
2215
2216		break;
2217	default:
2218		panic("Unexpected SGE %p, flags %02x", tc, tc->Flags);
2219	}
2220#endif
2221
2222	/*
2223	 * case 1: 1 more segment, enough room for it
2224	 * case 2: 2 more segments, enough room for both
2225	 * case 3: >=2 more segments, only enough room for 1 and a chain
2226	 * case 4: >=1 more segment, enough room for only a chain
2227	 * case 5: >=1 more segment, no room for anything (error)
2228         */
2229
2230	/*
2231	 * There should be room for at least a chain element, or this
2232	 * code is buggy.  Case (5).
2233	 */
2234	if (cm->cm_sglsize < MPS_SGC_SIZE)
2235		panic("MPS: Need SGE Error Code\n");
2236
2237	if (segsleft >= 2 &&
2238	    cm->cm_sglsize < len + MPS_SGC_SIZE + MPS_SGE64_SIZE) {
2239		/*
2240		 * There are 2 or more segments left to add, and only
2241		 * enough room for 1 and a chain.  Case (3).
2242		 *
2243		 * Mark as last element in this chain if necessary.
2244		 */
2245		if (type == MPI2_SGE_FLAGS_SIMPLE_ELEMENT) {
2246			sge->FlagsLength |= htole32(
2247			    MPI2_SGE_FLAGS_LAST_ELEMENT << MPI2_SGE_FLAGS_SHIFT);
2248		}
2249
2250		/*
2251		 * Add the item then a chain.  Do the chain now,
2252		 * rather than on the next iteration, to simplify
2253		 * understanding the code.
2254		 */
2255		cm->cm_sglsize -= len;
2256		bcopy(sgep, cm->cm_sge, len);
2257		cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len);
2258		return (mps_add_chain(cm));
2259	}
2260
2261	if (segsleft >= 1 && cm->cm_sglsize < len + MPS_SGC_SIZE) {
2262		/*
2263		 * 1 or more segment, enough room for only a chain.
2264		 * Hope the previous element wasn't a Simple entry
2265		 * that needed to be marked with
2266		 * MPI2_SGE_FLAGS_LAST_ELEMENT.  Case (4).
2267		 */
2268		if ((error = mps_add_chain(cm)) != 0)
2269			return (error);
2270	}
2271
2272#ifdef INVARIANTS
2273	/* Case 1: 1 more segment, enough room for it. */
2274	if (segsleft == 1 && cm->cm_sglsize < len)
2275		panic("1 seg left and no room? %u versus %zu",
2276		    cm->cm_sglsize, len);
2277
2278	/* Case 2: 2 more segments, enough room for both */
2279	if (segsleft == 2 && cm->cm_sglsize < len + MPS_SGE64_SIZE)
2280		panic("2 segs left and no room? %u versus %zu",
2281		    cm->cm_sglsize, len);
2282#endif
2283
2284	if (segsleft == 1 && type == MPI2_SGE_FLAGS_SIMPLE_ELEMENT) {
2285		/*
2286		 * If this is a bi-directional request, need to account for that
2287		 * here.  Save the pre-filled sge values.  These will be used
2288		 * either for the 2nd SGL or for a single direction SGL.  If
2289		 * cm_out_len is non-zero, this is a bi-directional request, so
2290		 * fill in the OUT SGL first, then the IN SGL, otherwise just
2291		 * fill in the IN SGL.  Note that at this time, when filling in
2292		 * 2 SGL's for a bi-directional request, they both use the same
2293		 * DMA buffer (same cm command).
2294		 */
2295		saved_buf_len = le32toh(sge->FlagsLength) & 0x00FFFFFF;
2296		saved_address_low = sge->Address.Low;
2297		saved_address_high = sge->Address.High;
2298		if (cm->cm_out_len) {
2299			sge->FlagsLength = htole32(cm->cm_out_len |
2300			    ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
2301			    MPI2_SGE_FLAGS_END_OF_BUFFER |
2302			    MPI2_SGE_FLAGS_HOST_TO_IOC |
2303			    MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
2304			    MPI2_SGE_FLAGS_SHIFT));
2305			cm->cm_sglsize -= len;
2306			bcopy(sgep, cm->cm_sge, len);
2307			cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge
2308			    + len);
2309		}
2310		saved_buf_len |=
2311		    ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
2312		    MPI2_SGE_FLAGS_END_OF_BUFFER |
2313		    MPI2_SGE_FLAGS_LAST_ELEMENT |
2314		    MPI2_SGE_FLAGS_END_OF_LIST |
2315		    MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
2316		    MPI2_SGE_FLAGS_SHIFT);
2317		if (cm->cm_flags & MPS_CM_FLAGS_DATAIN) {
2318			saved_buf_len |=
2319			    ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) <<
2320			    MPI2_SGE_FLAGS_SHIFT);
2321		} else {
2322			saved_buf_len |=
2323			    ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) <<
2324			    MPI2_SGE_FLAGS_SHIFT);
2325		}
2326		sge->FlagsLength = htole32(saved_buf_len);
2327		sge->Address.Low = saved_address_low;
2328		sge->Address.High = saved_address_high;
2329	}
2330
2331	cm->cm_sglsize -= len;
2332	bcopy(sgep, cm->cm_sge, len);
2333	cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len);
2334	return (0);
2335}
2336
2337/*
2338 * Add one dma segment to the scatter-gather list for a command.
2339 */
2340int
2341mps_add_dmaseg(struct mps_command *cm, vm_paddr_t pa, size_t len, u_int flags,
2342    int segsleft)
2343{
2344	MPI2_SGE_SIMPLE64 sge;
2345
2346	/*
2347	 * This driver always uses 64-bit address elements for simplicity.
2348	 */
2349	bzero(&sge, sizeof(sge));
2350	flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
2351	    MPI2_SGE_FLAGS_64_BIT_ADDRESSING;
2352	sge.FlagsLength = htole32(len | (flags << MPI2_SGE_FLAGS_SHIFT));
2353	mps_from_u64(pa, &sge.Address);
2354
2355	return (mps_push_sge(cm, &sge, sizeof sge, segsleft));
2356}
2357
2358static void
2359mps_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
2360{
2361	struct mps_softc *sc;
2362	struct mps_command *cm;
2363	u_int i, dir, sflags;
2364
2365	cm = (struct mps_command *)arg;
2366	sc = cm->cm_sc;
2367
2368	/*
2369	 * In this case, just print out a warning and let the chip tell the
2370	 * user they did the wrong thing.
2371	 */
2372	if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) {
2373		mps_dprint(sc, MPS_ERROR,
2374			   "%s: warning: busdma returned %d segments, "
2375			   "more than the %d allowed\n", __func__, nsegs,
2376			   cm->cm_max_segs);
2377	}
2378
2379	/*
2380	 * Set up DMA direction flags.  Bi-directional requests are also handled
2381	 * here.  In that case, both direction flags will be set.
2382	 */
2383	sflags = 0;
2384	if (cm->cm_flags & MPS_CM_FLAGS_SMP_PASS) {
2385		/*
2386		 * We have to add a special case for SMP passthrough, there
2387		 * is no easy way to generically handle it.  The first
2388		 * S/G element is used for the command (therefore the
2389		 * direction bit needs to be set).  The second one is used
2390		 * for the reply.  We'll leave it to the caller to make
2391		 * sure we only have two buffers.
2392		 */
2393		/*
2394		 * Even though the busdma man page says it doesn't make
2395		 * sense to have both direction flags, it does in this case.
2396		 * We have one s/g element being accessed in each direction.
2397		 */
2398		dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD;
2399
2400		/*
2401		 * Set the direction flag on the first buffer in the SMP
2402		 * passthrough request.  We'll clear it for the second one.
2403		 */
2404		sflags |= MPI2_SGE_FLAGS_DIRECTION |
2405			  MPI2_SGE_FLAGS_END_OF_BUFFER;
2406	} else if (cm->cm_flags & MPS_CM_FLAGS_DATAOUT) {
2407		sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC;
2408		dir = BUS_DMASYNC_PREWRITE;
2409	} else
2410		dir = BUS_DMASYNC_PREREAD;
2411
2412	for (i = 0; i < nsegs; i++) {
2413		if ((cm->cm_flags & MPS_CM_FLAGS_SMP_PASS) && (i != 0)) {
2414			sflags &= ~MPI2_SGE_FLAGS_DIRECTION;
2415		}
2416		error = mps_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len,
2417		    sflags, nsegs - i);
2418		if (error != 0) {
2419			/* Resource shortage, roll back! */
2420			if (ratecheck(&sc->lastfail, &mps_chainfail_interval))
2421				mps_dprint(sc, MPS_INFO, "Out of chain frames, "
2422				    "consider increasing hw.mps.max_chains.\n");
2423			cm->cm_flags |= MPS_CM_FLAGS_CHAIN_FAILED;
2424			mps_complete_command(sc, cm);
2425			return;
2426		}
2427	}
2428
2429	bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir);
2430	mps_enqueue_request(sc, cm);
2431
2432	return;
2433}
2434
2435static void
2436mps_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize,
2437	     int error)
2438{
2439	mps_data_cb(arg, segs, nsegs, error);
2440}
2441
2442/*
 * This is the routine to enqueue commands asynchronously.
2444 * Note that the only error path here is from bus_dmamap_load(), which can
2445 * return EINPROGRESS if it is waiting for resources.  Other than this, it's
2446 * assumed that if you have a command in-hand, then you have enough credits
2447 * to use it.
2448 */
2449int
2450mps_map_command(struct mps_softc *sc, struct mps_command *cm)
2451{
2452	int error = 0;
2453
2454	if (cm->cm_flags & MPS_CM_FLAGS_USE_UIO) {
2455		error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap,
2456		    &cm->cm_uio, mps_data_cb2, cm, 0);
2457	} else if (cm->cm_flags & MPS_CM_FLAGS_USE_CCB) {
2458		error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap,
2459		    cm->cm_data, mps_data_cb, cm, 0);
2460	} else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) {
2461		error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap,
2462		    cm->cm_data, cm->cm_length, mps_data_cb, cm, 0);
2463	} else {
2464		/* Add a zero-length element as needed */
2465		if (cm->cm_sge != NULL)
2466			mps_add_dmaseg(cm, 0, 0, 0, 1);
2467		mps_enqueue_request(sc, cm);
2468	}
2469
2470	return (error);
2471}
2472
2473/*
2474 * This is the routine to enqueue commands synchronously.  An error of
2475 * EINPROGRESS from mps_map_command() is ignored since the command will
2476 * be executed and enqueued automatically.  Other errors come from msleep().
2477 */
2478int
2479mps_wait_command(struct mps_softc *sc, struct mps_command *cm, int timeout,
2480    int sleep_flag)
2481{
2482	int error, rc;
2483	struct timeval cur_time, start_time;
2484
2485	if (sc->mps_flags & MPS_FLAGS_DIAGRESET)
2486		return  EBUSY;
2487
2488	cm->cm_complete = NULL;
2489	cm->cm_flags |= (MPS_CM_FLAGS_WAKEUP + MPS_CM_FLAGS_POLLED);
2490	error = mps_map_command(sc, cm);
2491	if ((error != 0) && (error != EINPROGRESS))
2492		return (error);
2493
2494	// Check for context and wait for 50 mSec at a time until time has
2495	// expired or the command has finished.  If msleep can't be used, need
2496	// to poll.
2497	if (curthread->td_no_sleeping != 0)
2498		sleep_flag = NO_SLEEP;
2499	getmicrotime(&start_time);
2500	if (mtx_owned(&sc->mps_mtx) && sleep_flag == CAN_SLEEP) {
2501		error = msleep(cm, &sc->mps_mtx, 0, "mpswait", timeout*hz);
2502	} else {
2503		while ((cm->cm_flags & MPS_CM_FLAGS_COMPLETE) == 0) {
2504			mps_intr_locked(sc);
2505			if (sleep_flag == CAN_SLEEP)
2506				pause("mpswait", hz/20);
2507			else
2508				DELAY(50000);
2509
2510			getmicrotime(&cur_time);
2511			if ((cur_time.tv_sec - start_time.tv_sec) > timeout) {
2512				error = EWOULDBLOCK;
2513				break;
2514			}
2515		}
2516	}
2517
2518	if (error == EWOULDBLOCK) {
2519		mps_dprint(sc, MPS_FAULT, "Calling Reinit from %s\n", __func__);
2520		rc = mps_reinit(sc);
2521		mps_dprint(sc, MPS_FAULT, "Reinit %s\n", (rc == 0) ? "success" :
2522		    "failed");
2523		error = ETIMEDOUT;
2524	}
2525	return (error);
2526}
2527
2528/*
 * This is the routine to enqueue a command synchronously and poll for
2530 * completion.  Its use should be rare.
2531 */
2532int
2533mps_request_polled(struct mps_softc *sc, struct mps_command *cm)
2534{
2535	int error, timeout = 0, rc;
2536	struct timeval cur_time, start_time;
2537
2538	error = 0;
2539
2540	cm->cm_flags |= MPS_CM_FLAGS_POLLED;
2541	cm->cm_complete = NULL;
2542	mps_map_command(sc, cm);
2543
2544	getmicrotime(&start_time);
2545	while ((cm->cm_flags & MPS_CM_FLAGS_COMPLETE) == 0) {
2546		mps_intr_locked(sc);
2547
2548		if (mtx_owned(&sc->mps_mtx))
2549			msleep(&sc->msleep_fake_chan, &sc->mps_mtx, 0,
2550			    "mpspoll", hz/20);
2551		else
2552			pause("mpsdiag", hz/20);
2553
2554		/*
2555		 * Check for real-time timeout and fail if more than 60 seconds.
2556		 */
2557		getmicrotime(&cur_time);
2558		timeout = cur_time.tv_sec - start_time.tv_sec;
2559		if (timeout > 60) {
2560			mps_dprint(sc, MPS_FAULT, "polling failed\n");
2561			error = ETIMEDOUT;
2562			break;
2563		}
2564	}
2565
2566	if (error) {
2567		mps_dprint(sc, MPS_FAULT, "Calling Reinit from %s\n", __func__);
2568		rc = mps_reinit(sc);
2569		mps_dprint(sc, MPS_FAULT, "Reinit %s\n", (rc == 0) ? "success" :
2570		    "failed");
2571	}
2572
2573	return (error);
2574}
2575
2576/*
2577 * The MPT driver had a verbose interface for config pages.  In this driver,
 * reduce it to much simpler terms, similar to the Linux driver.
2579 */
2580int
2581mps_read_config_page(struct mps_softc *sc, struct mps_config_params *params)
2582{
2583	MPI2_CONFIG_REQUEST *req;
2584	struct mps_command *cm;
2585	int error;
2586
2587	if (sc->mps_flags & MPS_FLAGS_BUSY) {
2588		return (EBUSY);
2589	}
2590
2591	cm = mps_alloc_command(sc);
2592	if (cm == NULL) {
2593		return (EBUSY);
2594	}
2595
2596	req = (MPI2_CONFIG_REQUEST *)cm->cm_req;
2597	req->Function = MPI2_FUNCTION_CONFIG;
2598	req->Action = params->action;
2599	req->SGLFlags = 0;
2600	req->ChainOffset = 0;
2601	req->PageAddress = params->page_address;
2602	if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) {
2603		MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr;
2604
2605		hdr = &params->hdr.Ext;
2606		req->ExtPageType = hdr->ExtPageType;
2607		req->ExtPageLength = hdr->ExtPageLength;
2608		req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED;
2609		req->Header.PageLength = 0; /* Must be set to zero */
2610		req->Header.PageNumber = hdr->PageNumber;
2611		req->Header.PageVersion = hdr->PageVersion;
2612	} else {
2613		MPI2_CONFIG_PAGE_HEADER *hdr;
2614
2615		hdr = &params->hdr.Struct;
2616		req->Header.PageType = hdr->PageType;
2617		req->Header.PageNumber = hdr->PageNumber;
2618		req->Header.PageLength = hdr->PageLength;
2619		req->Header.PageVersion = hdr->PageVersion;
2620	}
2621
2622	cm->cm_data = params->buffer;
2623	cm->cm_length = params->length;
2624	cm->cm_sge = &req->PageBufferSGE;
2625	cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION);
2626	cm->cm_flags = MPS_CM_FLAGS_SGE_SIMPLE | MPS_CM_FLAGS_DATAIN;
2627	cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
2628
2629	cm->cm_complete_data = params;
2630	if (params->callback != NULL) {
2631		cm->cm_complete = mps_config_complete;
2632		return (mps_map_command(sc, cm));
2633	} else {
2634		error = mps_wait_command(sc, cm, 0, CAN_SLEEP);
2635		if (error) {
2636			mps_dprint(sc, MPS_FAULT,
2637			    "Error %d reading config page\n", error);
2638			mps_free_command(sc, cm);
2639			return (error);
2640		}
2641		mps_config_complete(sc, cm);
2642	}
2643
2644	return (0);
2645}
2646
/*
 * Writing config pages is not implemented; every call is rejected with
 * EINVAL and neither argument is examined.
 */
int
mps_write_config_page(struct mps_softc *sc, struct mps_config_params *params)
{
	(void)sc;
	(void)params;

	return (EINVAL);
}
2652
2653static void
2654mps_config_complete(struct mps_softc *sc, struct mps_command *cm)
2655{
2656	MPI2_CONFIG_REPLY *reply;
2657	struct mps_config_params *params;
2658
2659	MPS_FUNCTRACE(sc);
2660	params = cm->cm_complete_data;
2661
2662	if (cm->cm_data != NULL) {
2663		bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap,
2664		    BUS_DMASYNC_POSTREAD);
2665		bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap);
2666	}
2667
2668	/*
2669	 * XXX KDM need to do more error recovery?  This results in the
2670	 * device in question not getting probed.
2671	 */
2672	if ((cm->cm_flags & MPS_CM_FLAGS_ERROR_MASK) != 0) {
2673		params->status = MPI2_IOCSTATUS_BUSY;
2674		goto done;
2675	}
2676
2677	reply = (MPI2_CONFIG_REPLY *)cm->cm_reply;
2678	if (reply == NULL) {
2679		params->status = MPI2_IOCSTATUS_BUSY;
2680		goto done;
2681	}
2682	params->status = reply->IOCStatus;
2683	if (params->hdr.Ext.ExtPageType != 0) {
2684		params->hdr.Ext.ExtPageType = reply->ExtPageType;
2685		params->hdr.Ext.ExtPageLength = reply->ExtPageLength;
2686	} else {
2687		params->hdr.Struct.PageType = reply->Header.PageType;
2688		params->hdr.Struct.PageNumber = reply->Header.PageNumber;
2689		params->hdr.Struct.PageLength = reply->Header.PageLength;
2690		params->hdr.Struct.PageVersion = reply->Header.PageVersion;
2691	}
2692
2693done:
2694	mps_free_command(sc, cm);
2695	if (params->callback != NULL)
2696		params->callback(sc, params);
2697
2698	return;
2699}
2700