/*-
 * Copyright (c) 2009 Yahoo! Inc.
 * Copyright (c) 2011-2015 LSI Corp.
 * Copyright (c) 2013-2016 Avago Technologies
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Avago Technologies (LSI) MPT-Fusion Host Adapter FreeBSD
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/dev/mpr/mpr.c 329189 2018-02-13 02:11:39Z mav $");

/* Communications core for Avago Technologies (LSI) MPT3 */

/* TODO Move headers to mprvar */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/selinfo.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/kthread.h>
#include <sys/taskqueue.h>
#include <sys/endian.h>
#include <sys/eventhandler.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/rman.h>
#include <sys/proc.h>

#include <dev/pci/pcivar.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/scsi/scsi_all.h>

#include <dev/mpr/mpi/mpi2_type.h>
#include <dev/mpr/mpi/mpi2.h>
#include <dev/mpr/mpi/mpi2_ioc.h>
#include <dev/mpr/mpi/mpi2_sas.h>
#include <dev/mpr/mpi/mpi2_pci.h>
#include <dev/mpr/mpi/mpi2_cnfg.h>
#include <dev/mpr/mpi/mpi2_init.h>
#include <dev/mpr/mpi/mpi2_tool.h>
#include <dev/mpr/mpr_ioctl.h>
#include <dev/mpr/mprvar.h>
#include <dev/mpr/mpr_table.h>
#include <dev/mpr/mpr_sas.h>

static int mpr_diag_reset(struct mpr_softc *sc, int sleep_flag);
static int mpr_init_queues(struct mpr_softc *sc);
static int mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag);
static int mpr_transition_operational(struct mpr_softc *sc);
static int mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching);
static void mpr_iocfacts_free(struct mpr_softc *sc);
static void mpr_startup(void *arg);
static int mpr_send_iocinit(struct mpr_softc *sc);
static int mpr_alloc_queues(struct mpr_softc *sc);
static int mpr_alloc_replies(struct mpr_softc *sc);
static int mpr_alloc_requests(struct mpr_softc *sc);
static int mpr_alloc_nvme_prp_pages(struct mpr_softc *sc);
static int mpr_attach_log(struct mpr_softc *sc);
static __inline void mpr_complete_command(struct mpr_softc *sc,
    struct mpr_command *cm);
static void mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data,
    MPI2_EVENT_NOTIFICATION_REPLY *reply);
static void mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm);
static void mpr_periodic(void *);
static int mpr_reregister_events(struct mpr_softc *sc);
static void mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm);
static int mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts);
static int mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag);
SYSCTL_NODE(_hw, OID_AUTO, mpr, CTLFLAG_RD, 0, "MPR Driver Parameters");

MALLOC_DEFINE(M_MPR, "mpr", "mpr driver memory");

/*
 * Do a "Diagnostic Reset" aka a hard reset.  This should get the chip out of
 * any state and back to its initialization state machine.
 */
static char mpt2_reset_magic[] = { 0x00, 0x0f, 0x04, 0x0b, 0x02, 0x07, 0x0d };

/*
 * Added this union to smoothly convert le64toh cm->cm_desc.Words.
 * The compiler only supports passing a uint64_t as the argument;
 * otherwise it will throw this error:
 * "aggregate value used where an integer was expected"
 */
typedef union _reply_descriptor {
        u64 word;
        struct {
                u32 low;
                u32 high;
        } u;
} reply_descriptor, request_descriptor;

/* Rate limit chain-fail messages to 1 per minute */
static struct timeval mpr_chainfail_interval = { 60, 0 };

/*
 * sleep_flag can be either CAN_SLEEP or NO_SLEEP.
 * If this function is called from process context, it can sleep and there
 * is no harm in doing so.  If it is called from an interrupt handler, it
 * cannot sleep and the NO_SLEEP flag must be set.  Based on the sleep flag
 * the driver calls either msleep, pause, or DELAY.  msleep and pause are
 * variants of the same thing, but pause is used when mpr_mtx is not held
 * by the driver.
 */
static int
mpr_diag_reset(struct mpr_softc *sc, int sleep_flag)
{
        uint32_t reg;
        int i, error, tries = 0;
        uint8_t first_wait_done = FALSE;

        mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);

        /* Clear any pending interrupts */
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

        /*
         * Force NO_SLEEP for threads that are prohibited from sleeping,
         * e.g. threads running in an interrupt handler.
         */
#if __FreeBSD_version >= 1000029
        if (curthread->td_no_sleeping)
#else //__FreeBSD_version < 1000029
        if (curthread->td_pflags & TDP_NOSLEEPING)
#endif //__FreeBSD_version >= 1000029
                sleep_flag = NO_SLEEP;

        /* Push the magic sequence */
        error = ETIMEDOUT;
        while (tries++ < 20) {
                for (i = 0; i < sizeof(mpt2_reset_magic); i++)
                        mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET,
                            mpt2_reset_magic[i]);

                /* wait 100 msec */
                if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP)
                        msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0,
                            "mprdiag", hz/10);
                else if (sleep_flag == CAN_SLEEP)
                        pause("mprdiag", hz/10);
                else
                        DELAY(100 * 1000);

                reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET);
                if (reg & MPI2_DIAG_DIAG_WRITE_ENABLE) {
                        error = 0;
                        break;
                }
        }
        if (error)
                return (error);
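        /*
         * Illustration of the loop above: reg now has DIAG_WRITE_ENABLE
         * set, i.e. the magic write sequence has unlocked the diagnostic
         * register, and the loop allows roughly 2 seconds (20 tries x
         * 100 msec) for that to happen.
         */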
        /* Send the actual reset.  XXX need to refresh the reg? */
        mpr_regwrite(sc, MPI2_HOST_DIAGNOSTIC_OFFSET,
            reg | MPI2_DIAG_RESET_ADAPTER);

        /* Wait up to 300 seconds in 50ms intervals */
        error = ETIMEDOUT;
        for (i = 0; i < 6000; i++) {
                /*
                 * Wait 50 msec.  If this is the first time through, wait 256
                 * msec to satisfy Diag Reset timing requirements.
                 */
                if (first_wait_done) {
                        if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP)
                                msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0,
                                    "mprdiag", hz/20);
                        else if (sleep_flag == CAN_SLEEP)
                                pause("mprdiag", hz/20);
                        else
                                DELAY(50 * 1000);
                } else {
                        DELAY(256 * 1000);
                        first_wait_done = TRUE;
                }
                /*
                 * Check for the RESET_ADAPTER bit to be cleared first, then
                 * wait for the RESET state to be cleared, which takes a little
                 * longer.
                 */
                reg = mpr_regread(sc, MPI2_HOST_DIAGNOSTIC_OFFSET);
                if (reg & MPI2_DIAG_RESET_ADAPTER) {
                        continue;
                }
                reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
                if ((reg & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_RESET) {
                        error = 0;
                        break;
                }
        }
        if (error)
                return (error);

        mpr_regwrite(sc, MPI2_WRITE_SEQUENCE_OFFSET, 0x0);

        return (0);
}

static int
mpr_message_unit_reset(struct mpr_softc *sc, int sleep_flag)
{

        MPR_FUNCTRACE(sc);

        mpr_regwrite(sc, MPI2_DOORBELL_OFFSET,
            MPI2_FUNCTION_IOC_MESSAGE_UNIT_RESET <<
            MPI2_DOORBELL_FUNCTION_SHIFT);

        if (mpr_wait_db_ack(sc, 5, sleep_flag) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Doorbell handshake failed: <%s>\n",
                    __func__);
                return (ETIMEDOUT);
        }

        return (0);
}

static int
mpr_transition_ready(struct mpr_softc *sc)
{
        uint32_t reg, state;
        int error, tries = 0;
        int sleep_flags;

        MPR_FUNCTRACE(sc);
        /* If we are in attach call, do not sleep */
        sleep_flags = (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE)
            ? CAN_SLEEP : NO_SLEEP;

        error = 0;
        while (tries++ < 1200) {
                reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
                mpr_dprint(sc, MPR_INIT, "Doorbell= 0x%x\n", reg);

                /*
                 * Ensure the IOC is ready to talk.  If it's not, try
                 * resetting it.
                 */
                if (reg & MPI2_DOORBELL_USED) {
                        mpr_diag_reset(sc, sleep_flags);
                        DELAY(50000);
                        continue;
                }

                /* Is the adapter owned by another peer? */
                if ((reg & MPI2_DOORBELL_WHO_INIT_MASK) ==
                    (MPI2_WHOINIT_PCI_PEER << MPI2_DOORBELL_WHO_INIT_SHIFT)) {
                        device_printf(sc->mpr_dev, "IOC is under the control "
                            "of another peer host, aborting initialization.\n");
                        return (ENXIO);
                }

                state = reg & MPI2_IOC_STATE_MASK;
                if (state == MPI2_IOC_STATE_READY) {
                        /* Ready to go! */
                        error = 0;
                        break;
                } else if (state == MPI2_IOC_STATE_FAULT) {
                        mpr_dprint(sc, MPR_FAULT, "IOC in fault state 0x%x\n",
                            state & MPI2_DOORBELL_FAULT_CODE_MASK);
                        mpr_diag_reset(sc, sleep_flags);
                } else if (state == MPI2_IOC_STATE_OPERATIONAL) {
                        /* Need to take ownership */
                        mpr_message_unit_reset(sc, sleep_flags);
                } else if (state == MPI2_IOC_STATE_RESET) {
                        /* Wait a bit, IOC might be in transition */
                        mpr_dprint(sc, MPR_FAULT,
                            "IOC in unexpected reset state\n");
                } else {
                        mpr_dprint(sc, MPR_FAULT,
                            "IOC in unknown state 0x%x\n", state);
                        error = EINVAL;
                        break;
                }
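                /*
                 * Worked out: each pass waits about 50ms below and tries is
                 * capped at 1200, so the loop gives the IOC roughly
                 * 1200 x 50ms = 60 seconds in total to reach READY.
                 */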
                /* Wait 50ms for things to settle down. */
                DELAY(50000);
        }

        if (error)
                device_printf(sc->mpr_dev, "Cannot transition IOC to ready\n");
        return (error);
}

static int
mpr_transition_operational(struct mpr_softc *sc)
{
        uint32_t reg, state;
        int error;

        MPR_FUNCTRACE(sc);

        error = 0;
        reg = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
        mpr_dprint(sc, MPR_INIT, "Doorbell= 0x%x\n", reg);

        state = reg & MPI2_IOC_STATE_MASK;
        if (state != MPI2_IOC_STATE_READY) {
                if ((error = mpr_transition_ready(sc)) != 0) {
                        mpr_dprint(sc, MPR_FAULT,
                            "%s failed to transition ready\n", __func__);
                        return (error);
                }
        }

        error = mpr_send_iocinit(sc);
        return (error);
}

/*
 * This is called during attach and when re-initializing due to a Diag Reset.
 * IOC Facts is used to allocate many of the structures needed by the driver.
 * If called from attach, de-allocation is not required because the driver has
 * not allocated any structures yet, but if called from a Diag Reset, previously
 * allocated structures based on IOC Facts will need to be freed and re-
 * allocated based on the latest IOC Facts.
 */
static int
mpr_iocfacts_allocate(struct mpr_softc *sc, uint8_t attaching)
{
        int error;
        Mpi2IOCFactsReply_t saved_facts;
        uint8_t saved_mode, reallocating;

        mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);

        /* Save old IOC Facts and then only reallocate if Facts have changed */
        if (!attaching) {
                bcopy(sc->facts, &saved_facts, sizeof(MPI2_IOC_FACTS_REPLY));
        }

        /*
         * Get IOC Facts.  In all cases throughout this function, panic if doing
         * a re-initialization and only return the error if attaching so the OS
         * can handle it.
         */
        if ((error = mpr_get_iocfacts(sc, sc->facts)) != 0) {
                if (attaching) {
                        mpr_dprint(sc, MPR_FAULT, "%s failed to get IOC Facts "
                            "with error %d\n", __func__, error);
                        return (error);
                } else {
                        panic("%s failed to get IOC Facts with error %d\n",
                            __func__, error);
                }
        }

        MPR_DPRINT_PAGE(sc, MPR_XINFO, iocfacts, sc->facts);

        snprintf(sc->fw_version, sizeof(sc->fw_version),
            "%02d.%02d.%02d.%02d",
            sc->facts->FWVersion.Struct.Major,
            sc->facts->FWVersion.Struct.Minor,
            sc->facts->FWVersion.Struct.Unit,
            sc->facts->FWVersion.Struct.Dev);

        mpr_printf(sc, "Firmware: %s, Driver: %s\n", sc->fw_version,
            MPR_DRIVER_VERSION);
        mpr_printf(sc, "IOCCapabilities: %b\n", sc->facts->IOCCapabilities,
            "\20" "\3ScsiTaskFull" "\4DiagTrace" "\5SnapBuf" "\6ExtBuf"
            "\7EEDP" "\10BiDirTarg" "\11Multicast" "\14TransRetry" "\15IR"
            "\16EventReplay" "\17RaidAccel" "\20MSIXIndex" "\21HostDisc"
            "\22FastPath" "\23RDPQArray" "\24AtomicReqDesc" "\25PCIeSRIOV");

        /*
         * If the chip doesn't support event replay then a hard reset will be
         * required to trigger a full discovery.  Do the reset here then
         * retransition to Ready.  A hard reset might have already been done,
         * but it doesn't hurt to do it again.  Only do this if attaching, not
         * for a Diag Reset.
         */
        if (attaching) {
                if ((sc->facts->IOCCapabilities &
                    MPI2_IOCFACTS_CAPABILITY_EVENT_REPLAY) == 0) {
                        mpr_diag_reset(sc, NO_SLEEP);
                        if ((error = mpr_transition_ready(sc)) != 0) {
                                mpr_dprint(sc, MPR_FAULT, "%s failed to "
                                    "transition to ready with error %d\n",
                                    __func__, error);
                                return (error);
                        }
                }
        }
        /*
         * Set flag if IR Firmware is loaded.  If the RAID Capability has
         * changed from the previous IOC Facts, log a warning, but only if
         * checking this after a Diag Reset and not during attach.
         */
        saved_mode = sc->ir_firmware;
        if (sc->facts->IOCCapabilities &
            MPI2_IOCFACTS_CAPABILITY_INTEGRATED_RAID)
                sc->ir_firmware = 1;
        if (!attaching) {
                if (sc->ir_firmware != saved_mode) {
                        mpr_dprint(sc, MPR_FAULT, "%s new IR/IT mode in IOC "
                            "Facts does not match previous mode\n", __func__);
                }
        }

        /* Only deallocate and reallocate if relevant IOC Facts have changed */
        reallocating = FALSE;
        sc->mpr_flags &= ~MPR_FLAGS_REALLOCATED;

        if ((!attaching) &&
            ((saved_facts.MsgVersion != sc->facts->MsgVersion) ||
            (saved_facts.HeaderVersion != sc->facts->HeaderVersion) ||
            (saved_facts.MaxChainDepth != sc->facts->MaxChainDepth) ||
            (saved_facts.RequestCredit != sc->facts->RequestCredit) ||
            (saved_facts.ProductID != sc->facts->ProductID) ||
            (saved_facts.IOCCapabilities != sc->facts->IOCCapabilities) ||
            (saved_facts.IOCRequestFrameSize !=
            sc->facts->IOCRequestFrameSize) ||
            (saved_facts.IOCMaxChainSegmentSize !=
            sc->facts->IOCMaxChainSegmentSize) ||
            (saved_facts.MaxTargets != sc->facts->MaxTargets) ||
            (saved_facts.MaxSasExpanders != sc->facts->MaxSasExpanders) ||
            (saved_facts.MaxEnclosures != sc->facts->MaxEnclosures) ||
            (saved_facts.HighPriorityCredit != sc->facts->HighPriorityCredit) ||
            (saved_facts.MaxReplyDescriptorPostQueueDepth !=
            sc->facts->MaxReplyDescriptorPostQueueDepth) ||
            (saved_facts.ReplyFrameSize != sc->facts->ReplyFrameSize) ||
            (saved_facts.MaxVolumes != sc->facts->MaxVolumes) ||
            (saved_facts.MaxPersistentEntries !=
            sc->facts->MaxPersistentEntries))) {
                reallocating = TRUE;

                /* Record that we reallocated everything */
                sc->mpr_flags |= MPR_FLAGS_REALLOCATED;
        }

        /*
         * Some things should be done if attaching or re-allocating after a Diag
         * Reset, but are not needed after a Diag Reset if the FW has not
         * changed.
         */
        if (attaching || reallocating) {
                /*
                 * Check if controller supports FW diag buffers and set flag to
                 * enable each type.
                 */
                if (sc->facts->IOCCapabilities &
                    MPI2_IOCFACTS_CAPABILITY_DIAG_TRACE_BUFFER)
                        sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_TRACE].
                            enabled = TRUE;
                if (sc->facts->IOCCapabilities &
                    MPI2_IOCFACTS_CAPABILITY_SNAPSHOT_BUFFER)
                        sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_SNAPSHOT].
                            enabled = TRUE;
                if (sc->facts->IOCCapabilities &
                    MPI2_IOCFACTS_CAPABILITY_EXTENDED_BUFFER)
                        sc->fw_diag_buffer_list[MPI2_DIAG_BUF_TYPE_EXTENDED].
                            enabled = TRUE;

                /*
                 * Set flags for some supported items.
                 */
                if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_EEDP)
                        sc->eedp_enabled = TRUE;
                if (sc->facts->IOCCapabilities & MPI2_IOCFACTS_CAPABILITY_TLR)
                        sc->control_TLR = TRUE;
                if (sc->facts->IOCCapabilities &
                    MPI26_IOCFACTS_CAPABILITY_ATOMIC_REQ)
                        sc->atomic_desc_capable = TRUE;

                /*
                 * Size the queues.  Since the reply queues always need one free
                 * entry, we'll just deduct one reply message here.
                 */
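                /*
                 * Illustration with hypothetical credits: if the IOC reported
                 * HighPriorityCredit = 128 and RequestCredit = 1024, then
                 * num_prireqs = min(MPR_PRI_REQ_FRAMES, 128) and num_reqs =
                 * min(MPR_REQ_FRAMES, 1024) + num_prireqs below.
                 */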
                sc->num_prireqs = MIN(MPR_PRI_REQ_FRAMES,
                    sc->facts->HighPriorityCredit);
                sc->num_reqs = MIN(MPR_REQ_FRAMES, sc->facts->RequestCredit) +
                    sc->num_prireqs;
                sc->num_replies = MIN(MPR_REPLY_FRAMES + MPR_EVT_REPLY_FRAMES,
                    sc->facts->MaxReplyDescriptorPostQueueDepth) - 1;

                /*
                 * Initialize all Tail Queues
                 */
                TAILQ_INIT(&sc->req_list);
                TAILQ_INIT(&sc->high_priority_req_list);
                TAILQ_INIT(&sc->chain_list);
                TAILQ_INIT(&sc->prp_page_list);
                TAILQ_INIT(&sc->tm_list);
        }

        /*
         * If doing a Diag Reset and the FW is significantly different
         * (reallocating will be set above in IOC Facts comparison), then all
         * buffers based on the IOC Facts will need to be freed before they are
         * reallocated.
         */
        if (reallocating) {
                mpr_iocfacts_free(sc);
                mprsas_realloc_targets(sc, saved_facts.MaxTargets +
                    saved_facts.MaxVolumes);
        }

        /*
         * Any deallocation has been completed.  Now start reallocating
         * if needed.  Will only need to reallocate if attaching or if the new
         * IOC Facts are different from the previous IOC Facts after a Diag
         * Reset.  Targets have already been allocated above if needed.
         */
        if (attaching || reallocating) {
                if (((error = mpr_alloc_queues(sc)) != 0) ||
                    ((error = mpr_alloc_replies(sc)) != 0) ||
                    ((error = mpr_alloc_requests(sc)) != 0)) {
                        if (attaching) {
                                mpr_dprint(sc, MPR_FAULT, "%s failed to alloc "
                                    "queues with error %d\n", __func__, error);
                                mpr_free(sc);
                                return (error);
                        } else {
                                panic("%s failed to alloc queues with error "
                                    "%d\n", __func__, error);
                        }
                }
        }

        /* Always initialize the queues */
        bzero(sc->free_queue, sc->fqdepth * 4);
        mpr_init_queues(sc);

        /*
         * Always get the chip out of the reset state, but only panic if not
         * attaching.  If attaching and there is an error, that is handled by
         * the OS.
         */
        error = mpr_transition_operational(sc);
        if (error != 0) {
                if (attaching) {
                        mpr_printf(sc, "%s failed to transition to operational "
                            "with error %d\n", __func__, error);
                        mpr_free(sc);
                        return (error);
                } else {
                        panic("%s failed to transition to operational with "
                            "error %d\n", __func__, error);
                }
        }

        /*
         * Finish the queue initialization.
         * These are set here instead of in mpr_init_queues() because the
         * IOC resets these values during the state transition in
         * mpr_transition_operational().  The free index is set to 1
         * because the corresponding index in the IOC is set to 0, and the
         * IOC treats the queues as full if both are set to the same value.
         * Hence the reason that the queue can't hold all of the possible
         * replies.
         */
        sc->replypostindex = 0;
        mpr_regwrite(sc, MPI2_REPLY_FREE_HOST_INDEX_OFFSET, sc->replyfreeindex);
        mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, 0);
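        /*
         * Illustration: mpr_init_queues() starts replyfreeindex at
         * num_replies while the IOC's corresponding index starts at 0, so
         * the host and IOC indexes begin apart; keeping one entry unused
         * avoids the full/empty ambiguity described above.
         */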
        /*
         * Attach the subsystems so they can prepare their event masks.
         */
        /* XXX Should be dynamic so that IM/IR and user modules can attach */
        if (attaching) {
                if (((error = mpr_attach_log(sc)) != 0) ||
                    ((error = mpr_attach_sas(sc)) != 0) ||
                    ((error = mpr_attach_user(sc)) != 0)) {
                        mpr_printf(sc, "%s failed to attach all subsystems: "
                            "error %d\n", __func__, error);
                        mpr_free(sc);
                        return (error);
                }

                if ((error = mpr_pci_setup_interrupts(sc)) != 0) {
                        mpr_printf(sc, "%s failed to setup interrupts\n",
                            __func__);
                        mpr_free(sc);
                        return (error);
                }
        }

        return (error);
}

/*
 * This is called if memory is being freed (during detach for example) and when
 * buffers need to be reallocated due to a Diag Reset.
 */
static void
mpr_iocfacts_free(struct mpr_softc *sc)
{
        struct mpr_command *cm;
        int i;

        mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);

        if (sc->free_busaddr != 0)
                bus_dmamap_unload(sc->queues_dmat, sc->queues_map);
        if (sc->free_queue != NULL)
                bus_dmamem_free(sc->queues_dmat, sc->free_queue,
                    sc->queues_map);
        if (sc->queues_dmat != NULL)
                bus_dma_tag_destroy(sc->queues_dmat);

        if (sc->chain_busaddr != 0)
                bus_dmamap_unload(sc->chain_dmat, sc->chain_map);
        if (sc->chain_frames != NULL)
                bus_dmamem_free(sc->chain_dmat, sc->chain_frames,
                    sc->chain_map);
        if (sc->chain_dmat != NULL)
                bus_dma_tag_destroy(sc->chain_dmat);

        if (sc->sense_busaddr != 0)
                bus_dmamap_unload(sc->sense_dmat, sc->sense_map);
        if (sc->sense_frames != NULL)
                bus_dmamem_free(sc->sense_dmat, sc->sense_frames,
                    sc->sense_map);
        if (sc->sense_dmat != NULL)
                bus_dma_tag_destroy(sc->sense_dmat);

        if (sc->prp_page_busaddr != 0)
                bus_dmamap_unload(sc->prp_page_dmat, sc->prp_page_map);
        if (sc->prp_pages != NULL)
                bus_dmamem_free(sc->prp_page_dmat, sc->prp_pages,
                    sc->prp_page_map);
        if (sc->prp_page_dmat != NULL)
                bus_dma_tag_destroy(sc->prp_page_dmat);

        if (sc->reply_busaddr != 0)
                bus_dmamap_unload(sc->reply_dmat, sc->reply_map);
        if (sc->reply_frames != NULL)
                bus_dmamem_free(sc->reply_dmat, sc->reply_frames,
                    sc->reply_map);
        if (sc->reply_dmat != NULL)
                bus_dma_tag_destroy(sc->reply_dmat);

        if (sc->req_busaddr != 0)
                bus_dmamap_unload(sc->req_dmat, sc->req_map);
        if (sc->req_frames != NULL)
                bus_dmamem_free(sc->req_dmat, sc->req_frames, sc->req_map);
        if (sc->req_dmat != NULL)
                bus_dma_tag_destroy(sc->req_dmat);

        if (sc->chains != NULL)
                free(sc->chains, M_MPR);
        if (sc->prps != NULL)
                free(sc->prps, M_MPR);
        if (sc->commands != NULL) {
                for (i = 1; i < sc->num_reqs; i++) {
                        cm = &sc->commands[i];
                        bus_dmamap_destroy(sc->buffer_dmat, cm->cm_dmamap);
                }
                free(sc->commands, M_MPR);
        }
        if (sc->buffer_dmat != NULL)
                bus_dma_tag_destroy(sc->buffer_dmat);
}

/*
 * The terms diag reset and hard reset are used interchangeably in the MPI
 * docs to mean resetting the controller chip.  In this code diag reset
 * cleans everything up, and the hard reset function just sends the reset
 * sequence to the chip.  This should probably be refactored so that every
 * subsystem gets a reset notification of some sort, and can clean up
 * appropriately.
 */
int
mpr_reinit(struct mpr_softc *sc)
{
        int error;
        struct mprsas_softc *sassc;

        sassc = sc->sassc;

        MPR_FUNCTRACE(sc);

        mtx_assert(&sc->mpr_mtx, MA_OWNED);

        if (sc->mpr_flags & MPR_FLAGS_DIAGRESET) {
                mpr_dprint(sc, MPR_INIT, "%s reset already in progress\n",
                    __func__);
                return 0;
        }

        mpr_dprint(sc, MPR_INFO, "Reinitializing controller\n");
        /*
         * Make sure the completion callbacks can recognize they're getting
         * a NULL cm_reply due to a reset.
         */
        sc->mpr_flags |= MPR_FLAGS_DIAGRESET;

        /*
         * Mask interrupts here.
         */
        mpr_dprint(sc, MPR_INIT, "%s mask interrupts\n", __func__);
        mpr_mask_intr(sc);

        error = mpr_diag_reset(sc, CAN_SLEEP);
        if (error != 0) {
                panic("%s hard reset failed with error %d\n", __func__, error);
        }

        /* Restore the PCI state, including the MSI-X registers */
        mpr_pci_restore(sc);

        /* Give the I/O subsystem special priority to get itself prepared */
        mprsas_handle_reinit(sc);

        /*
         * Get IOC Facts and allocate all structures based on this information.
         * The attach function will also call mpr_iocfacts_allocate at startup.
         * If relevant values have changed in IOC Facts, this function will free
         * all of the memory based on IOC Facts and reallocate that memory.
         */
        if ((error = mpr_iocfacts_allocate(sc, FALSE)) != 0) {
                panic("%s IOC Facts based allocation failed with error %d\n",
                    __func__, error);
        }

        /*
         * Mapping structures will be re-allocated after getting IOC Page8, so
         * free these structures here.
         */
        mpr_mapping_exit(sc);

        /*
         * The static page function currently read is IOC Page8.  Others can be
         * added in future.  It's possible that the values in IOC Page8 have
         * changed after a Diag Reset due to user modification, so always read
         * these.  Interrupts are masked, so unmask them before getting config
         * pages.
         */
        mpr_unmask_intr(sc);
        sc->mpr_flags &= ~MPR_FLAGS_DIAGRESET;
        mpr_base_static_config_pages(sc);

        /*
         * Some mapping info is based in IOC Page8 data, so re-initialize the
         * mapping tables.
         */
        mpr_mapping_initialize(sc);

        /*
         * Restart will reload the event masks clobbered by the reset, and
         * then enable the port.
         */
        mpr_reregister_events(sc);

        /* the end of discovery will release the simq, so we're done. */
        mpr_dprint(sc, MPR_INFO, "%s finished sc %p post %u free %u\n",
            __func__, sc, sc->replypostindex, sc->replyfreeindex);
        mprsas_release_simq_reinit(sassc);

        return 0;
}
/*
 * Wait for the chip to ACK a word that we've put into its FIFO.
 * Wait for <timeout> seconds.  Each pass through the loop waits either
 * 1 millisecond (when sleeping is allowed) or 0.5 millisecond (busy-wait),
 * so the total wait works out to roughly <timeout> seconds either way.
 */
static int
mpr_wait_db_ack(struct mpr_softc *sc, int timeout, int sleep_flag)
{
        u32 cntdn, count;
        u32 int_status;
        u32 doorbell;

        count = 0;
        cntdn = (sleep_flag == CAN_SLEEP) ? 1000*timeout : 2000*timeout;
        do {
                int_status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
                if (!(int_status & MPI2_HIS_SYS2IOC_DB_STATUS)) {
                        mpr_dprint(sc, MPR_INIT, "%s: successful count(%d), "
                            "timeout(%d)\n", __func__, count, timeout);
                        return 0;
                } else if (int_status & MPI2_HIS_IOC2SYS_DB_STATUS) {
                        doorbell = mpr_regread(sc, MPI2_DOORBELL_OFFSET);
                        if ((doorbell & MPI2_IOC_STATE_MASK) ==
                            MPI2_IOC_STATE_FAULT) {
                                mpr_dprint(sc, MPR_FAULT,
                                    "fault_state(0x%04x)!\n", doorbell);
                                return (EFAULT);
                        }
                } else if (int_status == 0xFFFFFFFF)
                        goto out;

                /*
                 * If it can sleep, sleep for 1 millisecond, else busy loop
                 * for 0.5 millisecond.
                 */
                if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP)
                        msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0, "mprdba",
                            hz/1000);
                else if (sleep_flag == CAN_SLEEP)
                        pause("mprdba", hz/1000);
                else
                        DELAY(500);
                count++;
        } while (--cntdn);

out:
        mpr_dprint(sc, MPR_FAULT, "%s: failed due to timeout count(%d), "
            "int_status(%x)!\n", __func__, count, int_status);
        return (ETIMEDOUT);
}

/* Wait for the chip to signal that the next word in its FIFO can be fetched */
static int
mpr_wait_db_int(struct mpr_softc *sc)
{
        int retry;

        for (retry = 0; retry < MPR_DB_MAX_WAIT; retry++) {
                if ((mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET) &
                    MPI2_HIS_IOC2SYS_DB_STATUS) != 0)
                        return (0);
                DELAY(2000);
        }
        return (ETIMEDOUT);
}

/* Step through the synchronous command state machine, i.e. "Doorbell mode" */
static int
mpr_request_sync(struct mpr_softc *sc, void *req, MPI2_DEFAULT_REPLY *reply,
    int req_sz, int reply_sz, int timeout)
{
        uint32_t *data32;
        uint16_t *data16;
        int i, count, ioc_sz, residual;
        int sleep_flags = CAN_SLEEP;

#if __FreeBSD_version >= 1000029
        if (curthread->td_no_sleeping)
#else //__FreeBSD_version < 1000029
        if (curthread->td_pflags & TDP_NOSLEEPING)
#endif //__FreeBSD_version >= 1000029
                sleep_flags = NO_SLEEP;

        /* Step 1 */
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

        /* Step 2 */
        if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
                return (EBUSY);

        /*
         * Step 3
         * Announce that a message is coming through the doorbell.  Messages
         * are pushed as 32-bit words, so round up if needed.
         */
        count = (req_sz + 3) / 4;
        mpr_regwrite(sc, MPI2_DOORBELL_OFFSET,
            (MPI2_FUNCTION_HANDSHAKE << MPI2_DOORBELL_FUNCTION_SHIFT) |
            (count << MPI2_DOORBELL_ADD_DWORDS_SHIFT));

        /* Step 4 */
        if (mpr_wait_db_int(sc) ||
            (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED) == 0) {
                mpr_dprint(sc, MPR_FAULT, "Doorbell failed to activate\n");
                return (ENXIO);
        }
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
        if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Doorbell handshake failed\n");
                return (ENXIO);
        }

        /* Step 5 */
        /* Clock out the message data synchronously in 32-bit dwords */
        data32 = (uint32_t *)req;
        for (i = 0; i < count; i++) {
                mpr_regwrite(sc, MPI2_DOORBELL_OFFSET, htole32(data32[i]));
                if (mpr_wait_db_ack(sc, 5, sleep_flags) != 0) {
                        mpr_dprint(sc, MPR_FAULT,
                            "Timeout while writing doorbell\n");
                        return (ENXIO);
                }
        }
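        /*
         * Illustration of the Step 3/5 sizing: a hypothetical 26-byte
         * request rounds up to count = (26 + 3) / 4 = 7 dwords, each
         * clocked out above with a doorbell ACK wait in between.
         */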
        /* Step 6 */
        /*
         * Clock in the reply in 16-bit words.  The total length of the
         * message is always in the 4th byte, so clock out the first 2 words
         * manually, then loop the rest.
         */
        data16 = (uint16_t *)reply;
        if (mpr_wait_db_int(sc) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 0\n");
                return (ENXIO);
        }
        data16[0] =
            mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK;
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
        if (mpr_wait_db_int(sc) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell 1\n");
                return (ENXIO);
        }
        data16[1] =
            mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_DATA_MASK;
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

        /* Number of 32bit words in the message */
        ioc_sz = reply->MsgLength;

        /*
         * Figure out how many 16bit words to clock in without overrunning.
         * The precision loss with dividing reply_sz can safely be
         * ignored because the messages can only be multiples of 32bits.
         */
        residual = 0;
        count = MIN((reply_sz / 4), ioc_sz) * 2;
        if (count < ioc_sz * 2) {
                residual = ioc_sz * 2 - count;
                mpr_dprint(sc, MPR_ERROR, "Driver error, throwing away %d "
                    "residual message words\n", residual);
        }

        for (i = 2; i < count; i++) {
                if (mpr_wait_db_int(sc) != 0) {
                        mpr_dprint(sc, MPR_FAULT,
                            "Timeout reading doorbell %d\n", i);
                        return (ENXIO);
                }
                data16[i] = mpr_regread(sc, MPI2_DOORBELL_OFFSET) &
                    MPI2_DOORBELL_DATA_MASK;
                mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
        }

        /*
         * Pull out residual words that won't fit into the provided buffer.
         * This keeps the chip from hanging due to a driver programming
         * error.
         */
        while (residual--) {
                if (mpr_wait_db_int(sc) != 0) {
                        mpr_dprint(sc, MPR_FAULT, "Timeout reading doorbell\n");
                        return (ENXIO);
                }
                (void)mpr_regread(sc, MPI2_DOORBELL_OFFSET);
                mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);
        }

        /* Step 7 */
        if (mpr_wait_db_int(sc) != 0) {
                mpr_dprint(sc, MPR_FAULT, "Timeout waiting to exit doorbell\n");
                return (ENXIO);
        }
        if (mpr_regread(sc, MPI2_DOORBELL_OFFSET) & MPI2_DOORBELL_USED)
                mpr_dprint(sc, MPR_FAULT, "Warning, doorbell still active\n");
        mpr_regwrite(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET, 0x0);

        return (0);
}

static void
mpr_enqueue_request(struct mpr_softc *sc, struct mpr_command *cm)
{
        request_descriptor rd;

        MPR_FUNCTRACE(sc);
        mpr_dprint(sc, MPR_TRACE, "SMID %u cm %p ccb %p\n",
            cm->cm_desc.Default.SMID, cm, cm->cm_ccb);

        if (sc->mpr_flags & MPR_FLAGS_ATTACH_DONE && !(sc->mpr_flags &
            MPR_FLAGS_SHUTDOWN))
                mtx_assert(&sc->mpr_mtx, MA_OWNED);

        if (++sc->io_cmds_active > sc->io_cmds_highwater)
                sc->io_cmds_highwater++;

        if (sc->atomic_desc_capable) {
                rd.u.low = cm->cm_desc.Words.Low;
                mpr_regwrite(sc, MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET,
                    rd.u.low);
        } else {
                rd.u.low = cm->cm_desc.Words.Low;
                rd.u.high = cm->cm_desc.Words.High;
                rd.word = htole64(rd.word);
                mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET,
                    rd.u.low);
                mpr_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_HIGH_OFFSET,
                    rd.u.high);
        }
}
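/*
 * A note on mpr_enqueue_request() above: controllers that advertise atomic
 * request descriptors accept the whole descriptor in a single 32-bit write,
 * while older controllers need the low and high halves of the descriptor
 * written separately, low half first, as done above.
 */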
/*
 * Just the FACTS, ma'am.
 */
static int
mpr_get_iocfacts(struct mpr_softc *sc, MPI2_IOC_FACTS_REPLY *facts)
{
        MPI2_DEFAULT_REPLY *reply;
        MPI2_IOC_FACTS_REQUEST request;
        int error, req_sz, reply_sz;

        MPR_FUNCTRACE(sc);

        req_sz = sizeof(MPI2_IOC_FACTS_REQUEST);
        reply_sz = sizeof(MPI2_IOC_FACTS_REPLY);
        reply = (MPI2_DEFAULT_REPLY *)facts;

        bzero(&request, req_sz);
        request.Function = MPI2_FUNCTION_IOC_FACTS;
        error = mpr_request_sync(sc, &request, reply, req_sz, reply_sz, 5);

        return (error);
}

static int
mpr_send_iocinit(struct mpr_softc *sc)
{
        MPI2_IOC_INIT_REQUEST init;
        MPI2_DEFAULT_REPLY reply;
        int req_sz, reply_sz, error;
        struct timeval now;
        uint64_t time_in_msec;

        MPR_FUNCTRACE(sc);

        req_sz = sizeof(MPI2_IOC_INIT_REQUEST);
        reply_sz = sizeof(MPI2_IOC_INIT_REPLY);
        bzero(&init, req_sz);
        bzero(&reply, reply_sz);

        /*
         * Fill in the init block.  Note that most addresses are
         * deliberately in the lower 32bits of memory.  This is a micro-
         * optimization for PCI/PCIX, though it's not clear if it helps PCIe.
         */
        init.Function = MPI2_FUNCTION_IOC_INIT;
        init.WhoInit = MPI2_WHOINIT_HOST_DRIVER;
        init.MsgVersion = htole16(MPI2_VERSION);
        init.HeaderVersion = htole16(MPI2_HEADER_VERSION);
        init.SystemRequestFrameSize = htole16(sc->facts->IOCRequestFrameSize);
        init.ReplyDescriptorPostQueueDepth = htole16(sc->pqdepth);
        init.ReplyFreeQueueDepth = htole16(sc->fqdepth);
        init.SenseBufferAddressHigh = 0;
        init.SystemReplyAddressHigh = 0;
        init.SystemRequestFrameBaseAddress.High = 0;
        init.SystemRequestFrameBaseAddress.Low =
            htole32((uint32_t)sc->req_busaddr);
        init.ReplyDescriptorPostQueueAddress.High = 0;
        init.ReplyDescriptorPostQueueAddress.Low =
            htole32((uint32_t)sc->post_busaddr);
        init.ReplyFreeQueueAddress.High = 0;
        init.ReplyFreeQueueAddress.Low = htole32((uint32_t)sc->free_busaddr);
        getmicrotime(&now);
        time_in_msec = (now.tv_sec * 1000 + now.tv_usec/1000);
        init.TimeStamp.High = htole32((time_in_msec >> 32) & 0xFFFFFFFF);
        init.TimeStamp.Low = htole32(time_in_msec & 0xFFFFFFFF);
        init.HostPageSize = HOST_PAGE_SIZE_4K;

        error = mpr_request_sync(sc, &init, &reply, req_sz, reply_sz, 5);
        if ((reply.IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS)
                error = ENXIO;

        mpr_dprint(sc, MPR_INIT, "IOCInit status= 0x%x\n", reply.IOCStatus);
        return (error);
}

void
mpr_memaddr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
        bus_addr_t *addr;

        addr = arg;
        *addr = segs[0].ds_addr;
}

static int
mpr_alloc_queues(struct mpr_softc *sc)
{
        bus_addr_t queues_busaddr;
        uint8_t *queues;
        int qsize, fqsize, pqsize;

        /*
         * The reply free queue contains 4 byte entries in multiples of 16 and
         * aligned on a 16 byte boundary.  There must always be an unused entry.
         * This queue supplies fresh reply frames for the firmware to use.
         *
         * The reply descriptor post queue contains 8 byte entries in
         * multiples of 16 and aligned on a 16 byte boundary.  This queue
         * contains filled-in reply frames sent from the firmware to the host.
         *
         * These two queues are allocated together for simplicity.
         */
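        /*
         * Worked example (hypothetical depth): with num_replies = 1023, both
         * depths round up to 1024, giving fqsize = 4KB of 4-byte free queue
         * entries plus pqsize = 8KB of 8-byte post descriptors, 12KB total.
         */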
        sc->fqdepth = roundup2(sc->num_replies + 1, 16);
        sc->pqdepth = roundup2(sc->num_replies + 1, 16);
        fqsize = sc->fqdepth * 4;
        pqsize = sc->pqdepth * 8;
        qsize = fqsize + pqsize;

        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                16, 0,                  /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                qsize,                  /* maxsize */
                                1,                      /* nsegments */
                                qsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->queues_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate queues DMA tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->queues_dmat, (void **)&queues, BUS_DMA_NOWAIT,
            &sc->queues_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate queues memory\n");
                return (ENOMEM);
        }
        bzero(queues, qsize);
        bus_dmamap_load(sc->queues_dmat, sc->queues_map, queues, qsize,
            mpr_memaddr_cb, &queues_busaddr, 0);

        sc->free_queue = (uint32_t *)queues;
        sc->free_busaddr = queues_busaddr;
        sc->post_queue = (MPI2_REPLY_DESCRIPTORS_UNION *)(queues + fqsize);
        sc->post_busaddr = queues_busaddr + fqsize;

        return (0);
}

static int
mpr_alloc_replies(struct mpr_softc *sc)
{
        int rsize, num_replies;

        /*
         * sc->num_replies should be one less than sc->fqdepth.  We need to
         * allocate space for sc->fqdepth replies, but only sc->num_replies
         * replies can be used at once.
         */
        num_replies = max(sc->fqdepth, sc->num_replies);

        rsize = sc->facts->ReplyFrameSize * num_replies * 4;
        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                4, 0,                   /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                rsize,                  /* maxsize */
                                1,                      /* nsegments */
                                rsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->reply_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate replies DMA tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->reply_dmat, (void **)&sc->reply_frames,
            BUS_DMA_NOWAIT, &sc->reply_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate replies memory\n");
                return (ENOMEM);
        }
        bzero(sc->reply_frames, rsize);
        bus_dmamap_load(sc->reply_dmat, sc->reply_map, sc->reply_frames, rsize,
            mpr_memaddr_cb, &sc->reply_busaddr, 0);

        return (0);
}

static int
mpr_alloc_requests(struct mpr_softc *sc)
{
        struct mpr_command *cm;
        struct mpr_chain *chain;
        int i, rsize, nsegs;

        rsize = sc->facts->IOCRequestFrameSize * sc->num_reqs * 4;
        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                16, 0,                  /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                rsize,                  /* maxsize */
                                1,                      /* nsegments */
                                rsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->req_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate request DMA tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->req_dmat, (void **)&sc->req_frames,
            BUS_DMA_NOWAIT, &sc->req_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate request memory\n");
                return (ENOMEM);
        }
        bzero(sc->req_frames, rsize);
        bus_dmamap_load(sc->req_dmat, sc->req_map, sc->req_frames, rsize,
            mpr_memaddr_cb, &sc->req_busaddr, 0);
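        /*
         * Worked example of the sizing above (hypothetical value):
         * IOCRequestFrameSize is in 4-byte units, so a value of 32 yields
         * 128-byte request frames, one frame per possible command.
         */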
        /*
         * Gen3 and beyond uses the IOCMaxChainSegmentSize from IOC Facts to
         * get the size of a Chain Frame.  Previous versions use the size as a
         * Request Frame for the Chain Frame size.  If IOCMaxChainSegmentSize
         * is 0, use the default value.  The IOCMaxChainSegmentSize is the
         * number of 16-byte elements that can fit in a Chain Frame, which is
         * the size of an IEEE Simple SGE.
         */
        if (sc->facts->MsgVersion >= MPI2_VERSION_02_05) {
                sc->chain_seg_size =
                    htole16(sc->facts->IOCMaxChainSegmentSize);
                if (sc->chain_seg_size == 0) {
                        sc->chain_frame_size = MPR_DEFAULT_CHAIN_SEG_SIZE *
                            MPR_MAX_CHAIN_ELEMENT_SIZE;
                } else {
                        sc->chain_frame_size = sc->chain_seg_size *
                            MPR_MAX_CHAIN_ELEMENT_SIZE;
                }
        } else {
                sc->chain_frame_size = sc->facts->IOCRequestFrameSize * 4;
        }
        rsize = sc->chain_frame_size * sc->max_chains;
        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                16, 0,                  /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR,      /* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                rsize,                  /* maxsize */
                                1,                      /* nsegments */
                                rsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->chain_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate chain DMA tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->chain_dmat, (void **)&sc->chain_frames,
            BUS_DMA_NOWAIT, &sc->chain_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate chain memory\n");
                return (ENOMEM);
        }
        bzero(sc->chain_frames, rsize);
        bus_dmamap_load(sc->chain_dmat, sc->chain_map, sc->chain_frames, rsize,
            mpr_memaddr_cb, &sc->chain_busaddr, 0);

        rsize = MPR_SENSE_LEN * sc->num_reqs;
        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                1, 0,                   /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                rsize,                  /* maxsize */
                                1,                      /* nsegments */
                                rsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->sense_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate sense DMA tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->sense_dmat, (void **)&sc->sense_frames,
            BUS_DMA_NOWAIT, &sc->sense_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate sense memory\n");
                return (ENOMEM);
        }
        bzero(sc->sense_frames, rsize);
        bus_dmamap_load(sc->sense_dmat, sc->sense_map, sc->sense_frames, rsize,
            mpr_memaddr_cb, &sc->sense_busaddr, 0);

        sc->chains = malloc(sizeof(struct mpr_chain) * sc->max_chains, M_MPR,
            M_WAITOK | M_ZERO);
        if (!sc->chains) {
                device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n",
                    __func__, __LINE__);
                return (ENOMEM);
        }
        for (i = 0; i < sc->max_chains; i++) {
                chain = &sc->chains[i];
                chain->chain = (MPI2_SGE_IO_UNION *)(sc->chain_frames +
                    i * sc->chain_frame_size);
                chain->chain_busaddr = sc->chain_busaddr +
                    i * sc->chain_frame_size;
                mpr_free_chain(sc, chain);
                sc->chain_free_lowwater++;
        }
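        /*
         * Worked example of the chain sizing above (hypothetical value): an
         * IOCMaxChainSegmentSize of 8 gives chain_frame_size = 8 x 16 = 128
         * bytes, i.e. room for eight IEEE Simple SGEs per chain frame.
         */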
        /*
         * Allocate NVMe PRP Pages for NVMe SGL support only if the FW supports
         * these devices.
         */
        if ((sc->facts->MsgVersion >= MPI2_VERSION_02_06) &&
            (sc->facts->ProtocolFlags & MPI2_IOCFACTS_PROTOCOL_NVME_DEVICES)) {
                if (mpr_alloc_nvme_prp_pages(sc) == ENOMEM)
                        return (ENOMEM);
        }

        /* XXX Need to pick a more precise value */
        nsegs = (MAXPHYS / PAGE_SIZE) + 1;
        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                1, 0,                   /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR,      /* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
                                nsegs,                  /* nsegments */
                                BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
                                BUS_DMA_ALLOCNOW,       /* flags */
                                busdma_lock_mutex,      /* lockfunc */
                                &sc->mpr_mtx,           /* lockarg */
                                &sc->buffer_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate buffer DMA tag\n");
                return (ENOMEM);
        }

        /*
         * SMID 0 cannot be used as a free command per the firmware spec.
         * Just drop that command instead of risking accounting bugs.
         */
        sc->commands = malloc(sizeof(struct mpr_command) * sc->num_reqs,
            M_MPR, M_WAITOK | M_ZERO);
        if (!sc->commands) {
                device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n",
                    __func__, __LINE__);
                return (ENOMEM);
        }
        for (i = 1; i < sc->num_reqs; i++) {
                cm = &sc->commands[i];
                cm->cm_req = sc->req_frames +
                    i * sc->facts->IOCRequestFrameSize * 4;
                cm->cm_req_busaddr = sc->req_busaddr +
                    i * sc->facts->IOCRequestFrameSize * 4;
                cm->cm_sense = &sc->sense_frames[i];
                cm->cm_sense_busaddr = sc->sense_busaddr + i * MPR_SENSE_LEN;
                cm->cm_desc.Default.SMID = i;
                cm->cm_sc = sc;
                TAILQ_INIT(&cm->cm_chain_list);
                TAILQ_INIT(&cm->cm_prp_page_list);
                callout_init_mtx(&cm->cm_callout, &sc->mpr_mtx, 0);

                /* XXX Is a failure here a critical problem? */
                if (bus_dmamap_create(sc->buffer_dmat, 0, &cm->cm_dmamap)
                    == 0) {
                        if (i <= sc->num_prireqs)
                                mpr_free_high_priority_command(sc, cm);
                        else
                                mpr_free_command(sc, cm);
                } else {
                        panic("failed to allocate command %d\n", i);
                        sc->num_reqs = i;
                        break;
                }
        }

        return (0);
}

/*
 * Allocate contiguous buffers for PCIe NVMe devices for building native PRPs,
 * which are scatter/gather lists for NVMe devices.
 *
 * This buffer must be contiguous due to the nature of how NVMe PRPs are built
 * and translated by FW.
 *
 * returns ENOMEM if memory could not be allocated, otherwise returns 0.
 */
static int
mpr_alloc_nvme_prp_pages(struct mpr_softc *sc)
{
        int PRPs_per_page, PRPs_required, pages_required;
        int rsize, i;
        struct mpr_prp_page *prp_page;

        /*
         * Assuming a MAX_IO_SIZE of 1MB and a PAGE_SIZE of 4k, the max number
         * of PRPs (NVMe's Scatter/Gather Element) needed per I/O is:
         * MAX_IO_SIZE / PAGE_SIZE = 256
         *
         * 1 PRP entry in main frame for PRP list pointer still leaves 255 PRPs
         * required for the remainder of the 1MB I/O.  512 PRPs can fit into
         * one page (4096 / 8 = 512), so only one page is required for each I/O.
         *
         * Each of these buffers will need to be contiguous.  For simplicity,
         * only one buffer is allocated here, which has all of the space
         * required for the NVMe Queue Depth.  If there are problems allocating
         * this one buffer, this function will need to change to allocate
         * individual, contiguous NVME_QDEPTH buffers.
         *
         * The real calculation will use the real max io size.  Above is just
         * an example.
         */
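        /*
         * Worked example using the numbers from the comment above: with
         * maxio = 1MB and PAGE_SIZE = 4KB, PRPs_required = 256,
         * PRPs_per_page = (4096 / 8) - 1 = 511, and pages_required = 1, so
         * prp_buffer_size works out to a single 4KB page per command.
         */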
        PRPs_required = sc->maxio / PAGE_SIZE;
        PRPs_per_page = (PAGE_SIZE / PRP_ENTRY_SIZE) - 1;
        pages_required = (PRPs_required / PRPs_per_page) + 1;

        sc->prp_buffer_size = PAGE_SIZE * pages_required;
        rsize = sc->prp_buffer_size * NVME_QDEPTH;
        if (bus_dma_tag_create( sc->mpr_parent_dmat,    /* parent */
                                4, 0,                   /* algnmnt, boundary */
                                BUS_SPACE_MAXADDR_32BIT,/* lowaddr */
                                BUS_SPACE_MAXADDR,      /* highaddr */
                                NULL, NULL,             /* filter, filterarg */
                                rsize,                  /* maxsize */
                                1,                      /* nsegments */
                                rsize,                  /* maxsegsize */
                                0,                      /* flags */
                                NULL, NULL,             /* lockfunc, lockarg */
                                &sc->prp_page_dmat)) {
                device_printf(sc->mpr_dev, "Cannot allocate NVMe PRP DMA "
                    "tag\n");
                return (ENOMEM);
        }
        if (bus_dmamem_alloc(sc->prp_page_dmat, (void **)&sc->prp_pages,
            BUS_DMA_NOWAIT, &sc->prp_page_map)) {
                device_printf(sc->mpr_dev, "Cannot allocate NVMe PRP memory\n");
                return (ENOMEM);
        }
        bzero(sc->prp_pages, rsize);
        bus_dmamap_load(sc->prp_page_dmat, sc->prp_page_map, sc->prp_pages,
            rsize, mpr_memaddr_cb, &sc->prp_page_busaddr, 0);

        sc->prps = malloc(sizeof(struct mpr_prp_page) * NVME_QDEPTH, M_MPR,
            M_WAITOK | M_ZERO);
        for (i = 0; i < NVME_QDEPTH; i++) {
                prp_page = &sc->prps[i];
                prp_page->prp_page = (uint64_t *)(sc->prp_pages +
                    i * sc->prp_buffer_size);
                prp_page->prp_page_busaddr = (uint64_t)(sc->prp_page_busaddr +
                    i * sc->prp_buffer_size);
                mpr_free_prp_page(sc, prp_page);
                sc->prp_pages_free_lowwater++;
        }

        return (0);
}

static int
mpr_init_queues(struct mpr_softc *sc)
{
        int i;

        memset((uint8_t *)sc->post_queue, 0xff, sc->pqdepth * 8);

        /*
         * According to the spec, we need to use one less reply than we
         * have space for on the queue.  So sc->num_replies (the number we
         * use) should be less than sc->fqdepth (allocated size).
         */
        if (sc->num_replies >= sc->fqdepth)
                return (EINVAL);

        /*
         * Initialize all of the free queue entries.
         */
        for (i = 0; i < sc->fqdepth; i++) {
                sc->free_queue[i] = sc->reply_busaddr +
                    (i * sc->facts->ReplyFrameSize * 4);
        }
        sc->replyfreeindex = sc->num_replies;

        return (0);
}

/*
 * Get the driver parameter tunables.  Lowest priority are the driver defaults.
 * Next are the global settings, if they exist.  Highest are the per-unit
 * settings, if they exist.
 */
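/*
 * Usage example (hypothetical unit number): since these are fetched from the
 * kernel environment, they can be set in /boot/loader.conf, e.g.
 *
 *      hw.mpr.debug_level="3"
 *      dev.mpr.0.max_chains="4096"
 */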
void
mpr_get_tunables(struct mpr_softc *sc)
{
        char tmpstr[80];

        /* XXX default to some debugging for now */
        sc->mpr_debug = MPR_INFO | MPR_FAULT;
        sc->disable_msix = 0;
        sc->disable_msi = 0;
        sc->max_chains = MPR_CHAIN_FRAMES;
        sc->max_io_pages = MPR_MAXIO_PAGES;
        sc->enable_ssu = MPR_SSU_ENABLE_SSD_DISABLE_HDD;
        sc->spinup_wait_time = DEFAULT_SPINUP_WAIT;
        sc->use_phynum = 1;

        /*
         * Grab the global variables.
         */
        TUNABLE_INT_FETCH("hw.mpr.debug_level", &sc->mpr_debug);
        TUNABLE_INT_FETCH("hw.mpr.disable_msix", &sc->disable_msix);
        TUNABLE_INT_FETCH("hw.mpr.disable_msi", &sc->disable_msi);
        TUNABLE_INT_FETCH("hw.mpr.max_chains", &sc->max_chains);
        TUNABLE_INT_FETCH("hw.mpr.max_io_pages", &sc->max_io_pages);
        TUNABLE_INT_FETCH("hw.mpr.enable_ssu", &sc->enable_ssu);
        TUNABLE_INT_FETCH("hw.mpr.spinup_wait_time", &sc->spinup_wait_time);
        TUNABLE_INT_FETCH("hw.mpr.use_phy_num", &sc->use_phynum);

        /* Grab the unit-instance variables */
        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.debug_level",
            device_get_unit(sc->mpr_dev));
        TUNABLE_INT_FETCH(tmpstr, &sc->mpr_debug);

        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msix",
            device_get_unit(sc->mpr_dev));
        TUNABLE_INT_FETCH(tmpstr, &sc->disable_msix);

        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.disable_msi",
            device_get_unit(sc->mpr_dev));
        TUNABLE_INT_FETCH(tmpstr, &sc->disable_msi);

        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_chains",
            device_get_unit(sc->mpr_dev));
        TUNABLE_INT_FETCH(tmpstr, &sc->max_chains);

        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.max_io_pages",
            device_get_unit(sc->mpr_dev));
        TUNABLE_INT_FETCH(tmpstr, &sc->max_io_pages);

        bzero(sc->exclude_ids, sizeof(sc->exclude_ids));
        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.exclude_ids",
            device_get_unit(sc->mpr_dev));
        TUNABLE_STR_FETCH(tmpstr, sc->exclude_ids, sizeof(sc->exclude_ids));

        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.enable_ssu",
            device_get_unit(sc->mpr_dev));
        TUNABLE_INT_FETCH(tmpstr, &sc->enable_ssu);

        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.spinup_wait_time",
            device_get_unit(sc->mpr_dev));
        TUNABLE_INT_FETCH(tmpstr, &sc->spinup_wait_time);

        snprintf(tmpstr, sizeof(tmpstr), "dev.mpr.%d.use_phy_num",
            device_get_unit(sc->mpr_dev));
        TUNABLE_INT_FETCH(tmpstr, &sc->use_phynum);
}

static void
mpr_setup_sysctl(struct mpr_softc *sc)
{
        struct sysctl_ctx_list *sysctl_ctx = NULL;
        struct sysctl_oid *sysctl_tree = NULL;
        char tmpstr[80], tmpstr2[80];

        /*
         * Setup the sysctl variable so the user can change the debug level
         * on the fly.
         */
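        /*
         * The node normally hangs off the device's own sysctl tree
         * (dev.mpr.<unit>); only if that tree is unavailable is a private
         * context created under hw.mpr.<unit> instead.
         */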
        snprintf(tmpstr, sizeof(tmpstr), "MPR controller %d",
            device_get_unit(sc->mpr_dev));
        snprintf(tmpstr2, sizeof(tmpstr2), "%d", device_get_unit(sc->mpr_dev));

        sysctl_ctx = device_get_sysctl_ctx(sc->mpr_dev);
        if (sysctl_ctx != NULL)
                sysctl_tree = device_get_sysctl_tree(sc->mpr_dev);

        if (sysctl_tree == NULL) {
                sysctl_ctx_init(&sc->sysctl_ctx);
                sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
                    SYSCTL_STATIC_CHILDREN(_hw_mpr), OID_AUTO, tmpstr2,
                    CTLFLAG_RD, 0, tmpstr);
                if (sc->sysctl_tree == NULL)
                        return;
                sysctl_ctx = &sc->sysctl_ctx;
                sysctl_tree = sc->sysctl_tree;
        }

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "debug_level", CTLFLAG_RW, &sc->mpr_debug, 0,
            "mpr debug level");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "disable_msix", CTLFLAG_RD, &sc->disable_msix, 0,
            "Disable the use of MSI-X interrupts");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "disable_msi", CTLFLAG_RD, &sc->disable_msi, 0,
            "Disable the use of MSI interrupts");

        SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "firmware_version", CTLFLAG_RW, sc->fw_version,
            strlen(sc->fw_version), "firmware version");

        SYSCTL_ADD_STRING(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "driver_version", CTLFLAG_RW, MPR_DRIVER_VERSION,
            strlen(MPR_DRIVER_VERSION), "driver version");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "io_cmds_active", CTLFLAG_RD,
            &sc->io_cmds_active, 0, "number of currently active commands");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "io_cmds_highwater", CTLFLAG_RD,
            &sc->io_cmds_highwater, 0, "maximum active commands seen");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "chain_free", CTLFLAG_RD,
            &sc->chain_free, 0, "number of free chain elements");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "chain_free_lowwater", CTLFLAG_RD,
            &sc->chain_free_lowwater, 0, "lowest number of free chain elements");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "max_chains", CTLFLAG_RD,
            &sc->max_chains, 0, "maximum chain frames that will be allocated");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "max_io_pages", CTLFLAG_RD,
            &sc->max_io_pages, 0, "maximum pages to allow per I/O (if <1 use "
            "IOCFacts)");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "enable_ssu", CTLFLAG_RW, &sc->enable_ssu, 0,
            "enable SSU to SATA SSD/HDD at shutdown");

        SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "chain_alloc_fail", CTLFLAG_RD,
            &sc->chain_alloc_fail, "chain allocation failures");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "spinup_wait_time", CTLFLAG_RD,
            &sc->spinup_wait_time, DEFAULT_SPINUP_WAIT, "seconds to wait for "
            "spinup after SATA ID error");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "use_phy_num", CTLFLAG_RD, &sc->use_phynum, 0,
            "Use the phy number for enumeration");

        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "prp_pages_free", CTLFLAG_RD,
            &sc->prp_pages_free, 0, "number of free PRP pages");
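        /*
         * Usage example (hypothetical unit number): the read-only statistics
         * registered here can be inspected at runtime with sysctl(8), e.g.
         * "sysctl dev.mpr.0.io_cmds_highwater".
         */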
        SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "prp_pages_free_lowwater", CTLFLAG_RD,
            &sc->prp_pages_free_lowwater, 0, "lowest number of free PRP pages");

        SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree),
            OID_AUTO, "prp_page_alloc_fail", CTLFLAG_RD,
            &sc->prp_page_alloc_fail, "PRP page allocation failures");
}

int
mpr_attach(struct mpr_softc *sc)
{
        int error;

        MPR_FUNCTRACE(sc);

        mtx_init(&sc->mpr_mtx, "MPR lock", NULL, MTX_DEF);
        callout_init_mtx(&sc->periodic, &sc->mpr_mtx, 0);
        callout_init_mtx(&sc->device_check_callout, &sc->mpr_mtx, 0);
        TAILQ_INIT(&sc->event_list);
        timevalclear(&sc->lastfail);

        if ((error = mpr_transition_ready(sc)) != 0) {
                mpr_printf(sc, "%s failed to transition ready\n", __func__);
                return (error);
        }

        sc->facts = malloc(sizeof(MPI2_IOC_FACTS_REPLY), M_MPR,
            M_ZERO|M_NOWAIT);
        if (!sc->facts) {
                device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n",
                    __func__, __LINE__);
                return (ENOMEM);
        }

        /*
         * Get IOC Facts and allocate all structures based on this information.
         * A Diag Reset will also call mpr_iocfacts_allocate and re-read the IOC
         * Facts.  If relevant values have changed in IOC Facts, this function
         * will free all of the memory based on IOC Facts and reallocate that
         * memory.  If this fails, any allocated memory should already be freed.
         */
        if ((error = mpr_iocfacts_allocate(sc, TRUE)) != 0) {
                mpr_dprint(sc, MPR_FAULT, "%s IOC Facts based allocation "
                    "failed with error %d\n", __func__, error);
                return (error);
        }

        /* Start the periodic watchdog check on the IOC Doorbell */
        mpr_periodic(sc);

        /*
         * The portenable will kick off discovery events that will drive the
         * rest of the initialization process.  The CAM/SAS module will
         * hold up the boot sequence until discovery is complete.
         */
        sc->mpr_ich.ich_func = mpr_startup;
        sc->mpr_ich.ich_arg = sc;
        if (config_intrhook_establish(&sc->mpr_ich) != 0) {
                mpr_dprint(sc, MPR_ERROR, "Cannot establish MPR config hook\n");
                error = EINVAL;
        }

        /*
         * Allow IR to shutdown gracefully when shutdown occurs.
         */
        sc->shutdown_eh = EVENTHANDLER_REGISTER(shutdown_final,
            mprsas_ir_shutdown, sc, SHUTDOWN_PRI_DEFAULT);

        if (sc->shutdown_eh == NULL)
                mpr_dprint(sc, MPR_ERROR, "shutdown event registration "
                    "failed\n");

        mpr_setup_sysctl(sc);

        sc->mpr_flags |= MPR_FLAGS_ATTACH_DONE;

        return (error);
}

/* Run through any late-start handlers. */
static void
mpr_startup(void *arg)
{
        struct mpr_softc *sc;

        sc = (struct mpr_softc *)arg;

        mpr_lock(sc);
        mpr_unmask_intr(sc);

        /* initialize device mapping tables */
        mpr_base_static_config_pages(sc);
        mpr_mapping_initialize(sc);
        mprsas_startup(sc);
        mpr_unlock(sc);
}
*/ 1754static void 1755mpr_periodic(void *arg) 1756{ 1757 struct mpr_softc *sc; 1758 uint32_t db; 1759 1760 sc = (struct mpr_softc *)arg; 1761 if (sc->mpr_flags & MPR_FLAGS_SHUTDOWN) 1762 return; 1763 1764 db = mpr_regread(sc, MPI2_DOORBELL_OFFSET); 1765 if ((db & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { 1766 if ((db & MPI2_DOORBELL_FAULT_CODE_MASK) == 1767 IFAULT_IOP_OVER_TEMP_THRESHOLD_EXCEEDED) { 1768 panic("TEMPERATURE FAULT: STOPPING."); 1769 } 1770 mpr_dprint(sc, MPR_FAULT, "IOC Fault 0x%08x, Resetting\n", db); 1771 mpr_reinit(sc); 1772 } 1773 1774 callout_reset(&sc->periodic, MPR_PERIODIC_DELAY * hz, mpr_periodic, sc); 1775} 1776 1777static void 1778mpr_log_evt_handler(struct mpr_softc *sc, uintptr_t data, 1779 MPI2_EVENT_NOTIFICATION_REPLY *event) 1780{ 1781 MPI2_EVENT_DATA_LOG_ENTRY_ADDED *entry; 1782 1783 MPR_DPRINT_EVENT(sc, generic, event); 1784 1785 switch (event->Event) { 1786 case MPI2_EVENT_LOG_DATA: 1787 mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_DATA:\n"); 1788 if (sc->mpr_debug & MPR_EVENT) 1789 hexdump(event->EventData, event->EventDataLength, NULL, 1790 0); 1791 break; 1792 case MPI2_EVENT_LOG_ENTRY_ADDED: 1793 entry = (MPI2_EVENT_DATA_LOG_ENTRY_ADDED *)event->EventData; 1794 mpr_dprint(sc, MPR_EVENT, "MPI2_EVENT_LOG_ENTRY_ADDED event " 1795 "0x%x Sequence %d:\n", entry->LogEntryQualifier, 1796 entry->LogSequence); 1797 break; 1798 default: 1799 break; 1800 } 1801 return; 1802} 1803 1804static int 1805mpr_attach_log(struct mpr_softc *sc) 1806{ 1807 uint8_t events[16]; 1808 1809 bzero(events, 16); 1810 setbit(events, MPI2_EVENT_LOG_DATA); 1811 setbit(events, MPI2_EVENT_LOG_ENTRY_ADDED); 1812 1813 mpr_register_events(sc, events, mpr_log_evt_handler, NULL, 1814 &sc->mpr_log_eh); 1815 1816 return (0); 1817} 1818 1819static int 1820mpr_detach_log(struct mpr_softc *sc) 1821{ 1822 1823 if (sc->mpr_log_eh != NULL) 1824 mpr_deregister_events(sc, sc->mpr_log_eh); 1825 return (0); 1826} 1827 1828/* 1829 * Free all of the driver resources and detach submodules. Should be called 1830 * without the lock held. 1831 */ 1832int 1833mpr_free(struct mpr_softc *sc) 1834{ 1835 int error; 1836 1837 /* Turn off the watchdog */ 1838 mpr_lock(sc); 1839 sc->mpr_flags |= MPR_FLAGS_SHUTDOWN; 1840 mpr_unlock(sc); 1841 /* Lock must not be held for this */ 1842 callout_drain(&sc->periodic); 1843 callout_drain(&sc->device_check_callout); 1844 1845 if (((error = mpr_detach_log(sc)) != 0) || 1846 ((error = mpr_detach_sas(sc)) != 0)) 1847 return (error); 1848 1849 mpr_detach_user(sc); 1850 1851 /* Put the IOC back in the READY state. */ 1852 mpr_lock(sc); 1853 if ((error = mpr_transition_ready(sc)) != 0) { 1854 mpr_unlock(sc); 1855 return (error); 1856 } 1857 mpr_unlock(sc); 1858 1859 if (sc->facts != NULL) 1860 free(sc->facts, M_MPR); 1861 1862 /* 1863 * Free all buffers that are based on IOC Facts. A Diag Reset may need 1864 * to free these buffers too. 
 */
	mpr_iocfacts_free(sc);

	if (sc->sysctl_tree != NULL)
		sysctl_ctx_free(&sc->sysctl_ctx);

	/* Deregister the shutdown function */
	if (sc->shutdown_eh != NULL)
		EVENTHANDLER_DEREGISTER(shutdown_final, sc->shutdown_eh);

	mtx_destroy(&sc->mpr_mtx);

	return (0);
}

static __inline void
mpr_complete_command(struct mpr_softc *sc, struct mpr_command *cm)
{
	MPR_FUNCTRACE(sc);

	if (cm == NULL) {
		mpr_dprint(sc, MPR_ERROR, "Completing NULL command\n");
		return;
	}

	if (cm->cm_flags & MPR_CM_FLAGS_POLLED)
		cm->cm_flags |= MPR_CM_FLAGS_COMPLETE;

	if (cm->cm_complete != NULL) {
		mpr_dprint(sc, MPR_TRACE,
		    "%s cm %p calling cm_complete %p data %p reply %p\n",
		    __func__, cm, cm->cm_complete, cm->cm_complete_data,
		    cm->cm_reply);
		cm->cm_complete(sc, cm);
	}

	if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) {
		mpr_dprint(sc, MPR_TRACE, "waking up %p\n", cm);
		wakeup(cm);
	}

	if (sc->io_cmds_active != 0) {
		sc->io_cmds_active--;
	} else {
		mpr_dprint(sc, MPR_ERROR, "Warning: io_cmds_active is "
		    "out of sync - resynching to 0\n");
	}
}

static void
mpr_sas_log_info(struct mpr_softc *sc, u32 log_info)
{
	union loginfo_type {
		u32 loginfo;
		struct {
			u32 subcode:16;
			u32 code:8;
			u32 originator:4;
			u32 bus_type:4;
		} dw;
	};
	union loginfo_type sas_loginfo;
	char *originator_str = NULL;

	sas_loginfo.loginfo = log_info;
	if (sas_loginfo.dw.bus_type != 3 /*SAS*/)
		return;

	/* each nexus loss loginfo */
	if (log_info == 0x31170000)
		return;

	/* eat the loginfos associated with task aborts */
	if ((log_info == 0x30050000) || (log_info == 0x31140000) ||
	    (log_info == 0x31130000))
		return;

	switch (sas_loginfo.dw.originator) {
	case 0:
		originator_str = "IOP";
		break;
	case 1:
		originator_str = "PL";
		break;
	case 2:
		originator_str = "IR";
		break;
	}

	mpr_dprint(sc, MPR_LOG, "log_info(0x%08x): originator(%s), "
	    "code(0x%02x), sub_code(0x%04x)\n", log_info, originator_str,
	    sas_loginfo.dw.code, sas_loginfo.dw.subcode);
}

static void
mpr_display_reply_info(struct mpr_softc *sc, uint8_t *reply)
{
	MPI2DefaultReply_t *mpi_reply;
	u16 sc_status;

	mpi_reply = (MPI2DefaultReply_t *)reply;
	sc_status = le16toh(mpi_reply->IOCStatus);
	if (sc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
		mpr_sas_log_info(sc, le32toh(mpi_reply->IOCLogInfo));
}

void
mpr_intr(void *data)
{
	struct mpr_softc *sc;
	uint32_t status;

	sc = (struct mpr_softc *)data;
	mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);

	/*
	 * Check interrupt status register to flush the bus.  This is
	 * needed for both INTx interrupts and driver-driven polling.
	 */
	status = mpr_regread(sc, MPI2_HOST_INTERRUPT_STATUS_OFFSET);
	if ((status & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT) == 0)
		return;

	mpr_lock(sc);
	mpr_intr_locked(data);
	mpr_unlock(sc);
	return;
}

/*
 * In theory, MSI/MSIX interrupts shouldn't need to read any registers on the
 * chip.  Hopefully this theory is correct.
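 * (The usual rationale: a posted MSI/MSI-X write cannot pass the DMA writes
 * that preceded it from the same device, so the reply descriptors should
 * already be visible in host memory by the time the interrupt fires.)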
 */
void
mpr_intr_msi(void *data)
{
	struct mpr_softc *sc;

	sc = (struct mpr_softc *)data;
	mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);
	mpr_lock(sc);
	mpr_intr_locked(data);
	mpr_unlock(sc);
	return;
}

/*
 * The locking is overly broad and simplistic, but easy to deal with for now.
 */
void
mpr_intr_locked(void *data)
{
	MPI2_REPLY_DESCRIPTORS_UNION *desc;
	struct mpr_softc *sc;
	struct mpr_command *cm = NULL;
	uint8_t flags;
	u_int pq;
	MPI2_DIAG_RELEASE_REPLY *rel_rep;
	mpr_fw_diagnostic_buffer_t *pBuffer;

	sc = (struct mpr_softc *)data;

	pq = sc->replypostindex;
	mpr_dprint(sc, MPR_TRACE,
	    "%s sc %p starting with replypostindex %u\n",
	    __func__, sc, sc->replypostindex);

	for ( ;; ) {
		cm = NULL;
		desc = &sc->post_queue[sc->replypostindex];
		flags = desc->Default.ReplyFlags &
		    MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
		if ((flags == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) ||
		    (le32toh(desc->Words.High) == 0xffffffff))
			break;

		/*
		 * Increment the replypostindex now, so that event handlers
		 * and cm completion handlers which decide to do a diag
		 * reset can zero it without it getting incremented again
		 * afterwards, and we break out of this loop on the next
		 * iteration since the reply post queue has been cleared to
		 * 0xFF and all descriptors look unused (which they are).
		 */
		if (++sc->replypostindex >= sc->pqdepth)
			sc->replypostindex = 0;

		switch (flags) {
		case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS:
		case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS:
		case MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS:
			cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)];
			cm->cm_reply = NULL;
			break;
		case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY:
		{
			uint32_t baddr;
			uint8_t *reply;

			/*
			 * Re-compose the reply address from the address
			 * sent back from the chip.  The ReplyFrameAddress
			 * is the lower 32 bits of the physical address of a
			 * particular reply frame.  Convert that address to
			 * host format, and then use that to provide the
			 * offset against the virtual address base
			 * (sc->reply_frames).
			 */
			baddr = le32toh(desc->AddressReply.ReplyFrameAddress);
			reply = sc->reply_frames +
			    (baddr - ((uint32_t)sc->reply_busaddr));
			/*
			 * Make sure the reply we got back is in a valid
			 * range.  If not, go ahead and panic here, since
			 * we'll probably panic as soon as we dereference the
			 * reply pointer anyway.
			 */
			if ((reply < sc->reply_frames)
			 || (reply > (sc->reply_frames +
			     (sc->fqdepth * sc->facts->ReplyFrameSize * 4)))) {
				printf("%s: WARNING: reply %p out of range!\n",
				    __func__, reply);
				printf("%s: reply_frames %p, fqdepth %d, "
				    "frame size %d\n", __func__,
				    sc->reply_frames, sc->fqdepth,
				    sc->facts->ReplyFrameSize * 4);
				printf("%s: baddr %#x,\n", __func__, baddr);
				/* LSI-TODO. See Linux Code for Graceful exit */
				panic("Reply address out of range");
			}
			if (le16toh(desc->AddressReply.SMID) == 0) {
				if (((MPI2_DEFAULT_REPLY *)reply)->Function ==
				    MPI2_FUNCTION_DIAG_BUFFER_POST) {
					/*
					 * If SMID is 0 for Diag Buffer Post,
					 * this implies that the reply is due to
					 * a release function with a status that
					 * the buffer has been released.  Set
					 * the buffer flags accordingly.
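					 * (An SMID of 0 generally means the
					 * reply is not tied to a host request
					 * slot: it is either this diag-buffer
					 * release notification or an async
					 * event.)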
					 */
					rel_rep =
					    (MPI2_DIAG_RELEASE_REPLY *)reply;
					if ((le16toh(rel_rep->IOCStatus) &
					    MPI2_IOCSTATUS_MASK) ==
					    MPI2_IOCSTATUS_DIAGNOSTIC_RELEASED)
					{
						pBuffer =
						    &sc->fw_diag_buffer_list[
						    rel_rep->BufferType];
						pBuffer->valid_data = TRUE;
						pBuffer->owned_by_firmware =
						    FALSE;
						pBuffer->immediate = FALSE;
					}
				} else
					mpr_dispatch_event(sc, baddr,
					    (MPI2_EVENT_NOTIFICATION_REPLY *)
					    reply);
			} else {
				cm = &sc->commands[
				    le16toh(desc->AddressReply.SMID)];
				cm->cm_reply = reply;
				cm->cm_reply_data =
				    le32toh(desc->AddressReply.
				    ReplyFrameAddress);
			}
			break;
		}
		case MPI2_RPY_DESCRIPT_FLAGS_TARGETASSIST_SUCCESS:
		case MPI2_RPY_DESCRIPT_FLAGS_TARGET_COMMAND_BUFFER:
		case MPI2_RPY_DESCRIPT_FLAGS_RAID_ACCELERATOR_SUCCESS:
		default:
			/* Unhandled */
			mpr_dprint(sc, MPR_ERROR, "Unhandled reply 0x%x\n",
			    desc->Default.ReplyFlags);
			cm = NULL;
			break;
		}

		if (cm != NULL) {
			/* Print error reply frame */
			if (cm->cm_reply)
				mpr_display_reply_info(sc, cm->cm_reply);
			mpr_complete_command(sc, cm);
		}

		desc->Words.Low = 0xffffffff;
		desc->Words.High = 0xffffffff;
	}

	if (pq != sc->replypostindex) {
		mpr_dprint(sc, MPR_TRACE,
		    "%s sc %p writing postindex %d\n",
		    __func__, sc, sc->replypostindex);
		mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET,
		    sc->replypostindex);
	}

	return;
}

static void
mpr_dispatch_event(struct mpr_softc *sc, uintptr_t data,
    MPI2_EVENT_NOTIFICATION_REPLY *reply)
{
	struct mpr_event_handle *eh;
	int event, handled = 0;

	event = le16toh(reply->Event);
	TAILQ_FOREACH(eh, &sc->event_list, eh_list) {
		if (isset(eh->mask, event)) {
			eh->callback(sc, data, reply);
			handled++;
		}
	}

	if (handled == 0)
		mpr_dprint(sc, MPR_EVENT, "Unhandled event 0x%x\n", event);

	/*
	 * This is the only place that the event/reply should be freed.
	 * Anything wanting to hold onto the event data should have
	 * already copied it into their own storage.
	 */
	mpr_free_reply(sc, data);
}

static void
mpr_reregister_events_complete(struct mpr_softc *sc, struct mpr_command *cm)
{
	mpr_dprint(sc, MPR_TRACE, "%s\n", __func__);

	if (cm->cm_reply)
		MPR_DPRINT_EVENT(sc, generic,
		    (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply);

	mpr_free_command(sc, cm);

	/* next, send a port enable */
	mprsas_startup(sc);
}

/*
 * For both register_events and update_events, the caller supplies a bitmap
 * of events that it _wants_.  These functions then turn that into a bitmask
 * suitable for the controller.
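 * Note the polarity: in the EventMasks sent to the IOC, a 1 bit masks an
 * event *off*, so the driver starts from all ones and clears the bit for
 * every event at least one handler wants delivered.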
2211 */ 2212int 2213mpr_register_events(struct mpr_softc *sc, uint8_t *mask, 2214 mpr_evt_callback_t *cb, void *data, struct mpr_event_handle **handle) 2215{ 2216 struct mpr_event_handle *eh; 2217 int error = 0; 2218 2219 eh = malloc(sizeof(struct mpr_event_handle), M_MPR, M_WAITOK|M_ZERO); 2220 if (!eh) { 2221 device_printf(sc->mpr_dev, "Cannot allocate memory %s %d\n", 2222 __func__, __LINE__); 2223 return (ENOMEM); 2224 } 2225 eh->callback = cb; 2226 eh->data = data; 2227 TAILQ_INSERT_TAIL(&sc->event_list, eh, eh_list); 2228 if (mask != NULL) 2229 error = mpr_update_events(sc, eh, mask); 2230 *handle = eh; 2231 2232 return (error); 2233} 2234 2235int 2236mpr_update_events(struct mpr_softc *sc, struct mpr_event_handle *handle, 2237 uint8_t *mask) 2238{ 2239 MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; 2240 MPI2_EVENT_NOTIFICATION_REPLY *reply = NULL; 2241 struct mpr_command *cm = NULL; 2242 struct mpr_event_handle *eh; 2243 int error, i; 2244 2245 mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); 2246 2247 if ((mask != NULL) && (handle != NULL)) 2248 bcopy(mask, &handle->mask[0], 16); 2249 memset(sc->event_mask, 0xff, 16); 2250 2251 TAILQ_FOREACH(eh, &sc->event_list, eh_list) { 2252 for (i = 0; i < 16; i++) 2253 sc->event_mask[i] &= ~eh->mask[i]; 2254 } 2255 2256 if ((cm = mpr_alloc_command(sc)) == NULL) 2257 return (EBUSY); 2258 evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; 2259 evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; 2260 evtreq->MsgFlags = 0; 2261 evtreq->SASBroadcastPrimitiveMasks = 0; 2262#ifdef MPR_DEBUG_ALL_EVENTS 2263 { 2264 u_char fullmask[16]; 2265 memset(fullmask, 0x00, 16); 2266 bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16); 2267 } 2268#else 2269 bcopy(sc->event_mask, (uint8_t *)&evtreq->EventMasks, 16); 2270#endif 2271 cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; 2272 cm->cm_data = NULL; 2273 2274 error = mpr_request_polled(sc, &cm); 2275 if (cm != NULL) 2276 reply = (MPI2_EVENT_NOTIFICATION_REPLY *)cm->cm_reply; 2277 if ((reply == NULL) || 2278 (reply->IOCStatus & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SUCCESS) 2279 error = ENXIO; 2280 2281 if (reply) 2282 MPR_DPRINT_EVENT(sc, generic, reply); 2283 2284 mpr_dprint(sc, MPR_TRACE, "%s finished error %d\n", __func__, error); 2285 2286 if (cm != NULL) 2287 mpr_free_command(sc, cm); 2288 return (error); 2289} 2290 2291static int 2292mpr_reregister_events(struct mpr_softc *sc) 2293{ 2294 MPI2_EVENT_NOTIFICATION_REQUEST *evtreq; 2295 struct mpr_command *cm; 2296 struct mpr_event_handle *eh; 2297 int error, i; 2298 2299 mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); 2300 2301 /* first, reregister events */ 2302 2303 memset(sc->event_mask, 0xff, 16); 2304 2305 TAILQ_FOREACH(eh, &sc->event_list, eh_list) { 2306 for (i = 0; i < 16; i++) 2307 sc->event_mask[i] &= ~eh->mask[i]; 2308 } 2309 2310 if ((cm = mpr_alloc_command(sc)) == NULL) 2311 return (EBUSY); 2312 evtreq = (MPI2_EVENT_NOTIFICATION_REQUEST *)cm->cm_req; 2313 evtreq->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; 2314 evtreq->MsgFlags = 0; 2315 evtreq->SASBroadcastPrimitiveMasks = 0; 2316#ifdef MPR_DEBUG_ALL_EVENTS 2317 { 2318 u_char fullmask[16]; 2319 memset(fullmask, 0x00, 16); 2320 bcopy(fullmask, (uint8_t *)&evtreq->EventMasks, 16); 2321 } 2322#else 2323 bcopy(sc->event_mask, (uint8_t *)&evtreq->EventMasks, 16); 2324#endif 2325 cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; 2326 cm->cm_data = NULL; 2327 cm->cm_complete = mpr_reregister_events_complete; 2328 2329 error = mpr_map_command(sc, cm); 2330 
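	/*
	 * The request completes asynchronously;
	 * mpr_reregister_events_complete() (set as cm_complete above) frees
	 * the command and re-issues the port enable via mprsas_startup().
	 */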
	mpr_dprint(sc, MPR_TRACE, "%s finished with error %d\n", __func__,
	    error);
	return (error);
}

int
mpr_deregister_events(struct mpr_softc *sc, struct mpr_event_handle *handle)
{

	TAILQ_REMOVE(&sc->event_list, handle, eh_list);
	free(handle, M_MPR);
	return (mpr_update_events(sc, NULL, NULL));
}

/**
 * mpr_build_nvme_prp - This function is called for NVMe end devices to build
 * a native SGL (NVMe PRP).  The native SGL is built starting in the first PRP
 * entry of the NVMe message (PRP1).  If the data buffer is small enough to be
 * described entirely using PRP1, then PRP2 is not used.  If needed, PRP2 is
 * used to describe a larger data buffer.  If the data buffer is too large to
 * describe using the two PRP entries inside the NVMe message, then PRP1
 * describes the first data memory segment, and PRP2 contains a pointer to a
 * PRP list located elsewhere in memory to describe the remaining data memory
 * segments.  The PRP list will be contiguous.
 *
 * The native SGL for NVMe devices is a Physical Region Page (PRP).  A PRP
 * consists of a list of PRP entries to describe a number of noncontiguous
 * physical memory segments as a single memory buffer, just as an SGL does.
 * Note however, that this function is only used by the IOCTL call, so the
 * memory given will be guaranteed to be contiguous.  There is no need to
 * translate a non-contiguous SGL into a PRP in this case.  All PRPs will
 * describe contiguous space that is one page size each.
 *
 * Each NVMe message contains two PRP entries.  The first (PRP1) either
 * contains a PRP list pointer or a PRP element, depending upon the command.
 * PRP2 contains the second PRP element if the memory being described fits
 * within 2 PRP entries, or a PRP list pointer if the PRP spans more than two
 * entries.
 *
 * A PRP list pointer contains the address of a PRP list, structured as a
 * linear array of PRP entries.  Each PRP entry in this list describes a
 * segment of physical memory.
 *
 * Each 64-bit PRP entry comprises an address and an offset field.  The
 * address always points to the beginning of a PAGE_SIZE physical memory
 * page, and the offset describes where within that page the memory segment
 * begins.  Only the first element in a PRP list may contain a non-zero
 * offset, implying that all memory segments following the first begin at the
 * start of a PAGE_SIZE page.
 *
 * Each PRP element normally describes a chunk of PAGE_SIZE physical memory,
 * with exceptions for the first and last elements in the list.  If the
 * memory being described by the list begins at a non-zero offset within the
 * first page, then the first PRP element will contain a non-zero offset
 * indicating where the region begins within the page.  The last memory
 * segment may end before the end of the PAGE_SIZE segment, depending upon
 * the overall size of the memory being described by the PRP list.
 *
 * Since PRP entries lack any indication of size, the overall data buffer
 * length is used to determine where the end of the data memory buffer is
 * located, and how many PRP entries are required to describe it.
 *
 * Returns nothing.
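 *
 * A hypothetical worked example (4 KiB pages): a 10000-byte buffer starting
 * at physical address 0x1000800 spans three pages, so three PRP entries are
 * needed and PRP2 must be a list pointer:
 *	PRP1    = 0x1000800	(2048 bytes, to the end of the first page)
 *	PRP2    = &list		(physical address of the PRP list below)
 *	list[0] = 0x1001000	(4096 bytes)
 *	list[1] = 0x1002000	(remaining 3856 bytes)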
2392*/ 2393void 2394mpr_build_nvme_prp(struct mpr_softc *sc, struct mpr_command *cm, 2395 Mpi26NVMeEncapsulatedRequest_t *nvme_encap_request, void *data, 2396 uint32_t data_in_sz, uint32_t data_out_sz) 2397{ 2398 int prp_size = PRP_ENTRY_SIZE; 2399 uint64_t *prp_entry, *prp1_entry, *prp2_entry; 2400 uint64_t *prp_entry_phys, *prp_page, *prp_page_phys; 2401 uint32_t offset, entry_len, page_mask_result, page_mask; 2402 bus_addr_t paddr; 2403 size_t length; 2404 struct mpr_prp_page *prp_page_info = NULL; 2405 2406 /* 2407 * Not all commands require a data transfer. If no data, just return 2408 * without constructing any PRP. 2409 */ 2410 if (!data_in_sz && !data_out_sz) 2411 return; 2412 2413 /* 2414 * Set pointers to PRP1 and PRP2, which are in the NVMe command. PRP1 is 2415 * located at a 24 byte offset from the start of the NVMe command. Then 2416 * set the current PRP entry pointer to PRP1. 2417 */ 2418 prp1_entry = (uint64_t *)(nvme_encap_request->NVMe_Command + 2419 NVME_CMD_PRP1_OFFSET); 2420 prp2_entry = (uint64_t *)(nvme_encap_request->NVMe_Command + 2421 NVME_CMD_PRP2_OFFSET); 2422 prp_entry = prp1_entry; 2423 2424 /* 2425 * For the PRP entries, use the specially allocated buffer of 2426 * contiguous memory. PRP Page allocation failures should not happen 2427 * because there should be enough PRP page buffers to account for the 2428 * possible NVMe QDepth. 2429 */ 2430 prp_page_info = mpr_alloc_prp_page(sc); 2431 KASSERT(prp_page_info != NULL, ("%s: There are no PRP Pages left to be " 2432 "used for building a native NVMe SGL.\n", __func__)); 2433 prp_page = (uint64_t *)prp_page_info->prp_page; 2434 prp_page_phys = (uint64_t *)(uintptr_t)prp_page_info->prp_page_busaddr; 2435 2436 /* 2437 * Insert the allocated PRP page into the command's PRP page list. This 2438 * will be freed when the command is freed. 2439 */ 2440 TAILQ_INSERT_TAIL(&cm->cm_prp_page_list, prp_page_info, prp_page_link); 2441 2442 /* 2443 * Check if we are within 1 entry of a page boundary we don't want our 2444 * first entry to be a PRP List entry. 2445 */ 2446 page_mask = PAGE_SIZE - 1; 2447 page_mask_result = (uintptr_t)((uint8_t *)prp_page + prp_size) & 2448 page_mask; 2449 if (!page_mask_result) 2450 { 2451 /* Bump up to next page boundary. */ 2452 prp_page = (uint64_t *)((uint8_t *)prp_page + prp_size); 2453 prp_page_phys = (uint64_t *)((uint8_t *)prp_page_phys + 2454 prp_size); 2455 } 2456 2457 /* 2458 * Set PRP physical pointer, which initially points to the current PRP 2459 * DMA memory page. 2460 */ 2461 prp_entry_phys = prp_page_phys; 2462 2463 /* Get physical address and length of the data buffer. */ 2464 paddr = (bus_addr_t)data; 2465 if (data_in_sz) 2466 length = data_in_sz; 2467 else 2468 length = data_out_sz; 2469 2470 /* Loop while the length is not zero. */ 2471 while (length) 2472 { 2473 /* 2474 * Check if we need to put a list pointer here if we are at page 2475 * boundary - prp_size (8 bytes). 2476 */ 2477 page_mask_result = (uintptr_t)((uint8_t *)prp_entry_phys + 2478 prp_size) & page_mask; 2479 if (!page_mask_result) 2480 { 2481 /* 2482 * This is the last entry in a PRP List, so we need to 2483 * put a PRP list pointer here. What this does is: 2484 * - bump the current memory pointer to the next 2485 * address, which will be the next full page. 2486 * - set the PRP Entry to point to that page. This is 2487 * now the PRP List pointer. 2488 * - bump the PRP Entry pointer the start of the next 2489 * page. 
Since all of this PRP memory is contiguous, 2490 * no need to get a new page - it's just the next 2491 * address. 2492 */ 2493 prp_entry_phys++; 2494 *prp_entry = 2495 htole64((uint64_t)(uintptr_t)prp_entry_phys); 2496 prp_entry++; 2497 } 2498 2499 /* Need to handle if entry will be part of a page. */ 2500 offset = (uint32_t)paddr & page_mask; 2501 entry_len = PAGE_SIZE - offset; 2502 2503 if (prp_entry == prp1_entry) 2504 { 2505 /* 2506 * Must fill in the first PRP pointer (PRP1) before 2507 * moving on. 2508 */ 2509 *prp1_entry = htole64((uint64_t)paddr); 2510 2511 /* 2512 * Now point to the second PRP entry within the 2513 * command (PRP2). 2514 */ 2515 prp_entry = prp2_entry; 2516 } 2517 else if (prp_entry == prp2_entry) 2518 { 2519 /* 2520 * Should the PRP2 entry be a PRP List pointer or just a 2521 * regular PRP pointer? If there is more than one more 2522 * page of data, must use a PRP List pointer. 2523 */ 2524 if (length > PAGE_SIZE) 2525 { 2526 /* 2527 * PRP2 will contain a PRP List pointer because 2528 * more PRP's are needed with this command. The 2529 * list will start at the beginning of the 2530 * contiguous buffer. 2531 */ 2532 *prp2_entry = 2533 htole64( 2534 (uint64_t)(uintptr_t)prp_entry_phys); 2535 2536 /* 2537 * The next PRP Entry will be the start of the 2538 * first PRP List. 2539 */ 2540 prp_entry = prp_page; 2541 } 2542 else 2543 { 2544 /* 2545 * After this, the PRP Entries are complete. 2546 * This command uses 2 PRP's and no PRP list. 2547 */ 2548 *prp2_entry = htole64((uint64_t)paddr); 2549 } 2550 } 2551 else 2552 { 2553 /* 2554 * Put entry in list and bump the addresses. 2555 * 2556 * After PRP1 and PRP2 are filled in, this will fill in 2557 * all remaining PRP entries in a PRP List, one per each 2558 * time through the loop. 2559 */ 2560 *prp_entry = htole64((uint64_t)paddr); 2561 prp_entry++; 2562 prp_entry_phys++; 2563 } 2564 2565 /* 2566 * Bump the phys address of the command's data buffer by the 2567 * entry_len. 2568 */ 2569 paddr += entry_len; 2570 2571 /* Decrement length accounting for last partial page. */ 2572 if (entry_len > length) 2573 length = 0; 2574 else 2575 length -= entry_len; 2576 } 2577} 2578 2579/* 2580 * mpr_check_pcie_native_sgl - This function is called for PCIe end devices to 2581 * determine if the driver needs to build a native SGL. If so, that native SGL 2582 * is built in the contiguous buffers allocated especially for PCIe SGL 2583 * creation. If the driver will not build a native SGL, return TRUE and a 2584 * normal IEEE SGL will be built. Currently this routine supports NVMe devices 2585 * only. 2586 * 2587 * Returns FALSE (0) if native SGL was built, TRUE (1) if no SGL was built. 
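 *
 * A hypothetical example of the 4-to-5 page check below (4 KiB pages): for
 * an 18 KiB transfer, end_residual = 18 KiB % 4 KiB = 2 KiB.  If the first
 * segment starts 3 KiB into its page, first_page_data_size = 1 KiB, which is
 * less than end_residual, so a native SGL must be built.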
2588 */ 2589static int 2590mpr_check_pcie_native_sgl(struct mpr_softc *sc, struct mpr_command *cm, 2591 bus_dma_segment_t *segs, int segs_left) 2592{ 2593 uint32_t i, sge_dwords, length, offset, entry_len; 2594 uint32_t num_entries, buff_len = 0, sges_in_segment; 2595 uint32_t page_mask, page_mask_result, *curr_buff; 2596 uint32_t *ptr_sgl, *ptr_first_sgl, first_page_offset; 2597 uint32_t first_page_data_size, end_residual; 2598 uint64_t *msg_phys; 2599 bus_addr_t paddr; 2600 int build_native_sgl = 0, first_prp_entry; 2601 int prp_size = PRP_ENTRY_SIZE; 2602 Mpi25IeeeSgeChain64_t *main_chain_element = NULL; 2603 struct mpr_prp_page *prp_page_info = NULL; 2604 2605 mpr_dprint(sc, MPR_TRACE, "%s\n", __func__); 2606 2607 /* 2608 * Add up the sizes of each segment length to get the total transfer 2609 * size, which will be checked against the Maximum Data Transfer Size. 2610 * If the data transfer length exceeds the MDTS for this device, just 2611 * return 1 so a normal IEEE SGL will be built. F/W will break the I/O 2612 * up into multiple I/O's. [nvme_mdts = 0 means unlimited] 2613 */ 2614 for (i = 0; i < segs_left; i++) 2615 buff_len += htole32(segs[i].ds_len); 2616 if ((cm->cm_targ->MDTS > 0) && (buff_len > cm->cm_targ->MDTS)) 2617 return 1; 2618 2619 /* Create page_mask (to get offset within page) */ 2620 page_mask = PAGE_SIZE - 1; 2621 2622 /* 2623 * Check if the number of elements exceeds the max number that can be 2624 * put in the main message frame (H/W can only translate an SGL that 2625 * is contained entirely in the main message frame). 2626 */ 2627 sges_in_segment = (sc->facts->IOCRequestFrameSize - 2628 offsetof(Mpi25SCSIIORequest_t, SGL)) / sizeof(MPI25_SGE_IO_UNION); 2629 if (segs_left > sges_in_segment) 2630 build_native_sgl = 1; 2631 else 2632 { 2633 /* 2634 * NVMe uses one PRP for each physical page (or part of physical 2635 * page). 2636 * if 4 pages or less then IEEE is OK 2637 * if > 5 pages then we need to build a native SGL 2638 * if > 4 and <= 5 pages, then check the physical address of 2639 * the first SG entry, then if this first size in the page 2640 * is >= the residual beyond 4 pages then use IEEE, 2641 * otherwise use native SGL 2642 */ 2643 if (buff_len > (PAGE_SIZE * 5)) 2644 build_native_sgl = 1; 2645 else if ((buff_len > (PAGE_SIZE * 4)) && 2646 (buff_len <= (PAGE_SIZE * 5)) ) 2647 { 2648 msg_phys = (uint64_t *)segs[0].ds_addr; 2649 first_page_offset = 2650 ((uint32_t)(uint64_t)(uintptr_t)msg_phys & 2651 page_mask); 2652 first_page_data_size = PAGE_SIZE - first_page_offset; 2653 end_residual = buff_len % PAGE_SIZE; 2654 2655 /* 2656 * If offset into first page pushes the end of the data 2657 * beyond end of the 5th page, we need the extra PRP 2658 * list. 2659 */ 2660 if (first_page_data_size < end_residual) 2661 build_native_sgl = 1; 2662 2663 /* 2664 * Check if first SG entry size is < residual beyond 4 2665 * pages. 2666 */ 2667 if (htole32(segs[0].ds_len) < 2668 (buff_len - (PAGE_SIZE * 4))) 2669 build_native_sgl = 1; 2670 } 2671 } 2672 2673 /* check if native SGL is needed */ 2674 if (!build_native_sgl) 2675 return 1; 2676 2677 /* 2678 * Native SGL is needed. 2679 * Put a chain element in main message frame that points to the first 2680 * chain buffer. 2681 * 2682 * NOTE: The ChainOffset field must be 0 when using a chain pointer to 2683 * a native SGL. 
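 * (Presumably a non-zero ChainOffset would make the IOC treat the target as
 * a further IEEE SGL chain segment rather than as an NVMe PRP list.)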
2684 */ 2685 2686 /* Set main message chain element pointer */ 2687 main_chain_element = (pMpi25IeeeSgeChain64_t)cm->cm_sge; 2688 2689 /* 2690 * For NVMe the chain element needs to be the 2nd SGL entry in the main 2691 * message. 2692 */ 2693 main_chain_element = (Mpi25IeeeSgeChain64_t *) 2694 ((uint8_t *)main_chain_element + sizeof(MPI25_IEEE_SGE_CHAIN64)); 2695 2696 /* 2697 * For the PRP entries, use the specially allocated buffer of 2698 * contiguous memory. PRP Page allocation failures should not happen 2699 * because there should be enough PRP page buffers to account for the 2700 * possible NVMe QDepth. 2701 */ 2702 prp_page_info = mpr_alloc_prp_page(sc); 2703 KASSERT(prp_page_info != NULL, ("%s: There are no PRP Pages left to be " 2704 "used for building a native NVMe SGL.\n", __func__)); 2705 curr_buff = (uint32_t *)prp_page_info->prp_page; 2706 msg_phys = (uint64_t *)(uintptr_t)prp_page_info->prp_page_busaddr; 2707 2708 /* 2709 * Insert the allocated PRP page into the command's PRP page list. This 2710 * will be freed when the command is freed. 2711 */ 2712 TAILQ_INSERT_TAIL(&cm->cm_prp_page_list, prp_page_info, prp_page_link); 2713 2714 /* 2715 * Check if we are within 1 entry of a page boundary we don't want our 2716 * first entry to be a PRP List entry. 2717 */ 2718 page_mask_result = (uintptr_t)((uint8_t *)curr_buff + prp_size) & 2719 page_mask; 2720 if (!page_mask_result) { 2721 /* Bump up to next page boundary. */ 2722 curr_buff = (uint32_t *)((uint8_t *)curr_buff + prp_size); 2723 msg_phys = (uint64_t *)((uint8_t *)msg_phys + prp_size); 2724 } 2725 2726 /* Fill in the chain element and make it an NVMe segment type. */ 2727 main_chain_element->Address.High = 2728 htole32((uint32_t)((uint64_t)(uintptr_t)msg_phys >> 32)); 2729 main_chain_element->Address.Low = 2730 htole32((uint32_t)(uintptr_t)msg_phys); 2731 main_chain_element->NextChainOffset = 0; 2732 main_chain_element->Flags = MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT | 2733 MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR | 2734 MPI26_IEEE_SGE_FLAGS_NSF_NVME_PRP; 2735 2736 /* Set SGL pointer to start of contiguous PCIe buffer. */ 2737 ptr_sgl = curr_buff; 2738 sge_dwords = 2; 2739 num_entries = 0; 2740 2741 /* 2742 * NVMe has a very convoluted PRP format. One PRP is required for each 2743 * page or partial page. We need to split up OS SG entries if they are 2744 * longer than one page or cross a page boundary. We also have to insert 2745 * a PRP list pointer entry as the last entry in each physical page of 2746 * the PRP list. 2747 * 2748 * NOTE: The first PRP "entry" is actually placed in the first SGL entry 2749 * in the main message in IEEE 64 format. The 2nd entry in the main 2750 * message is the chain element, and the rest of the PRP entries are 2751 * built in the contiguous PCIe buffer. 2752 */ 2753 first_prp_entry = 1; 2754 ptr_first_sgl = (uint32_t *)cm->cm_sge; 2755 2756 for (i = 0; i < segs_left; i++) { 2757 /* Get physical address and length of this SG entry. */ 2758 paddr = segs[i].ds_addr; 2759 length = segs[i].ds_len; 2760 2761 /* 2762 * Check whether a given SGE buffer lies on a non-PAGED 2763 * boundary if this is not the first page. If so, this is not 2764 * expected so have FW build the SGL. 2765 */ 2766 if (i) { 2767 if ((uint32_t)paddr & page_mask) { 2768 mpr_dprint(sc, MPR_ERROR, "Unaligned SGE while " 2769 "building NVMe PRPs, low address is 0x%x\n", 2770 (uint32_t)paddr); 2771 return 1; 2772 } 2773 } 2774 2775 /* Apart from last SGE, if any other SGE boundary is not page 2776 * aligned then it means that hole exists. 
Existence of hole 2777 * leads to data corruption. So fallback to IEEE SGEs. 2778 */ 2779 if (i != (segs_left - 1)) { 2780 if (((uint32_t)paddr + length) & page_mask) { 2781 mpr_dprint(sc, MPR_ERROR, "Unaligned SGE " 2782 "boundary while building NVMe PRPs, low " 2783 "address: 0x%x and length: %u\n", 2784 (uint32_t)paddr, length); 2785 return 1; 2786 } 2787 } 2788 2789 /* Loop while the length is not zero. */ 2790 while (length) { 2791 /* 2792 * Check if we need to put a list pointer here if we are 2793 * at page boundary - prp_size. 2794 */ 2795 page_mask_result = (uintptr_t)((uint8_t *)ptr_sgl + 2796 prp_size) & page_mask; 2797 if (!page_mask_result) { 2798 /* 2799 * Need to put a PRP list pointer here. 2800 */ 2801 msg_phys = (uint64_t *)((uint8_t *)msg_phys + 2802 prp_size); 2803 *ptr_sgl = htole32((uintptr_t)msg_phys); 2804 *(ptr_sgl+1) = htole32((uint64_t)(uintptr_t) 2805 msg_phys >> 32); 2806 ptr_sgl += sge_dwords; 2807 num_entries++; 2808 } 2809 2810 /* Need to handle if entry will be part of a page. */ 2811 offset = (uint32_t)paddr & page_mask; 2812 entry_len = PAGE_SIZE - offset; 2813 if (first_prp_entry) { 2814 /* 2815 * Put IEEE entry in first SGE in main message. 2816 * (Simple element, System addr, not end of 2817 * list.) 2818 */ 2819 *ptr_first_sgl = htole32((uint32_t)paddr); 2820 *(ptr_first_sgl + 1) = 2821 htole32((uint32_t)((uint64_t)paddr >> 32)); 2822 *(ptr_first_sgl + 2) = htole32(entry_len); 2823 *(ptr_first_sgl + 3) = 0; 2824 2825 /* No longer the first PRP entry. */ 2826 first_prp_entry = 0; 2827 } else { 2828 /* Put entry in list. */ 2829 *ptr_sgl = htole32((uint32_t)paddr); 2830 *(ptr_sgl + 1) = 2831 htole32((uint32_t)((uint64_t)paddr >> 32)); 2832 2833 /* Bump ptr_sgl, msg_phys, and num_entries. */ 2834 ptr_sgl += sge_dwords; 2835 msg_phys = (uint64_t *)((uint8_t *)msg_phys + 2836 prp_size); 2837 num_entries++; 2838 } 2839 2840 /* Bump the phys address by the entry_len. */ 2841 paddr += entry_len; 2842 2843 /* Decrement length accounting for last partial page. */ 2844 if (entry_len > length) 2845 length = 0; 2846 else 2847 length -= entry_len; 2848 } 2849 } 2850 2851 /* Set chain element Length. */ 2852 main_chain_element->Length = htole32(num_entries * prp_size); 2853 2854 /* Return 0, indicating we built a native SGL. */ 2855 return 0; 2856} 2857 2858/* 2859 * Add a chain element as the next SGE for the specified command. 2860 * Reset cm_sge and cm_sgesize to indicate all the available space. Chains are 2861 * only required for IEEE commands. Therefore there is no code for commands 2862 * that have the MPR_CM_FLAGS_SGE_SIMPLE flag set (and those commands 2863 * shouldn't be requesting chains). 2864 */ 2865static int 2866mpr_add_chain(struct mpr_command *cm, int segsleft) 2867{ 2868 struct mpr_softc *sc = cm->cm_sc; 2869 MPI2_REQUEST_HEADER *req; 2870 MPI25_IEEE_SGE_CHAIN64 *ieee_sgc; 2871 struct mpr_chain *chain; 2872 int sgc_size, current_segs, rem_segs, segs_per_frame; 2873 uint8_t next_chain_offset = 0; 2874 2875 /* 2876 * Fail if a command is requesting a chain for SIMPLE SGE's. For SAS3 2877 * only IEEE commands should be requesting chains. Return some error 2878 * code other than 0. 
 */
	if (cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE) {
		mpr_dprint(sc, MPR_ERROR, "A chain element cannot be added to "
		    "an MPI SGL.\n");
		return(ENOBUFS);
	}

	sgc_size = sizeof(MPI25_IEEE_SGE_CHAIN64);
	if (cm->cm_sglsize < sgc_size)
		panic("MPR: Need SGE Error Code\n");

	chain = mpr_alloc_chain(cm->cm_sc);
	if (chain == NULL)
		return (ENOBUFS);

	/*
	 * Note: a double-linked list is used to make it easier to walk for
	 * debugging.
	 */
	TAILQ_INSERT_TAIL(&cm->cm_chain_list, chain, chain_link);

	/*
	 * Need to know if the number of frames left is more than 1 or not.
	 * If more than 1 frame is required, NextChainOffset will need to be
	 * set, which will just be the last segment of the frame.
	 */
	rem_segs = 0;
	if (cm->cm_sglsize < (sgc_size * segsleft)) {
		/*
		 * rem_segs is the number of segments remaining after the
		 * segments that will go into the current frame.  Since it is
		 * known that at least one more frame is required, account for
		 * the chain element.  To know if more than one more frame is
		 * required, just check if there will be a remainder after
		 * using the current frame (with this chain) and the next
		 * frame.  If so, the NextChainOffset must be the last element
		 * of the next frame.
		 */
		current_segs = (cm->cm_sglsize / sgc_size) - 1;
		rem_segs = segsleft - current_segs;
		segs_per_frame = sc->chain_frame_size / sgc_size;
		if (rem_segs > segs_per_frame) {
			next_chain_offset = segs_per_frame - 1;
		}
	}
	ieee_sgc = &((MPI25_SGE_IO_UNION *)cm->cm_sge)->IeeeChain;
	ieee_sgc->Length = next_chain_offset ?
	    htole32((uint32_t)sc->chain_frame_size) :
	    htole32((uint32_t)rem_segs * (uint32_t)sgc_size);
	ieee_sgc->NextChainOffset = next_chain_offset;
	ieee_sgc->Flags = (MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
	    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
	ieee_sgc->Address.Low = htole32(chain->chain_busaddr);
	ieee_sgc->Address.High = htole32(chain->chain_busaddr >> 32);
	cm->cm_sge = &((MPI25_SGE_IO_UNION *)chain->chain)->IeeeSimple;
	req = (MPI2_REQUEST_HEADER *)cm->cm_req;
	req->ChainOffset = (sc->chain_frame_size - sgc_size) >> 4;

	cm->cm_sglsize = sc->chain_frame_size;
	return (0);
}

/*
 * Add one scatter-gather element to the scatter-gather list for a command.
 * Maintain cm_sglsize and cm_sge as the remaining size and pointer to the
 * next SGE to fill in, respectively.  In Gen3, the MPI SGL does not have a
 * chain, so don't consider any chain additions.
 */
int
mpr_push_sge(struct mpr_command *cm, MPI2_SGE_SIMPLE64 *sge, size_t len,
    int segsleft)
{
	uint32_t saved_buf_len, saved_address_low, saved_address_high;
	u32 sge_flags;

	/*
	 * case 1: >=1 more segment, no room for anything (error)
	 * case 2: 1 more segment and enough room for it
	 */

	if (cm->cm_sglsize < (segsleft * sizeof(MPI2_SGE_SIMPLE64))) {
		mpr_dprint(cm->cm_sc, MPR_ERROR,
		    "%s: warning: Not enough room for MPI SGL in frame.\n",
		    __func__);
		return(ENOBUFS);
	}

	KASSERT(segsleft == 1,
	    ("segsleft cannot be more than 1 for an MPI SGL; segsleft = %d\n",
	    segsleft));

	/*
	 * There is one more segment left to add for the MPI SGL and there is
	 * enough room in the frame to add it.
This is the normal case because 2973 * MPI SGL's don't have chains, otherwise something is wrong. 2974 * 2975 * If this is a bi-directional request, need to account for that 2976 * here. Save the pre-filled sge values. These will be used 2977 * either for the 2nd SGL or for a single direction SGL. If 2978 * cm_out_len is non-zero, this is a bi-directional request, so 2979 * fill in the OUT SGL first, then the IN SGL, otherwise just 2980 * fill in the IN SGL. Note that at this time, when filling in 2981 * 2 SGL's for a bi-directional request, they both use the same 2982 * DMA buffer (same cm command). 2983 */ 2984 saved_buf_len = sge->FlagsLength & 0x00FFFFFF; 2985 saved_address_low = sge->Address.Low; 2986 saved_address_high = sge->Address.High; 2987 if (cm->cm_out_len) { 2988 sge->FlagsLength = cm->cm_out_len | 2989 ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | 2990 MPI2_SGE_FLAGS_END_OF_BUFFER | 2991 MPI2_SGE_FLAGS_HOST_TO_IOC | 2992 MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << 2993 MPI2_SGE_FLAGS_SHIFT); 2994 cm->cm_sglsize -= len; 2995 /* Endian Safe code */ 2996 sge_flags = sge->FlagsLength; 2997 sge->FlagsLength = htole32(sge_flags); 2998 sge->Address.High = htole32(sge->Address.High); 2999 sge->Address.Low = htole32(sge->Address.Low); 3000 bcopy(sge, cm->cm_sge, len); 3001 cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); 3002 } 3003 sge->FlagsLength = saved_buf_len | 3004 ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT | 3005 MPI2_SGE_FLAGS_END_OF_BUFFER | 3006 MPI2_SGE_FLAGS_LAST_ELEMENT | 3007 MPI2_SGE_FLAGS_END_OF_LIST | 3008 MPI2_SGE_FLAGS_64_BIT_ADDRESSING) << 3009 MPI2_SGE_FLAGS_SHIFT); 3010 if (cm->cm_flags & MPR_CM_FLAGS_DATAIN) { 3011 sge->FlagsLength |= 3012 ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) << 3013 MPI2_SGE_FLAGS_SHIFT); 3014 } else { 3015 sge->FlagsLength |= 3016 ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) << 3017 MPI2_SGE_FLAGS_SHIFT); 3018 } 3019 sge->Address.Low = saved_address_low; 3020 sge->Address.High = saved_address_high; 3021 3022 cm->cm_sglsize -= len; 3023 /* Endian Safe code */ 3024 sge_flags = sge->FlagsLength; 3025 sge->FlagsLength = htole32(sge_flags); 3026 sge->Address.High = htole32(sge->Address.High); 3027 sge->Address.Low = htole32(sge->Address.Low); 3028 bcopy(sge, cm->cm_sge, len); 3029 cm->cm_sge = (MPI2_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + len); 3030 return (0); 3031} 3032 3033/* 3034 * Add one IEEE scatter-gather element (chain or simple) to the IEEE scatter- 3035 * gather list for a command. Maintain cm_sglsize and cm_sge as the 3036 * remaining size and pointer to the next SGE to fill in, respectively. 3037 */ 3038int 3039mpr_push_ieee_sge(struct mpr_command *cm, void *sgep, int segsleft) 3040{ 3041 MPI2_IEEE_SGE_SIMPLE64 *sge = sgep; 3042 int error, ieee_sge_size = sizeof(MPI25_SGE_IO_UNION); 3043 uint32_t saved_buf_len, saved_address_low, saved_address_high; 3044 uint32_t sge_length; 3045 3046 /* 3047 * case 1: No room for chain or segment (error). 3048 * case 2: Two or more segments left but only room for chain. 3049 * case 3: Last segment and room for it, so set flags. 3050 */ 3051 3052 /* 3053 * There should be room for at least one element, or there is a big 3054 * problem. 3055 */ 3056 if (cm->cm_sglsize < ieee_sge_size) 3057 panic("MPR: Need SGE Error Code\n"); 3058 3059 if ((segsleft >= 2) && (cm->cm_sglsize < (ieee_sge_size * 2))) { 3060 if ((error = mpr_add_chain(cm, segsleft)) != 0) 3061 return (error); 3062 } 3063 3064 if (segsleft == 1) { 3065 /* 3066 * If this is a bi-directional request, need to account for that 3067 * here. 
Save the pre-filled sge values. These will be used 3068 * either for the 2nd SGL or for a single direction SGL. If 3069 * cm_out_len is non-zero, this is a bi-directional request, so 3070 * fill in the OUT SGL first, then the IN SGL, otherwise just 3071 * fill in the IN SGL. Note that at this time, when filling in 3072 * 2 SGL's for a bi-directional request, they both use the same 3073 * DMA buffer (same cm command). 3074 */ 3075 saved_buf_len = sge->Length; 3076 saved_address_low = sge->Address.Low; 3077 saved_address_high = sge->Address.High; 3078 if (cm->cm_out_len) { 3079 sge->Length = cm->cm_out_len; 3080 sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | 3081 MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); 3082 cm->cm_sglsize -= ieee_sge_size; 3083 /* Endian Safe code */ 3084 sge_length = sge->Length; 3085 sge->Length = htole32(sge_length); 3086 sge->Address.High = htole32(sge->Address.High); 3087 sge->Address.Low = htole32(sge->Address.Low); 3088 bcopy(sgep, cm->cm_sge, ieee_sge_size); 3089 cm->cm_sge = 3090 (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + 3091 ieee_sge_size); 3092 } 3093 sge->Length = saved_buf_len; 3094 sge->Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | 3095 MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR | 3096 MPI25_IEEE_SGE_FLAGS_END_OF_LIST); 3097 sge->Address.Low = saved_address_low; 3098 sge->Address.High = saved_address_high; 3099 } 3100 3101 cm->cm_sglsize -= ieee_sge_size; 3102 /* Endian Safe code */ 3103 sge_length = sge->Length; 3104 sge->Length = htole32(sge_length); 3105 sge->Address.High = htole32(sge->Address.High); 3106 sge->Address.Low = htole32(sge->Address.Low); 3107 bcopy(sgep, cm->cm_sge, ieee_sge_size); 3108 cm->cm_sge = (MPI25_SGE_IO_UNION *)((uintptr_t)cm->cm_sge + 3109 ieee_sge_size); 3110 return (0); 3111} 3112 3113/* 3114 * Add one dma segment to the scatter-gather list for a command. 3115 */ 3116int 3117mpr_add_dmaseg(struct mpr_command *cm, vm_paddr_t pa, size_t len, u_int flags, 3118 int segsleft) 3119{ 3120 MPI2_SGE_SIMPLE64 sge; 3121 MPI2_IEEE_SGE_SIMPLE64 ieee_sge; 3122 3123 if (!(cm->cm_flags & MPR_CM_FLAGS_SGE_SIMPLE)) { 3124 ieee_sge.Flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT | 3125 MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR); 3126 ieee_sge.Length = len; 3127 mpr_from_u64(pa, &ieee_sge.Address); 3128 3129 return (mpr_push_ieee_sge(cm, &ieee_sge, segsleft)); 3130 } else { 3131 /* 3132 * This driver always uses 64-bit address elements for 3133 * simplicity. 3134 */ 3135 flags |= MPI2_SGE_FLAGS_SIMPLE_ELEMENT | 3136 MPI2_SGE_FLAGS_64_BIT_ADDRESSING; 3137 /* Set Endian safe macro in mpr_push_sge */ 3138 sge.FlagsLength = len | (flags << MPI2_SGE_FLAGS_SHIFT); 3139 mpr_from_u64(pa, &sge.Address); 3140 3141 return (mpr_push_sge(cm, &sge, sizeof sge, segsleft)); 3142 } 3143} 3144 3145static void 3146mpr_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) 3147{ 3148 struct mpr_softc *sc; 3149 struct mpr_command *cm; 3150 u_int i, dir, sflags; 3151 3152 cm = (struct mpr_command *)arg; 3153 sc = cm->cm_sc; 3154 3155 /* 3156 * In this case, just print out a warning and let the chip tell the 3157 * user they did the wrong thing. 3158 */ 3159 if ((cm->cm_max_segs != 0) && (nsegs > cm->cm_max_segs)) { 3160 mpr_dprint(sc, MPR_ERROR, "%s: warning: busdma returned %d " 3161 "segments, more than the %d allowed\n", __func__, nsegs, 3162 cm->cm_max_segs); 3163 } 3164 3165 /* 3166 * Set up DMA direction flags. Bi-directional requests are also handled 3167 * here. In that case, both direction flags will be set. 
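 * (SMP passthrough is the main bi-directional user here: the request frame
 * is written out to the target and the response frame is read back, hence
 * the PREWRITE|PREREAD sync below.)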
 */
	sflags = 0;
	if (cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) {
		/*
		 * We have to add a special case for SMP passthrough, there
		 * is no easy way to generically handle it.  The first
		 * S/G element is used for the command (therefore the
		 * direction bit needs to be set).  The second one is used
		 * for the reply.  We'll leave it to the caller to make
		 * sure we only have two buffers.
		 */
		/*
		 * Even though the busdma man page says it doesn't make
		 * sense to have both direction flags, it does in this case.
		 * We have one s/g element being accessed in each direction.
		 */
		dir = BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD;

		/*
		 * Set the direction flag on the first buffer in the SMP
		 * passthrough request.  We'll clear it for the second one.
		 */
		sflags |= MPI2_SGE_FLAGS_DIRECTION |
		    MPI2_SGE_FLAGS_END_OF_BUFFER;
	} else if (cm->cm_flags & MPR_CM_FLAGS_DATAOUT) {
		sflags |= MPI2_SGE_FLAGS_HOST_TO_IOC;
		dir = BUS_DMASYNC_PREWRITE;
	} else
		dir = BUS_DMASYNC_PREREAD;

	/* Check if a native SG list is needed for an NVMe PCIe device. */
	if (cm->cm_targ && cm->cm_targ->is_nvme &&
	    mpr_check_pcie_native_sgl(sc, cm, segs, nsegs) == 0) {
		/* A native SG list was built, skip to end. */
		goto out;
	}

	for (i = 0; i < nsegs; i++) {
		if ((cm->cm_flags & MPR_CM_FLAGS_SMP_PASS) && (i != 0)) {
			sflags &= ~MPI2_SGE_FLAGS_DIRECTION;
		}
		error = mpr_add_dmaseg(cm, segs[i].ds_addr, segs[i].ds_len,
		    sflags, nsegs - i);
		if (error != 0) {
			/* Resource shortage, roll back! */
			if (ratecheck(&sc->lastfail, &mpr_chainfail_interval))
				mpr_dprint(sc, MPR_INFO, "Out of chain frames, "
				    "consider increasing hw.mpr.max_chains.\n");
			cm->cm_flags |= MPR_CM_FLAGS_CHAIN_FAILED;
			mpr_complete_command(sc, cm);
			return;
		}
	}

out:
	bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap, dir);
	mpr_enqueue_request(sc, cm);

	return;
}

static void
mpr_data_cb2(void *arg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize,
    int error)
{
	mpr_data_cb(arg, segs, nsegs, error);
}

/*
 * This is the routine to enqueue commands asynchronously.
 * Note that the only error path here is from bus_dmamap_load(), which can
 * return EINPROGRESS if it is waiting for resources.  Other than this, it's
 * assumed that if you have a command in-hand, then you have enough credits
 * to use it.
 */
int
mpr_map_command(struct mpr_softc *sc, struct mpr_command *cm)
{
	int error = 0;

	if (cm->cm_flags & MPR_CM_FLAGS_USE_UIO) {
		error = bus_dmamap_load_uio(sc->buffer_dmat, cm->cm_dmamap,
		    &cm->cm_uio, mpr_data_cb2, cm, 0);
	} else if (cm->cm_flags & MPR_CM_FLAGS_USE_CCB) {
		error = bus_dmamap_load_ccb(sc->buffer_dmat, cm->cm_dmamap,
		    cm->cm_data, mpr_data_cb, cm, 0);
	} else if ((cm->cm_data != NULL) && (cm->cm_length != 0)) {
		error = bus_dmamap_load(sc->buffer_dmat, cm->cm_dmamap,
		    cm->cm_data, cm->cm_length, mpr_data_cb, cm, 0);
	} else {
		/* Add a zero-length element as needed */
		if (cm->cm_sge != NULL)
			mpr_add_dmaseg(cm, 0, 0, 0, 1);
		mpr_enqueue_request(sc, cm);
	}

	return (error);
}

/*
 * This is the routine to enqueue commands synchronously.
 * An error of EINPROGRESS from mpr_map_command() is ignored since the
 * command will be executed and enqueued automatically.  Other errors come
 * from msleep().
 */
int
mpr_wait_command(struct mpr_softc *sc, struct mpr_command **cmp, int timeout,
    int sleep_flag)
{
	int error, rc;
	struct timeval cur_time, start_time;
	struct mpr_command *cm = *cmp;

	if (sc->mpr_flags & MPR_FLAGS_DIAGRESET)
		return EBUSY;

	cm->cm_complete = NULL;
	cm->cm_flags |= (MPR_CM_FLAGS_WAKEUP + MPR_CM_FLAGS_POLLED);
	error = mpr_map_command(sc, cm);
	if ((error != 0) && (error != EINPROGRESS))
		return (error);

	/*
	 * Check for context and wait for 50 mSec at a time until time has
	 * expired or the command has finished.  If msleep can't be used,
	 * need to poll.
	 */
#if __FreeBSD_version >= 1000029
	if (curthread->td_no_sleeping)
#else //__FreeBSD_version < 1000029
	if (curthread->td_pflags & TDP_NOSLEEPING)
#endif //__FreeBSD_version >= 1000029
		sleep_flag = NO_SLEEP;
	getmicrouptime(&start_time);
	if (mtx_owned(&sc->mpr_mtx) && sleep_flag == CAN_SLEEP) {
		error = msleep(cm, &sc->mpr_mtx, 0, "mprwait", timeout*hz);
		if (error == EWOULDBLOCK) {
			/*
			 * Record the actual elapsed time in the case of a
			 * timeout for the message below.
			 */
			getmicrouptime(&cur_time);
			timevalsub(&cur_time, &start_time);
		}
	} else {
		while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) {
			mpr_intr_locked(sc);
			if (sleep_flag == CAN_SLEEP)
				pause("mprwait", hz/20);
			else
				DELAY(50000);

			getmicrouptime(&cur_time);
			timevalsub(&cur_time, &start_time);
			if (cur_time.tv_sec > timeout) {
				error = EWOULDBLOCK;
				break;
			}
		}
	}

	if (error == EWOULDBLOCK) {
		mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s, timeout=%d,"
		    " elapsed=%jd\n", __func__, timeout,
		    (intmax_t)cur_time.tv_sec);
		rc = mpr_reinit(sc);
		mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ? "success" :
		    "failed");
		if (sc->mpr_flags & MPR_FLAGS_REALLOCATED) {
			/*
			 * Tell the caller that we freed the command in a
			 * reinit.
			 */
			*cmp = NULL;
		}
		error = ETIMEDOUT;
	}
	return (error);
}

/*
 * This is the routine to enqueue a command synchronously and poll for
 * completion.  Its use should be rare.
 */
int
mpr_request_polled(struct mpr_softc *sc, struct mpr_command **cmp)
{
	int error, rc;
	struct timeval cur_time, start_time;
	struct mpr_command *cm = *cmp;

	error = 0;

	cm->cm_flags |= MPR_CM_FLAGS_POLLED;
	cm->cm_complete = NULL;
	mpr_map_command(sc, cm);

	getmicrouptime(&start_time);
	while ((cm->cm_flags & MPR_CM_FLAGS_COMPLETE) == 0) {
		mpr_intr_locked(sc);

		if (mtx_owned(&sc->mpr_mtx))
			msleep(&sc->msleep_fake_chan, &sc->mpr_mtx, 0,
			    "mprpoll", hz/20);
		else
			pause("mprpoll", hz/20);

		/*
		 * Check for real-time timeout and fail if more than 60
		 * seconds.
		 */
		getmicrouptime(&cur_time);
		timevalsub(&cur_time, &start_time);
		if (cur_time.tv_sec > 60) {
			mpr_dprint(sc, MPR_FAULT, "polling failed\n");
			error = ETIMEDOUT;
			break;
		}
	}

	if (error) {
		mpr_dprint(sc, MPR_FAULT, "Calling Reinit from %s\n", __func__);
		rc = mpr_reinit(sc);
		mpr_dprint(sc, MPR_FAULT, "Reinit %s\n", (rc == 0) ?
		    "success" : "failed");

		if (sc->mpr_flags & MPR_FLAGS_REALLOCATED) {
			/*
			 * Tell the caller that we freed the command in a
			 * reinit.
			 */
			*cmp = NULL;
		}
	}
	return (error);
}

/*
 * The MPT driver had a verbose interface for config pages.  In this driver,
 * reduce it to much simpler terms, similar to the Linux driver.
 */
int
mpr_read_config_page(struct mpr_softc *sc, struct mpr_config_params *params)
{
	MPI2_CONFIG_REQUEST *req;
	struct mpr_command *cm;
	int error;

	if (sc->mpr_flags & MPR_FLAGS_BUSY) {
		return (EBUSY);
	}

	cm = mpr_alloc_command(sc);
	if (cm == NULL) {
		return (EBUSY);
	}

	req = (MPI2_CONFIG_REQUEST *)cm->cm_req;
	req->Function = MPI2_FUNCTION_CONFIG;
	req->Action = params->action;
	req->SGLFlags = 0;
	req->ChainOffset = 0;
	req->PageAddress = params->page_address;
	if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) {
		MPI2_CONFIG_EXTENDED_PAGE_HEADER *hdr;

		hdr = &params->hdr.Ext;
		req->ExtPageType = hdr->ExtPageType;
		req->ExtPageLength = hdr->ExtPageLength;
		req->Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED;
		req->Header.PageLength = 0; /* Must be set to zero */
		req->Header.PageNumber = hdr->PageNumber;
		req->Header.PageVersion = hdr->PageVersion;
	} else {
		MPI2_CONFIG_PAGE_HEADER *hdr;

		hdr = &params->hdr.Struct;
		req->Header.PageType = hdr->PageType;
		req->Header.PageNumber = hdr->PageNumber;
		req->Header.PageLength = hdr->PageLength;
		req->Header.PageVersion = hdr->PageVersion;
	}

	cm->cm_data = params->buffer;
	cm->cm_length = params->length;
	if (cm->cm_data != NULL) {
		cm->cm_sge = &req->PageBufferSGE;
		cm->cm_sglsize = sizeof(MPI2_SGE_IO_UNION);
		cm->cm_flags = MPR_CM_FLAGS_SGE_SIMPLE | MPR_CM_FLAGS_DATAIN;
	} else
		cm->cm_sge = NULL;
	cm->cm_desc.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;

	cm->cm_complete_data = params;
	if (params->callback != NULL) {
		cm->cm_complete = mpr_config_complete;
		return (mpr_map_command(sc, cm));
	} else {
		error = mpr_wait_command(sc, &cm, 0, CAN_SLEEP);
		if (error) {
			mpr_dprint(sc, MPR_FAULT,
			    "Error %d reading config page\n", error);
			if (cm != NULL)
				mpr_free_command(sc, cm);
			return (error);
		}
		mpr_config_complete(sc, cm);
	}

	return (0);
}

int
mpr_write_config_page(struct mpr_softc *sc, struct mpr_config_params *params)
{
	return (EINVAL);
}

static void
mpr_config_complete(struct mpr_softc *sc, struct mpr_command *cm)
{
	MPI2_CONFIG_REPLY *reply;
	struct mpr_config_params *params;

	MPR_FUNCTRACE(sc);
	params = cm->cm_complete_data;

	if (cm->cm_data != NULL) {
		bus_dmamap_sync(sc->buffer_dmat, cm->cm_dmamap,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(sc->buffer_dmat, cm->cm_dmamap);
	}

	/*
	 * XXX KDM need to do more error recovery?  This results in the
	 * device in question not getting probed.
3500 */ 3501 if ((cm->cm_flags & MPR_CM_FLAGS_ERROR_MASK) != 0) { 3502 params->status = MPI2_IOCSTATUS_BUSY; 3503 goto done; 3504 } 3505 3506 reply = (MPI2_CONFIG_REPLY *)cm->cm_reply; 3507 if (reply == NULL) { 3508 params->status = MPI2_IOCSTATUS_BUSY; 3509 goto done; 3510 } 3511 params->status = reply->IOCStatus; 3512 if (params->hdr.Struct.PageType == MPI2_CONFIG_PAGETYPE_EXTENDED) { 3513 params->hdr.Ext.ExtPageType = reply->ExtPageType; 3514 params->hdr.Ext.ExtPageLength = reply->ExtPageLength; 3515 params->hdr.Ext.PageType = reply->Header.PageType; 3516 params->hdr.Ext.PageNumber = reply->Header.PageNumber; 3517 params->hdr.Ext.PageVersion = reply->Header.PageVersion; 3518 } else { 3519 params->hdr.Struct.PageType = reply->Header.PageType; 3520 params->hdr.Struct.PageNumber = reply->Header.PageNumber; 3521 params->hdr.Struct.PageLength = reply->Header.PageLength; 3522 params->hdr.Struct.PageVersion = reply->Header.PageVersion; 3523 } 3524 3525done: 3526 mpr_free_command(sc, cm); 3527 if (params->callback != NULL) 3528 params->callback(sc, params); 3529 3530 return; 3531} 3532
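
/*
 * A minimal caller sketch for mpr_read_config_page(), assuming a synchronous
 * header-only read of a unit (non-extended) page; the particular page
 * constants chosen here are illustrative:
 *
 *	struct mpr_config_params params;
 *
 *	bzero(&params, sizeof(params));
 *	params.action = MPI2_CONFIG_ACTION_PAGE_HEADER;
 *	params.hdr.Struct.PageType = MPI2_CONFIG_PAGETYPE_BIOS;
 *	params.hdr.Struct.PageNumber = 3;
 *	params.page_address = 0;
 *	params.buffer = NULL;		(header-only read, no SGE)
 *	params.length = 0;
 *	params.callback = NULL;		(NULL selects the mpr_wait_command path)
 *	if (mpr_read_config_page(sc, &params) != 0 ||
 *	    params.status != MPI2_IOCSTATUS_SUCCESS)
 *		(handle the error)
 */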