/*
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * BSD LICENSE
 *
 * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Support routines for v3+ hardware
 */

#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include "registers.h"
#include "hw.h"
#include "dma.h"
#include "dma_v2.h"

/* ioat hardware assumes at least two sources for raid operations */
#define src_cnt_to_sw(x) ((x) + 2)
#define src_cnt_to_hw(x) ((x) - 2)

/* provide a lookup table for setting the source address in the base or
 * extended descriptor of an xor or pq descriptor: bit idx of *_idx_to_desc
 * selects the extended descriptor, *_idx_to_field gives the u64 slot in it
 */
static const u8 xor_idx_to_desc = 0xe0;
static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
static const u8 pq_idx_to_desc = 0xf8;
static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };

static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	return raw->field[xor_idx_to_field[idx]];
}

static void xor_set_src(struct ioat_raw_descriptor *descs[2],
			dma_addr_t addr, u32 offset, int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	raw->field[xor_idx_to_field[idx]] = addr + offset;
}

static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	return raw->field[pq_idx_to_field[idx]];
}

static void pq_set_src(struct ioat_raw_descriptor *descs[2],
		       dma_addr_t addr, u32 offset, u8 coef, int idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	raw->field[pq_idx_to_field[idx]] = addr + offset;
	pq->coef[idx] = coef;
}

static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
			    struct ioat_ring_ent *desc, int idx)
{
	struct ioat_chan_common *chan = &ioat->base;
	struct pci_dev *pdev = chan->device->pdev;
	size_t len = desc->len;
	size_t offset = len - desc->hw->size;
	struct dma_async_tx_descriptor *tx = &desc->txd;
	enum dma_ctrl_flags flags = tx->flags;

	switch (desc->hw->ctl_f.op) {
	case IOAT_OP_COPY:
		if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
			ioat_dma_unmap(chan, flags, len, desc->hw);
		break;
	case IOAT_OP_FILL: {
		struct ioat_fill_descriptor *hw = desc->fill;

		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
			ioat_unmap(pdev, hw->dst_addr - offset, len,
				   PCI_DMA_FROMDEVICE, flags, 1);
		break;
	}
	case IOAT_OP_XOR_VAL:
	case IOAT_OP_XOR: {
		struct ioat_xor_descriptor *xor = desc->xor;
		struct ioat_ring_ent *ext;
		struct ioat_xor_ext_descriptor *xor_ex = NULL;
		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
		struct ioat_raw_descriptor *descs[2];
		int i;

		if (src_cnt > 5) {
			ext = ioat2_get_ring_ent(ioat, idx + 1);
			xor_ex = ext->xor_ex;
		}

		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
			descs[0] = (struct ioat_raw_descriptor *) xor;
			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
			for (i = 0; i < src_cnt; i++) {
				dma_addr_t src = xor_get_src(descs, i);

				ioat_unmap(pdev, src - offset, len,
					   PCI_DMA_TODEVICE, flags, 0);
			}

			/* dest is a source in xor validate operations */
			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
				ioat_unmap(pdev, xor->dst_addr - offset, len,
					   PCI_DMA_TODEVICE, flags, 1);
				break;
			}
		}

		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
			ioat_unmap(pdev, xor->dst_addr - offset, len,
				   PCI_DMA_FROMDEVICE, flags, 1);
		break;
	}
	case IOAT_OP_PQ_VAL:
	case IOAT_OP_PQ: {
		struct ioat_pq_descriptor *pq = desc->pq;
		struct ioat_ring_ent *ext;
		struct ioat_pq_ext_descriptor *pq_ex = NULL;
		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
		struct ioat_raw_descriptor *descs[2];
		int i;

		if (src_cnt > 3) {
			ext = ioat2_get_ring_ent(ioat, idx + 1);
			pq_ex = ext->pq_ex;
		}

		/* in the 'continue' case don't unmap the dests as sources */
		if (dmaf_p_disabled_continue(flags))
			src_cnt--;
		else if (dmaf_continue(flags))
			src_cnt -= 3;

		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
			descs[0] = (struct ioat_raw_descriptor *) pq;
			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
			for (i = 0; i < src_cnt; i++) {
				dma_addr_t src = pq_get_src(descs, i);

				ioat_unmap(pdev, src - offset, len,
					   PCI_DMA_TODEVICE, flags, 0);
			}

			/* the dests are sources in pq validate operations */
			if (pq->ctl_f.op == IOAT_OP_PQ_VAL) {
				if (!(flags & DMA_PREP_PQ_DISABLE_P))
					ioat_unmap(pdev, pq->p_addr - offset,
						   len, PCI_DMA_TODEVICE, flags, 0);
				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
					ioat_unmap(pdev, pq->q_addr - offset,
						   len, PCI_DMA_TODEVICE, flags, 0);
				break;
			}
		}

		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
			if (!(flags & DMA_PREP_PQ_DISABLE_P))
				ioat_unmap(pdev, pq->p_addr - offset, len,
					   PCI_DMA_BIDIRECTIONAL, flags, 1);
			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
				ioat_unmap(pdev, pq->q_addr - offset, len,
					   PCI_DMA_BIDIRECTIONAL, flags, 1);
		}
		break;
	}
	default:
		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
			__func__, desc->hw->ctl_f.op);
	}
}

static bool desc_has_ext(struct ioat_ring_ent *desc)
{
	struct ioat_dma_descriptor *hw = desc->hw;

	if (hw->ctl_f.op == IOAT_OP_XOR ||
	    hw->ctl_f.op == IOAT_OP_XOR_VAL) {
		struct ioat_xor_descriptor *xor = desc->xor;

		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
			return true;
	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
		struct ioat_pq_descriptor *pq = desc->pq;

		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
			return true;
	}

	return false;
}

/**
 * __cleanup - reclaim used descriptors
 * @ioat: channel (ring) to clean
 *
 * The difference from the dma_v2.c __cleanup() is that this routine
 * handles extended descriptors and dma-unmapping raid operations.
 */
static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
{
	struct ioat_chan_common *chan = &ioat->base;
	struct ioat_ring_ent *desc;
	bool seen_current = false;
	int idx = ioat->tail, i;
	u16 active;

	dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
		__func__, ioat->head, ioat->tail, ioat->issued);

	active = ioat2_ring_active(ioat);
	for (i = 0; i < active && !seen_current; i++) {
		struct dma_async_tx_descriptor *tx;

		smp_read_barrier_depends();
		prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
		desc = ioat2_get_ring_ent(ioat, idx + i);
		dump_desc_dbg(ioat, desc);
		tx = &desc->txd;
		if (tx->cookie) {
			chan->completed_cookie = tx->cookie;
			ioat3_dma_unmap(ioat, desc, idx + i);
			tx->cookie = 0;
			if (tx->callback) {
				tx->callback(tx->callback_param);
				tx->callback = NULL;
			}
		}

		if (tx->phys == phys_complete)
			seen_current = true;

		/* skip extended descriptors */
		if (desc_has_ext(desc)) {
			BUG_ON(i + 1 >= active);
			i++;
		}
	}
	smp_mb(); /* finish all descriptor reads before incrementing tail */
	ioat->tail = idx + i;
	BUG_ON(active && !seen_current); /* no active descs have written a completion? */
	chan->last_completion = phys_complete;

	if (active - i == 0) {
		dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
			__func__);
		clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
		mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
	/* 5 microsecond delay per pending descriptor */
	writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
	       chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
}

static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
{
	struct ioat_chan_common *chan = &ioat->base;
	unsigned long phys_complete;

	spin_lock_bh(&chan->cleanup_lock);
	if (ioat_cleanup_preamble(chan, &phys_complete))
		__cleanup(ioat, phys_complete);
	spin_unlock_bh(&chan->cleanup_lock);
}

static void ioat3_cleanup_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);

	ioat3_cleanup(ioat);
	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
}

static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
{
	struct ioat_chan_common *chan = &ioat->base;
	unsigned long phys_complete;

	ioat2_quiesce(chan, 0);
	if (ioat_cleanup_preamble(chan, &phys_complete))
		__cleanup(ioat, phys_complete);

	__ioat2_restart_chan(ioat);
}

static void ioat3_timer_event(unsigned long data)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
	struct ioat_chan_common *chan = &ioat->base;

	if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
		unsigned long phys_complete;
		u64 status;

		status = ioat_chansts(chan);

		/* when halted due to errors check for channel
		 * programming errors before advancing the completion state
		 */
		if (is_ioat_halted(status)) {
			u32 chanerr;

			chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
			dev_err(to_dev(chan), "%s: Channel halted (%x)\n",
				__func__, chanerr);
			if (test_bit(IOAT_RUN, &chan->state))
				BUG_ON(is_ioat_bug(chanerr));
			else /* we never got off the ground */
				return;
		}

		/* if we haven't made progress and we have already
		 * acknowledged a pending completion once, then be more
		 * forceful with a restart
		 */
		spin_lock_bh(&chan->cleanup_lock);
		if (ioat_cleanup_preamble(chan, &phys_complete))
			__cleanup(ioat, phys_complete);
		else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
			spin_lock_bh(&ioat->prep_lock);
			ioat3_restart_channel(ioat);
			spin_unlock_bh(&ioat->prep_lock);
		} else {
			set_bit(IOAT_COMPLETION_ACK, &chan->state);
			mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
		}
		spin_unlock_bh(&chan->cleanup_lock);
	} else {
		u16 active;

		/* if the ring is idle, empty, and oversized try to step
		 * down the size
		 */
		spin_lock_bh(&chan->cleanup_lock);
		spin_lock_bh(&ioat->prep_lock);
		active = ioat2_ring_active(ioat);
		if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
			reshape_ring(ioat, ioat->alloc_order - 1);
		spin_unlock_bh(&ioat->prep_lock);
		spin_unlock_bh(&chan->cleanup_lock);

		/* keep shrinking until we get back to our minimum
		 * default size
		 */
		if (ioat->alloc_order > ioat_get_alloc_order())
			mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
	}
}

static enum dma_status
ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
		struct dma_tx_state *txstate)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);

	if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
		return DMA_SUCCESS;

	ioat3_cleanup(ioat);

	return ioat_tx_status(c, cookie, txstate);
}

static struct dma_async_tx_descriptor *
ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
		       size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_ring_ent *desc;
	size_t total_len = len;
	struct ioat_fill_descriptor *fill;
	u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
	int num_descs, idx, i;

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
		idx = ioat->head;
	else
		return NULL;
	i = 0;
	do {
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);

		desc = ioat2_get_ring_ent(ioat, idx + i);
		fill = desc->fill;

		fill->size = xfer_size;
		fill->src_data = src_data;
		fill->dst_addr = dest;
		fill->ctl = 0;
		fill->ctl_f.op = IOAT_OP_FILL;

		len -= xfer_size;
		dest += xfer_size;
		dump_desc_dbg(ioat, desc);
	} while (++i < num_descs);

	desc->txd.flags = flags;
	desc->len = total_len;
	fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	fill->ctl_f.compl_write = 1;
	dump_desc_dbg(ioat, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}

static struct dma_async_tx_descriptor *
__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
		      size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_xor_descriptor *xor;
	struct ioat_xor_ext_descriptor *xor_ex = NULL;
	struct ioat_dma_descriptor *hw;
	int num_descs, with_ext, idx, i;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

	BUG_ON(src_cnt < 2);

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	/* we need 2x the number of descriptors to cover greater than 5
	 * sources
	 */
	if (src_cnt > 5) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
		idx = ioat->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
		int s;

		desc = ioat2_get_ring_ent(ioat, idx + i);
		xor = desc->xor;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor xor_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat2_get_ring_ent(ioat, idx + i + 1);
		xor_ex = ext->xor_ex;

		descs[0] = (struct ioat_raw_descriptor *) xor;
		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
		for (s = 0; s < src_cnt; s++)
			xor_set_src(descs, src[s], offset, s);
		xor->size = xfer_size;
		xor->dst_addr = dest + offset;
		xor->ctl = 0;
		xor->ctl_f.op = op;
		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

		len -= xfer_size;
		offset += xfer_size;
		dump_desc_dbg(ioat, desc);
	} while ((i += 1 + with_ext) < num_descs);

	/* last xor descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

static struct dma_async_tx_descriptor *
ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
	       unsigned int src_cnt, size_t len, unsigned long flags)
{
	return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
}

struct dma_async_tx_descriptor *
ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
		   unsigned int src_cnt, size_t len,
		   enum sum_check_flags *result, unsigned long flags)
{
	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
				     src_cnt - 1, len, flags);
}

static void
dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
{
	struct device *dev = to_dev(&ioat->base);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
		pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++)
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
}

static struct dma_async_tx_descriptor *
__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
		     const dma_addr_t *dst, const dma_addr_t *src,
		     unsigned int src_cnt, const unsigned char *scf,
		     size_t len, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_chan_common *chan = &ioat->base;
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	struct ioat_pq_ext_descriptor *pq_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
	int i, s, idx, with_ext, num_descs;

	dev_dbg(to_dev(chan), "%s\n", __func__);
	/* the engine requires at least two sources (we provide
	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
	 */
	BUG_ON(src_cnt + dmaf_continue(flags) < 2);

	num_descs = ioat2_xferlen_to_descs(ioat, len);
	/* we need 2x the number of descriptors to cover greater than 3
	 * sources (we need 1 extra source in the q-only continuation
	 * case and 3 extra sources in the p+q continuation case).
	 */
	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat2_check_space_lock(ioat, num_descs+1) == 0)
		idx = ioat->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);

		desc = ioat2_get_ring_ent(ioat, idx + i);
		pq = desc->pq;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor pq_set_src() knows to not write
		 * to it in the single descriptor case
		 */
		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
		pq_ex = ext->pq_ex;

		descs[0] = (struct ioat_raw_descriptor *) pq;
		descs[1] = (struct ioat_raw_descriptor *) pq_ex;

		for (s = 0; s < src_cnt; s++)
			pq_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq_set_src(descs, dst[0], offset, 0, s++);
			pq_set_src(descs, dst[1], offset, 1, s++);
			pq_set_src(descs, dst[1], offset, 0, s++);
		}
		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat, desc, ext);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

static struct dma_async_tx_descriptor *
ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
	      unsigned int src_cnt, const unsigned char *scf, size_t len,
	      unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		dst[0] = dst[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		dst[1] = dst[0];

	/* handle the single source multiply case from the raid6
	 * recovery path
	 */
	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
		dma_addr_t single_source[2];
		unsigned char single_source_coef[2];

		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
		single_source[0] = src[0];
		single_source[1] = src[0];
		single_source_coef[0] = scf[0];
		single_source_coef[1] = 0;

		return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
					    single_source_coef, len, flags);
	} else
		return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
					    len, flags);
}

struct dma_async_tx_descriptor *
ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		  unsigned int src_cnt, const unsigned char *scf, size_t len,
		  enum sum_check_flags *pqres, unsigned long flags)
{
	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		pq[0] = pq[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		pq[1] = pq[0];

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*pqres = 0;

	return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
				    flags);
}

static struct dma_async_tx_descriptor *
ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
		 unsigned int src_cnt, size_t len, unsigned long flags)
{
	unsigned char scf[src_cnt];
	dma_addr_t pq[2];

	memset(scf, 0, src_cnt);
	pq[0] = dst;
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = dst; /* specify valid address for disabled result */

	return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
				    flags);
}

struct dma_async_tx_descriptor *
ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
		     unsigned int src_cnt, size_t len,
		     enum sum_check_flags *result, unsigned long flags)
{
	unsigned char scf[src_cnt];
	dma_addr_t pq[2];

	/* the cleanup routine only sets bits on validate failure, it
	 * does not clear bits on validate success... so clear it here
	 */
	*result = 0;

	memset(scf, 0, src_cnt);
	pq[0] = src[0];
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = pq[0]; /* specify valid address for disabled result */

	return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
				    len, flags);
}

static struct dma_async_tx_descriptor *
ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (ioat2_check_space_lock(ioat, 1) == 0)
		desc = ioat2_get_ring_ent(ioat, ioat->head);
	else
		return NULL;

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;

	desc->txd.flags = flags;
	desc->len = 1;

	dump_desc_dbg(ioat, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}

static void __devinit ioat3_dma_test_callback(void *dma_async_param)
{
	struct completion *cmp = dma_async_param;

	complete(cmp);
}

#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
{
	int i, src_idx;
	struct page *dest;
	struct page *xor_srcs[IOAT_NUM_SRC_TEST];
	struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
	dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
	dma_addr_t dma_addr, dest_dma;
	struct dma_async_tx_descriptor *tx;
	struct dma_chan *dma_chan;
	dma_cookie_t cookie;
	u8 cmp_byte = 0;
	u32 cmp_word;
	u32 xor_val_result;
	int err = 0;
	struct completion cmp;
	unsigned long tmo;
	struct device *dev = &device->pdev->dev;
	struct dma_device *dma = &device->common;

	dev_dbg(dev, "%s\n", __func__);

	if (!dma_has_cap(DMA_XOR, dma->cap_mask))
		return 0;

	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
		if (!xor_srcs[src_idx]) {
			while (src_idx--)
				__free_page(xor_srcs[src_idx]);
			return -ENOMEM;
		}
	}

	dest = alloc_page(GFP_KERNEL);
	if (!dest) {
		while (src_idx--)
			__free_page(xor_srcs[src_idx]);
		return -ENOMEM;
	}

	/* Fill in src buffers */
	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
		u8 *ptr = page_address(xor_srcs[src_idx]);
		for (i = 0; i < PAGE_SIZE; i++)
			ptr[i] = (1 << src_idx);
	}

	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
		cmp_byte ^= (u8) (1 << src_idx);

	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
			(cmp_byte << 8) | cmp_byte;

	memset(page_address(dest), 0, PAGE_SIZE);

	dma_chan = container_of(dma->channels.next, struct dma_chan,
				device_node);
	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	/* test xor */
	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
				      DMA_PREP_INTERRUPT);

	if (!tx) {
		dev_err(dev, "Self-test xor prep failed\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test xor setup failed\n");
		err = -ENODEV;
		goto free_resources;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
		dev_err(dev, "Self-test xor timed out\n");
		err = -ENODEV;
		goto free_resources;
	}

	dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
		u32 *ptr = page_address(dest);
		if (ptr[i] != cmp_word) {
			dev_err(dev, "Self-test xor failed compare\n");
			err = -ENODEV;
			goto free_resources;
		}
	}
	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE);

	/* skip validate if the capability is not present */
	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
		goto free_resources;

	/* validate the sources with the destination page */
	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
		xor_val_srcs[i] = xor_srcs[i];
	xor_val_srcs[i] = dest;

	xor_val_result = 1;

	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
					  &xor_val_result, DMA_PREP_INTERRUPT);
	if (!tx) {
		dev_err(dev, "Self-test zero prep failed\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test zero setup failed\n");
		err = -ENODEV;
		goto free_resources;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
		dev_err(dev, "Self-test validate timed out\n");
		err = -ENODEV;
		goto free_resources;
	}

	if (xor_val_result != 0) {
		dev_err(dev, "Self-test validate failed compare\n");
		err = -ENODEV;
		goto free_resources;
	}

	/* skip memset if the capability is not present */
	if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
		goto free_resources;

	/* test memset */
	dma_addr = dma_map_page(dev, dest, 0,
			PAGE_SIZE, DMA_FROM_DEVICE);
	tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
					 DMA_PREP_INTERRUPT);
	if (!tx) {
		dev_err(dev, "Self-test memset prep failed\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test memset setup failed\n");
		err = -ENODEV;
		goto free_resources;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
		dev_err(dev, "Self-test memset timed out\n");
		err = -ENODEV;
		goto free_resources;
	}

	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
		u32 *ptr = page_address(dest);
		if (ptr[i]) {
			dev_err(dev, "Self-test memset failed compare\n");
			err = -ENODEV;
			goto free_resources;
		}
	}

	/* test for non-zero parity sum */
	xor_val_result = 0;
	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
					   DMA_TO_DEVICE);
	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
					  &xor_val_result, DMA_PREP_INTERRUPT);
	if (!tx) {
		dev_err(dev, "Self-test 2nd zero prep failed\n");
		err = -ENODEV;
		goto free_resources;
	}

	async_tx_ack(tx);
	init_completion(&cmp);
	tx->callback = ioat3_dma_test_callback;
	tx->callback_param = &cmp;
	cookie = tx->tx_submit(tx);
	if (cookie < 0) {
		dev_err(dev, "Self-test 2nd zero setup failed\n");
		err = -ENODEV;
		goto free_resources;
	}
	dma->device_issue_pending(dma_chan);

	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
		dev_err(dev, "Self-test 2nd validate timed out\n");
		err = -ENODEV;
		goto free_resources;
	}

	if (xor_val_result != SUM_CHECK_P_RESULT) {
		dev_err(dev, "Self-test validate failed compare\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	dma->device_free_chan_resources(dma_chan);
out:
	src_idx = IOAT_NUM_SRC_TEST;
	while (src_idx--)
		__free_page(xor_srcs[src_idx]);
	__free_page(dest);
	return err;
}

static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
{
	int rc = ioat_dma_self_test(device);

	if (rc)
		return rc;

	rc = ioat_xor_val_self_test(device);
	if (rc)
		return rc;

	return 0;
}

static int ioat3_reset_hw(struct ioat_chan_common *chan)
{
	/* throw away whatever the channel was doing and get it
	 * initialized, with ioat3 specific workarounds
	 */
	struct ioatdma_device *device = chan->device;
	struct pci_dev *pdev = device->pdev;
	u32 chanerr;
	u16 dev_id;
	int err;

	ioat2_quiesce(chan, msecs_to_jiffies(100));

	chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
	writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);

	/* -= IOAT ver.3 workarounds =- */
	/* Write CHANERRMSK_INT with 3E07h to mask out the errors
	 * that can cause stability issues for IOAT ver.3, and clear any
	 * pending errors
	 */
	pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
	err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
	if (err) {
		dev_err(&pdev->dev, "channel error register unreachable\n");
		return err;
	}
	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr);

	/* clear any pending uncorrectable-error status left in the
	 * config space (TBG0 workaround)
	 */
	pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
	if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
		pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);

	return ioat2_reset_sync(chan, msecs_to_jiffies(200));
}

int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
{
	struct pci_dev *pdev = device->pdev;
	int dca_en = system_has_dca_enabled(pdev);
	struct dma_device *dma;
	struct dma_chan *c;
	struct ioat_chan_common *chan;
	bool is_raid_device = false;
	int err;
	u32 cap;

	device->enumerate_channels = ioat2_enumerate_channels;
	device->reset_hw = ioat3_reset_hw;
	device->self_test = ioat3_dma_self_test;
	dma = &device->common;
	dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
	dma->device_issue_pending = ioat2_issue_pending;
	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
	dma->device_free_chan_resources = ioat2_free_chan_resources;

	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;

	cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);

	/* dca is incompatible with raid operations */
	if (dca_en && (cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
		cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);

	if (cap & IOAT_CAP_XOR) {
		is_raid_device = true;
		dma->max_xor = 8;
		dma->xor_align = 6;

		dma_cap_set(DMA_XOR, dma->cap_mask);
		dma->device_prep_dma_xor = ioat3_prep_xor;

		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
	}
	if (cap & IOAT_CAP_PQ) {
		is_raid_device = true;
		dma_set_maxpq(dma, 8, 0);
		dma->pq_align = 6;

		dma_cap_set(DMA_PQ, dma->cap_mask);
		dma->device_prep_dma_pq = ioat3_prep_pq;

		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;

		if (!(cap & IOAT_CAP_XOR)) {
			dma->max_xor = 8;
			dma->xor_align = 6;

			dma_cap_set(DMA_XOR, dma->cap_mask);
			dma->device_prep_dma_xor = ioat3_prep_pqxor;

			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
			dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
		}
	}
	if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
		dma_cap_set(DMA_MEMSET, dma->cap_mask);
		dma->device_prep_dma_memset = ioat3_prep_memset_lock;
	}

	if (is_raid_device) {
		dma->device_tx_status = ioat3_tx_status;
		device->cleanup_fn = ioat3_cleanup_event;
		device->timer_fn = ioat3_timer_event;
	} else {
		dma->device_tx_status = ioat_dma_tx_status;
		device->cleanup_fn = ioat2_cleanup_event;
		device->timer_fn = ioat2_timer_event;
	}

	#ifdef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
	dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
	dma->device_prep_dma_pq_val = NULL;
	#endif

	#ifdef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
	dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
	dma->device_prep_dma_xor_val = NULL;
	#endif

	err = ioat_probe(device);
	if (err)
		return err;
	ioat_set_tcp_copy_break(262144);

	list_for_each_entry(c, &dma->channels, device_node) {
		chan = to_chan_common(c);
		writel(IOAT_DMA_DCA_ANY_CPU,
		       chan->reg_base + IOAT_DCACTRL_OFFSET);
	}

	err = ioat_register(device);
	if (err)
		return err;

	ioat_kobject_add(device, &ioat2_ktype);

	if (dca)
		device->dca = ioat3_dca_init(pdev, device->reg_base);

	return 0;
}