1/***********************license start*************** 2 * Copyright (c) 2003-2010 Cavium Networks (support@cavium.com). All rights 3 * reserved. 4 * 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are 8 * met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following 15 * disclaimer in the documentation and/or other materials provided 16 * with the distribution. 17 18 * * Neither the name of Cavium Networks nor the names of 19 * its contributors may be used to endorse or promote products 20 * derived from this software without specific prior written 21 * permission. 22 23 * This Software, including technical data, may be subject to U.S. export control 24 * laws, including the U.S. Export Administration Act and its associated 25 * regulations, and may be subject to export or import regulations in other 26 * countries. 27 28 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" 29 * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR 30 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO 31 * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR 32 * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM 33 * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE, 34 * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF 35 * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR 36 * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR 37 * PERFORMANCE OF THE SOFTWARE LIES WITH YOU. 
 ***********************license end**************************************/


/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 50126 $<hr>
 */
#include "executive-config.h"
#include "cvmx-config.h"
#include "cvmx.h"
#include "cvmx-cmd-queue.h"
#include "cvmx-dma-engine.h"

#ifdef CVMX_ENABLE_PKO_FUNCTIONS

/**
 * Return the number of DMA engines supported by this chip
 *
 * @return Number of DMA engines
 */
int cvmx_dma_engine_get_num(void)
{
    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        /* CN52XX pass 1 has one fewer DMA engine than later NPEI chips */
        if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
            return 4;
        else
            return 5;
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        return 8;
    else
        return 2;
}

/**
 * Initialize the DMA engines for use.
 *
 * For every engine this sets up a command queue backed by the FPA
 * output buffer pool, points the chip's IBUFF start-address register
 * at that queue's first buffer, and finally enables the engines in
 * the chip-family-specific DMA control register.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_initialize(void)
{
    int engine;

    /* Create a command queue for each engine and tell the hardware where
       its input buffer chain starts */
    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_result_t result;
        result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
                                           0, CVMX_FPA_OUTPUT_BUFFER_POOL,
                                           CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
        if (result != CVMX_CMD_QUEUE_SUCCESS)
            return -1;
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
            dmax_ibuff_saddr.u64 = 0;
            /* saddr takes the physical address shifted right by 7
               (i.e. in 128 byte units) */
            dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        {
            cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
            dpi_dmax_ibuff_saddr.u64 = 0;
            /* csize is the command buffer size expressed as pool size / 8
               (presumably 8-byte words -- matches the csize fields written
               into the DMA control registers below) */
            dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
            dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
        }
        else
        {
            /* Legacy NPI chips have only two engines: engine 0 is the low
               priority queue, engine 1 the high priority queue */
            uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
        }
    }

    /* Enable the engines in the chip specific DMA control register. This is
       done after all IBUFF start addresses are programmed above */
    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = 0;
        /* Engine 4 only exists on chips reporting 5 engines (see
           cvmx_dma_engine_get_num) */
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 1;
        dma_control.s.dma3_enb = 1;
        dma_control.s.dma2_enb = 1;
        dma_control.s.dma1_enb = 1;
        dma_control.s.dma0_enb = 1;
        dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
        /* Don't-write-back is intentionally left disabled here */
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* As a workaround for errata PCIE-811 we only allow a single
           outstanding DMA read over PCIe at a time. This limits performance,
           but works in all cases. If you need higher performance, remove
           this code and implement the more complicated workaround documented
           in the errata. This only affects CN56XX pass 2.0 chips */
        if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
        {
            cvmx_npei_dma_pcie_req_num_t pcie_req_num;
            pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
            pcie_req_num.s.dma_cnt = 1;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
        }
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_engx_buf_t dpi_engx_buf;
        cvmx_dpi_dma_control_t dma_control;
        cvmx_dpi_ctl_t dpi_ctl;

        /* Give engine 0-4 1KB, and 5 3KB. This gives the packet engines better
           performance. Total must not exceed 8KB */
        dpi_engx_buf.u64 = 0;
        dpi_engx_buf.s.blks = 2;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
        dpi_engx_buf.s.blks = 6;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);

        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.pkt_hp = 1;
        dma_control.s.pkt_en = 1;
        dma_control.s.dma_enb = 0x1f; /* Enable engines 0-4 */
        dma_control.s.dwb_denb = 1;
        dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.o_mode = 1;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        /* Global DPI enable must come after the control register setup */
        dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
        dpi_ctl.s.en = 1;
        cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = 0;
        /* Don't-write-back is intentionally left disabled here */
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.o_add1 = 1;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.hp_enb = 1;
        dma_control.s.lp_enb = 1;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
    }

    return 0;
}


/**
 * Shutdown all DMA engines. The engines must be idle when this
 * function is called.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_shutdown(void)
{
    int engine;

    /* Refuse to shut down while any engine still has commands queued */
    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
        {
            cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
            return -1;
        }
    }

    /* Disable the engines in the chip-family-specific control register */
    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
        /* Engine 4 only exists on chips reporting 5 engines */
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 0;
        dma_control.s.dma3_enb = 0;
        dma_control.s.dma2_enb = 0;
        dma_control.s.dma1_enb = 0;
        dma_control.s.dma0_enb = 0;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.dma_enb = 0;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
        dma_control.s.hp_enb = 0;
        dma_control.s.lp_enb = 0;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
    }

    /* Tear down each command queue and clear the hardware's buffer
       start address */
    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
        }
    }

    return 0;
}


/**
 * Submit a series of DMA commands to the DMA engines.
 *
 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header Command header
 * @param num_buffers
 *               The number of data pointers
 * @param buffers Command data pointers
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
{
    cvmx_cmd_queue_result_t result;
    int cmd_count = 1;
    uint64_t cmds[num_buffers + 1]; /* VLA: header word plus one word per buffer */

    if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
    {
        /* Check for Errata PCIe-604 */
        if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
        {
            cvmx_dprintf("DMA engine submit too large\n");
            return -1;
        }
    }

    /* Flatten the header and the data pointers into one command array */
    cmds[0] = header.u64;
    while (num_buffers--)
    {
        cmds[cmd_count++] = buffers->u64;
        buffers++;
    }

    /* Due to errata PCIE-13315, it is necessary to have the queue lock while we
       ring the doorbell for the DMA engines. This prevents doorbells from
       possibly arriving out of order with respect to the command queue
       entries */
    __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    /* use_locking=0: we already hold the queue lock taken above */
    result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
    /* This SYNCWS is needed since the command queue didn't do locking, which
       normally implies the SYNCWS. This one makes sure the command queue
       updates make it to L2 before we ring the doorbell */
    CVMX_SYNCWS;
    /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
    if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
    {
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            /* DMA doorbells are 32bit writes in little endian space.
               This means we need to xor the address with 4 */
            cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
        else
        {
            /* Legacy NPI: engine 0 is the low priority doorbell, engine 1
               the high priority doorbell */
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
        }
    }
    /* Here is the unlock for the above errata workaround */
    __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    return result;
}


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the
 * internal address list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of internal pointer chunks created
 */
static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    int segments = 0;
    while (size)
    {
        /* Each internal chunk can contain a maximum of 8191 bytes */
        int chunk = size;
        if (chunk > 8191)
            chunk = 8191;
        buffers[segments].u64 = 0;
        buffers[segments].internal.size = chunk;
        buffers[segments].internal.addr = address;
        address += chunk;
        size -= chunk;
        segments++;
    }
    return segments;
}


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
 * list.
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of PCI / PCIe address chunks created. The number of words used
 *         will be segments + (segments-1)/4 + 1.
 */
static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    /* Each length field in a PCI / PCIe pointer block holds at most 65535
       bytes, so that is the largest segment we can describe */
    const int MAX_SIZE = 65535;
    int segments = 0;
    while (size)
    {
        /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
           up to 4 addresses. This then repeats if more data is needed */
        buffers[0].u64 = 0;
        if (size <= MAX_SIZE)
        {
            /* Only one more segment needed */
            buffers[0].pcie_length.len0 = size;
            buffers[1].u64 = address;
            segments++;
            break;
        }
        else if (size <= MAX_SIZE * 2)
        {
            /* Two more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = size - MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            segments+=2;
            break;
        }
        else if (size <= MAX_SIZE * 3)
        {
            /* Three more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            segments+=3;
            break;
        }
        else if (size <= MAX_SIZE * 4)
        {
            /* Four more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            segments+=4;
            break;
        }
        else
        {
            /* Five or more segments are needed: emit a full block of 4 and
               loop to start a new lengths dword for the remainder */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            address += MAX_SIZE;
            size -= MAX_SIZE*4;
            buffers += 5;
            segments+=4;
        }
    }
    return segments;
}


/**
 * Build the first and last pointers based on a DMA engine header
 * and submit them to the engine. The purpose of this function is
 * to simplify the building of DMA engine commands by automatically
 * converting a simple address and size into the appropriate internal
 * or PCI / PCIe address list. This function does not support gather lists,
 * so you will need to build your own lists in that case.
 *
 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header DMA Command header. Note that the nfst and nlst fields do not
 *               need to be filled in. All other fields must be set properly.
 * @param first_address
 *               Address to use for the first pointers. In the case of INTERNAL,
 *               INBOUND, and OUTBOUND this is an Octeon memory address. In the
 *               case of EXTERNAL, this is the source PCI / PCIe address.
 * @param last_address
 *               Address to use for the last pointers. In the case of EXTERNAL,
 *               INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
 *               case of INTERNAL, this is the Octeon memory destination address.
 * @param size   Size of the transfer to perform.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
                             uint64_t first_address, uint64_t last_address,
                             int size)
{
    cvmx_dma_engine_buffer_t buffers[32];
    int words = 0;

    /* Build the first and last pointer lists into "buffers", then hand the
       finished command to cvmx_dma_engine_submit(). External (PCI / PCIe)
       lists use one extra length dword per group of 4 addresses, hence the
       additional ((n-1) >> 2) + 1 words in those cases */
    switch (header.s.type)
    {
        case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
            words += header.s.nlst;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
        case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
            /* First pointers are Octeon memory, last pointers are PCI / PCIe */
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
            /* Both pointer lists are PCI / PCIe addresses */
            header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
            words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
    }
    return cvmx_dma_engine_submit(engine, header, words, buffers);
}

#endif