cvmx-dma-engine.c revision 210284
/***********************license start***************
 * Copyright (c) 2003-2008 Cavium Networks (support@cavium.com). All rights
 * reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *
 *     * Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 *
 *     * Neither the name of Cavium Networks nor the names of
 *       its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written
 *       permission.
 *
 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
 * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS
 * OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
 * RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
 * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
 * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
 * OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
 * PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET
 * POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT
 * OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
 *
 *
 * For any questions regarding licensing please contact marketing@caviumnetworks.com
 *
 ***********************license end**************************************/




/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 41586 $<hr>
 */
#include "executive-config.h"
#include "cvmx-config.h"
#include "cvmx.h"
#include "cvmx-cmd-queue.h"
#include "cvmx-dma-engine.h"

#ifdef CVMX_ENABLE_PKO_FUNCTIONS

/**
 * Return the number of DMA engines supported by this chip
 *
 * @return Number of DMA engines
 */
int cvmx_dma_engine_get_num(void)
{
    if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
            return 4;
        else
            return 5;
    }
    else
        return 2;
}

/**
 * Initialize the DMA engines for use
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_initialize(void)
{
    cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_result_t result;
        result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
                                           0, CVMX_FPA_OUTPUT_BUFFER_POOL,
                                           CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
        if (result != CVMX_CMD_QUEUE_SUCCESS)
            return -1;
        dmax_ibuff_saddr.u64 = 0;
        dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
        if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, dmax_ibuff_saddr.u64);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, dmax_ibuff_saddr.u64);
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = 0;
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 1;
        dma_control.s.dma3_enb = 1;
        dma_control.s.dma2_enb = 1;
        dma_control.s.dma1_enb = 1;
        dma_control.s.dma0_enb = 1;
        dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* As a workaround for errata PCIE-811 we only allow a single
           outstanding DMA read over PCIe at a time. This limits performance,
           but works in all cases. If you need higher performance, remove
           this code and implement the more complicated workaround documented
           in the errata.
           This only affects CN56XX pass 2.0 chips */
        if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
        {
            cvmx_npei_dma_pcie_req_num_t pcie_req_num;
            pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
            pcie_req_num.s.dma_cnt = 1;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
        }
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = 0;
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.o_add1 = 1;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.hp_enb = 1;
        dma_control.s.lp_enb = 1;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
    }

    return 0;
}


/**
 * Shutdown all DMA engines. The engines must be idle when this
 * function is called.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_shutdown(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
        {
            cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
            return -1;
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 0;
        dma_control.s.dma3_enb = 0;
        dma_control.s.dma2_enb = 0;
        dma_control.s.dma1_enb = 0;
        dma_control.s.dma0_enb = 0;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
        dma_control.s.hp_enb = 0;
        dma_control.s.lp_enb = 0;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
    }

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
        if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
        }
    }

    return 0;
}


/**
 * Submit a series of DMA commands to the DMA engines.
 *
 * @param engine  Engine to submit to (0-4)
 * @param header  Command header
 * @param num_buffers
 *                The number of data pointers
 * @param buffers Command data pointers
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
{
    cvmx_cmd_queue_result_t result;
    int cmd_count = 1;
    uint64_t cmds[num_buffers + 1];

    if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
    {
        /* Check for Errata PCIe-604 */
        if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
        {
            cvmx_dprintf("DMA engine submit too large\n");
            return -1;
        }
    }

    cmds[0] = header.u64;
    while (num_buffers--)
    {
        cmds[cmd_count++] = buffers->u64;
        buffers++;
    }

    /* Due to errata PCIE-13315, it is necessary to have the queue lock while we
       ring the doorbell for the DMA engines. This prevents doorbells from
       possibly arriving out of order with respect to the command queue
       entries */
    __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
    /* This SYNCWS is needed since the command queue didn't do locking, which
       normally implies the SYNCWS. This one makes sure the command queue
       updates make it to L2 before we ring the doorbell */
    CVMX_SYNCWS;
    /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
    if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
    {
        if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        {
            /* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
            cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
        }
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
        }
    }
    /* Here is the unlock for the above errata workaround */
    __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    return result;
}


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the
 * internal address list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of internal pointer chunks created
 */
static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    int segments = 0;
    while (size)
    {
        /* Each internal chunk can contain a maximum of 8191 bytes */
        int chunk = size;
        if (chunk > 8191)
            chunk = 8191;
        buffers[segments].u64 = 0;
        buffers[segments].internal.size = chunk;
        buffers[segments].internal.addr = address;
        address += chunk;
        size -= chunk;
        segments++;
    }
    return segments;
}


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
 * list.
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of PCI / PCIe address chunks created. The number of words used
 *         will be segments + (segments-1)/4 + 1.
 */
static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    const int MAX_SIZE = 65535;
    int segments = 0;
    while (size)
    {
        /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
           up to 4 addresses. This then repeats if more data is needed */
        buffers[0].u64 = 0;
        if (size <= MAX_SIZE)
        {
            /* Only one more segment needed */
            buffers[0].pcie_length.len0 = size;
            buffers[1].u64 = address;
            segments++;
            break;
        }
        else if (size <= MAX_SIZE * 2)
        {
            /* Two more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = size - MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            segments+=2;
            break;
        }
        else if (size <= MAX_SIZE * 3)
        {
            /* Three more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            segments+=3;
            break;
        }
        else if (size <= MAX_SIZE * 4)
        {
            /* Four more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            segments+=4;
            break;
        }
        else
        {
            /* Five or more segments are needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            address += MAX_SIZE;
            size -= MAX_SIZE*4;
            buffers += 5;
            segments+=4;
        }
    }
    return segments;
}
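
/*
 * Worked example (editorial note, not in the original SDK source): for a
 * 200000 byte region the loop above produces four segments of 65535, 65535,
 * 65535, and 3395 bytes.  Those four segments occupy one length dword plus
 * four address dwords, which matches the word count given in the comment
 * above this function: segments + (segments-1)/4 + 1 = 4 + 3/4 + 1 = 5 words
 * (integer division).
 */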

/**
 * Build the first and last pointers based on a DMA engine header
 * and submit them to the engine. The purpose of this function is
 * to simplify the building of DMA engine commands by automatically
 * converting a simple address and size into the appropriate internal
 * or PCI / PCIe address list. This function does not support gather lists,
 * so you will need to build your own lists in that case.
 *
 * @param engine Engine to submit to (0-4)
 * @param header DMA Command header. Note that the nfst and nlst fields do not
 *               need to be filled in. All other fields must be set properly.
 * @param first_address
 *               Address to use for the first pointers. In the case of INTERNAL,
 *               INBOUND, and OUTBOUND this is an Octeon memory address. In the
 *               case of EXTERNAL, this is the source PCI / PCIe address.
 * @param last_address
 *               Address to use for the last pointers. In the case of EXTERNAL,
 *               INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
 *               case of INTERNAL, this is the Octeon memory destination address.
 * @param size   Size of the transfer to perform.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
                             uint64_t first_address, uint64_t last_address,
                             int size)
{
    cvmx_dma_engine_buffer_t buffers[32];
    int words = 0;

    switch (header.s.type)
    {
        case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
            words += header.s.nlst;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
        case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
            words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
    }
    return cvmx_dma_engine_submit(engine, header, words, buffers);
}

#endif
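
/*
 * Usage sketch (editorial addition, not part of the original SDK file).
 * Assuming cvmx_dma_engine_initialize() has already been called during
 * startup, a minimal local memory-to-memory copy on engine 0 could look
 * roughly like the code below.  Only the functions and the s.type field
 * shown here appear in this file; any additional header fields a real
 * transfer requires should be checked against cvmx-dma-engine.h for your
 * SDK version, and example_internal_copy() itself is hypothetical.
 *
 *     static int example_internal_copy(void *dest, void *src, int len)
 *     {
 *         cvmx_dma_engine_header_t header;
 *
 *         header.u64 = 0;
 *         header.s.type = CVMX_DMA_ENGINE_TRANSFER_INTERNAL;
 *         // For INTERNAL transfers the first pointers describe the source
 *         // and the last pointers the destination (see cvmx_dma_engine_transfer).
 *         return cvmx_dma_engine_transfer(0, header,
 *                                         cvmx_ptr_to_phys(src),
 *                                         cvmx_ptr_to_phys(dest), len);
 *     }
 */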