cvmx-dma-engine.c revision 243587
/***********************license start***************
 * Copyright (c) 2003-2010 Cavium Inc. (support@cavium.com). All rights
 * reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *   * Neither the name of Cavium Inc. nor the names of
 *     its contributors may be used to endorse or promote products
 *     derived from this software without specific prior written
 *     permission.
 *
 * This Software, including technical data, may be subject to U.S. export control
 * laws, including the U.S. Export Administration Act and its associated
 * regulations, and may be subject to export or import regulations in other
 * countries.
 *
 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
 * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
 * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
 * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
 * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
 * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
 * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
 * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
 * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
 ***********************license end**************************************/


/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 70030 $<hr>
 */
#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
#include <linux/module.h>
#include <asm/octeon/cvmx.h>
#include <asm/octeon/octeon-model.h>
#include <asm/octeon/cvmx-config.h>
#include <asm/octeon/cvmx-cmd-queue.h>
#include <asm/octeon/cvmx-dma-engine.h>
#include <asm/octeon/octeon-feature.h>
#include <asm/octeon/cvmx-npi-defs.h>
#include <asm/octeon/cvmx-npei-defs.h>
#include <asm/octeon/cvmx-dpi-defs.h>
#include <asm/octeon/cvmx-pexp-defs.h>
#include <asm/octeon/cvmx-helper-cfg.h>
#else
#if !defined(__FreeBSD__) || !defined(_KERNEL)
#include "executive-config.h"
#include "cvmx-config.h"
#endif
#include "cvmx.h"
#include "cvmx-cmd-queue.h"
#include "cvmx-dma-engine.h"
#include "cvmx-helper-cfg.h"
#endif

#ifdef CVMX_ENABLE_PKO_FUNCTIONS

/**
 * Return the number of DMA engines supported by this chip
 *
 * @return Number of DMA engines
 */
int cvmx_dma_engine_get_num(void)
{
    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
            return 4;
        else
            return 5;
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        return 8;
    else
        return 2;
}

/**
 * Initialize the DMA engines for use
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_initialize(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_result_t result;
        result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
                                           0, CVMX_FPA_OUTPUT_BUFFER_POOL,
                                           CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
        if (result != CVMX_CMD_QUEUE_SUCCESS)
            return -1;
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
            dmax_ibuff_saddr.u64 = 0;
            dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        {
            cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
            dpi_dmax_ibuff_saddr.u64 = 0;
            dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
            dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
        }
        else
        {
            uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = 0;
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 1;
        dma_control.s.dma3_enb = 1;
        dma_control.s.dma2_enb = 1;
        dma_control.s.dma1_enb = 1;
        dma_control.s.dma0_enb = 1;
        dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* As a workaround for errata PCIE-811 we only allow a single
            outstanding DMA read over PCIe at a time. This limits performance,
            but works in all cases. If you need higher performance, remove
            this code and implement the more complicated workaround documented
            in the errata. This only affects CN56XX pass 2.0 chips */
        if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
        {
            cvmx_npei_dma_pcie_req_num_t pcie_req_num;
            pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
            pcie_req_num.s.dma_cnt = 1;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
        }
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_engx_buf_t dpi_engx_buf;
        cvmx_dpi_dma_engx_en_t dpi_dma_engx_en;
        cvmx_dpi_dma_control_t dma_control;
        cvmx_dpi_ctl_t dpi_ctl;

        /* Give engines 0-4 1KB each and engine 5 3KB; this gives the packet
            engines better performance. The total must not exceed 8KB. */
        dpi_engx_buf.u64 = 0;
        dpi_engx_buf.s.blks = 2;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
        dpi_engx_buf.s.blks = 6;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);

        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.pkt_hp = 1;
        dma_control.s.pkt_en = 1;
        dma_control.s.dma_enb = 0x1f;
        dma_control.s.dwb_denb = cvmx_helper_cfg_opt_get(CVMX_HELPER_CFG_OPT_USE_DWB);
        dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.o_mode = 1;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        /* When dma_control[pkt_en] = 1, engine 5 is used for packets and is not
           available for DMA. */
        dpi_dma_engx_en.u64 = cvmx_read_csr(CVMX_DPI_DMA_ENGX_EN(5));
        dpi_dma_engx_en.s.qen = 0;
        cvmx_write_csr(CVMX_DPI_DMA_ENGX_EN(5), dpi_dma_engx_en.u64);
        dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
        dpi_ctl.s.en = 1;
        cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = 0;
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.o_add1 = 1;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.hp_enb = 1;
        dma_control.s.lp_enb = 1;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
    }

    return 0;
}
#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
EXPORT_SYMBOL(cvmx_dma_engine_initialize);
#endif
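
/* Illustrative usage sketch (not part of the original SDK), kept under #if 0
   so it does not affect the build. Typical bring-up: the application is
   assumed to have set up the FPA and stocked CVMX_FPA_OUTPUT_BUFFER_POOL
   beforehand, since the per-engine command queues allocate their chunks from
   that pool. */
#if 0
static int example_dma_bringup(void)
{
    /* One-time global init: creates one command queue per engine, points
       each engine's IBUFF address at its queue, and enables the engines. */
    if (cvmx_dma_engine_initialize() != 0)
        return -1;
    cvmx_dprintf("DMA engines available: %d\n", cvmx_dma_engine_get_num());
    return 0;
}
#endif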

/**
 * Shutdown all DMA engines. The engines must be idle when this
 * function is called.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_shutdown(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
        {
            cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
            return -1;
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 0;
        dma_control.s.dma3_enb = 0;
        dma_control.s.dma2_enb = 0;
        dma_control.s.dma1_enb = 0;
        dma_control.s.dma0_enb = 0;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.dma_enb = 0;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
        dma_control.s.hp_enb = 0;
        dma_control.s.lp_enb = 0;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
    }

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
        }
    }

    return 0;
}
#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
EXPORT_SYMBOL(cvmx_dma_engine_shutdown);
#endif

/**
 * Submit a series of DMA commands to the DMA engines.
 *
 * @param engine  Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header  Command header
 * @param num_buffers
 *                The number of data pointers
 * @param buffers Command data pointers
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
{
    cvmx_cmd_queue_result_t result;
    int cmd_count = 1;
    uint64_t cmds[num_buffers + 1];

    if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
    {
        /* Check for Errata PCIe-604 */
        if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
        {
            cvmx_dprintf("DMA engine submit too large\n");
            return -1;
        }
    }

    cmds[0] = header.u64;
    while (num_buffers--)
    {
        cmds[cmd_count++] = buffers->u64;
        buffers++;
    }

    /* Due to errata PCIE-13315, it is necessary to hold the queue lock while
        we ring the doorbell for the DMA engines. This prevents doorbells from
        possibly arriving out of order with respect to the command queue
        entries */
    __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
    /* This SYNCWS is needed since the command queue didn't do locking, which
        normally implies the SYNCWS. This one makes sure the command queue
        updates make it to L2 before we ring the doorbell */
    CVMX_SYNCWS;
    if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
    {
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            /* DMA doorbells are 32-bit writes in little endian space. This means we need to xor the address with 4 */
            cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
        }
    }
    /* Here is the unlock for the above errata workaround */
    __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    return result;
}
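
/* Illustrative usage sketch (not part of the original SDK), kept under #if 0
   so it does not affect the build: a hand-built submit of a small local
   memory-to-memory copy. Only the header fields this file itself uses (type,
   nfst, nlst) are shown; real commands may need other header fields set
   depending on the transfer. For larger or PCI / PCIe transfers,
   cvmx_dma_engine_transfer() below builds these lists automatically. */
#if 0
static int example_submit_internal_copy(int engine, void *dest, void *src, int len)
{
    cvmx_dma_engine_header_t header;
    cvmx_dma_engine_buffer_t buffers[2];

    if (len > 8191)   /* one internal pointer covers at most 8191 bytes */
        return -1;

    header.u64 = 0;
    header.s.type = CVMX_DMA_ENGINE_TRANSFER_INTERNAL;
    header.s.nfst = 1;    /* one first (source) pointer */
    header.s.nlst = 1;    /* one last (destination) pointer */

    buffers[0].u64 = 0;
    buffers[0].internal.size = len;
    buffers[0].internal.addr = cvmx_ptr_to_phys(src);
    buffers[1].u64 = 0;
    buffers[1].internal.size = len;
    buffers[1].internal.addr = cvmx_ptr_to_phys(dest);

    return cvmx_dma_engine_submit(engine, header, 2, buffers);
}
#endif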


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the
 * internal address list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of internal pointer chunks created
 */
static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    int segments = 0;
    while (size)
    {
        /* Each internal chunk can contain a maximum of 8191 bytes */
        int chunk = size;
        if (chunk > 8191)
            chunk = 8191;
        buffers[segments].u64 = 0;
        buffers[segments].internal.size = chunk;
        buffers[segments].internal.addr = address;
        address += chunk;
        size -= chunk;
        segments++;
    }
    return segments;
}
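
/* Example: a 20000-byte region becomes three chunks of 8191, 8191 and 3618
   bytes, so the function writes three buffer words and returns 3. */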


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
 * list.
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of PCI / PCIe address chunks created. The number of words used
 *         will be segments + (segments-1)/4 + 1.
 */
static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    const int MAX_SIZE = 65535;
    int segments = 0;
    while (size)
    {
        /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
            up to 4 addresses. This then repeats if more data is needed */
        buffers[0].u64 = 0;
        if (size <= MAX_SIZE)
        {
            /* Only one more segment needed */
            buffers[0].pcie_length.len0 = size;
            buffers[1].u64 = address;
            segments++;
            break;
        }
        else if (size <= MAX_SIZE * 2)
        {
            /* Two more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = size - MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            segments += 2;
            break;
        }
        else if (size <= MAX_SIZE * 3)
        {
            /* Three more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            segments += 3;
            break;
        }
        else if (size <= MAX_SIZE * 4)
        {
            /* Four more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            segments += 4;
            break;
        }
        else
        {
            /* Five or more segments are needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            address += MAX_SIZE;
            size -= MAX_SIZE * 4;
            buffers += 5;
            segments += 4;
        }
    }
    return segments;
}
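
/* Example: a 300000-byte region needs ceil(300000/65535) = 5 segments. Per the
   formula above they occupy 5 + (5-1)/4 + 1 = 7 words: one length word plus
   four addresses, then a second length word plus the fifth address. */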


/**
 * Build the first and last pointers based on a DMA engine header
 * and submit them to the engine. The purpose of this function is
 * to simplify the building of DMA engine commands by automatically
 * converting a simple address and size into the appropriate internal
 * or PCI / PCIe address list. This function does not support gather lists,
 * so you will need to build your own lists in that case.
 *
 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header DMA Command header. Note that the nfst and nlst fields do not
 *               need to be filled in. All other fields must be set properly.
 * @param first_address
 *               Address to use for the first pointers. In the case of INTERNAL,
 *               INBOUND, and OUTBOUND this is an Octeon memory address. In the
 *               case of EXTERNAL, this is the source PCI / PCIe address.
 * @param last_address
 *               Address to use for the last pointers. In the case of EXTERNAL,
 *               INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
 *               case of INTERNAL, this is the Octeon memory destination address.
 * @param size   Size of the transfer to perform.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
                             uint64_t first_address, uint64_t last_address,
                             int size)
{
    cvmx_dma_engine_buffer_t buffers[32];
    int words = 0;

    switch (header.s.type)
    {
        case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
            words += header.s.nlst;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
        case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
            words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
    }
    return cvmx_dma_engine_submit(engine, header, words, buffers);
}
#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
EXPORT_SYMBOL(cvmx_dma_engine_transfer);
#endif
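
/* Illustrative usage sketch (not part of the original SDK), kept under #if 0
   so it does not affect the build: pushing a local buffer out over PCI / PCIe
   with cvmx_dma_engine_transfer(). pcie_address is a placeholder bus address
   assumed to be supplied by the host side. As documented above, nfst/nlst are
   filled in automatically, but any other header fields the transfer requires
   must already be set. */
#if 0
static int example_outbound_write(int engine, void *local_buffer,
                                  uint64_t pcie_address, int len)
{
    cvmx_dma_engine_header_t header;

    header.u64 = 0;
    header.s.type = CVMX_DMA_ENGINE_TRANSFER_OUTBOUND;
    /* First pointers: Octeon memory source; last pointers: PCIe destination */
    return cvmx_dma_engine_transfer(engine, header,
                                    cvmx_ptr_to_phys(local_buffer),
                                    pcie_address, len);
}
#endif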
#endif
554