1210284Sjmallett/***********************license start***************
2232812Sjmallett * Copyright (c) 2003-2010  Cavium Inc. (support@cavium.com). All rights
3215990Sjmallett * reserved.
4210284Sjmallett *
5210284Sjmallett *
6215990Sjmallett * Redistribution and use in source and binary forms, with or without
7215990Sjmallett * modification, are permitted provided that the following conditions are
8215990Sjmallett * met:
9210284Sjmallett *
10215990Sjmallett *   * Redistributions of source code must retain the above copyright
11215990Sjmallett *     notice, this list of conditions and the following disclaimer.
12210284Sjmallett *
13215990Sjmallett *   * Redistributions in binary form must reproduce the above
14215990Sjmallett *     copyright notice, this list of conditions and the following
15215990Sjmallett *     disclaimer in the documentation and/or other materials provided
16215990Sjmallett *     with the distribution.
17215990Sjmallett
18232812Sjmallett *   * Neither the name of Cavium Inc. nor the names of
19215990Sjmallett *     its contributors may be used to endorse or promote products
20215990Sjmallett *     derived from this software without specific prior written
21215990Sjmallett *     permission.
22215990Sjmallett
23215990Sjmallett * This Software, including technical data, may be subject to U.S. export  control
24215990Sjmallett * laws, including the U.S. Export Administration Act and its  associated
25215990Sjmallett * regulations, and may be subject to export or import  regulations in other
26215990Sjmallett * countries.
27215990Sjmallett
28215990Sjmallett * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
29232812Sjmallett * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
30215990Sjmallett * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
31215990Sjmallett * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
32215990Sjmallett * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
33215990Sjmallett * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
34215990Sjmallett * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
35215990Sjmallett * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
36215990Sjmallett * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE  RISK ARISING OUT OF USE OR
37215990Sjmallett * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
38210284Sjmallett ***********************license end**************************************/
39210284Sjmallett
40210284Sjmallett
41210284Sjmallett
42210284Sjmallett
43210284Sjmallett
44210284Sjmallett
45215990Sjmallett
/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 70030 $<hr>
 */
54232812Sjmallett#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
55232812Sjmallett#include <linux/module.h>
56232812Sjmallett#include <asm/octeon/cvmx.h>
57232812Sjmallett#include <asm/octeon/octeon-model.h>
58232812Sjmallett#include <asm/octeon/cvmx-config.h>
59232812Sjmallett#include <asm/octeon/cvmx-cmd-queue.h>
60232812Sjmallett#include <asm/octeon/cvmx-dma-engine.h>
61232812Sjmallett#include <asm/octeon/octeon-feature.h>
62232812Sjmallett#include <asm/octeon/cvmx-npi-defs.h>
63232812Sjmallett#include <asm/octeon/cvmx-npei-defs.h>
64232812Sjmallett#include <asm/octeon/cvmx-dpi-defs.h>
65232812Sjmallett#include <asm/octeon/cvmx-pexp-defs.h>
66232812Sjmallett#include <asm/octeon/cvmx-helper-cfg.h>
67232812Sjmallett#else
68243587Sjmallett#if !defined(__FreeBSD__) || !defined(_KERNEL)
69210284Sjmallett#include "executive-config.h"
70210284Sjmallett#include "cvmx-config.h"
71243587Sjmallett#endif
72210284Sjmallett#include "cvmx.h"
73210284Sjmallett#include "cvmx-cmd-queue.h"
74210284Sjmallett#include "cvmx-dma-engine.h"
75232812Sjmallett#include "cvmx-helper-cfg.h"
76232812Sjmallett#endif
77210284Sjmallett
78210284Sjmallett#ifdef CVMX_ENABLE_PKO_FUNCTIONS
79210284Sjmallett
80210284Sjmallett/**
81210284Sjmallett * Return the number of DMA engimes supported by this chip
82210284Sjmallett *
83210284Sjmallett * @return Number of DMA engines
84210284Sjmallett */
85210284Sjmallettint cvmx_dma_engine_get_num(void)
86210284Sjmallett{
87215990Sjmallett    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
88210284Sjmallett    {
89210284Sjmallett        if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
90210284Sjmallett            return 4;
91210284Sjmallett        else
92210284Sjmallett            return 5;
93210284Sjmallett    }
94215990Sjmallett    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
95215990Sjmallett        return 8;
96210284Sjmallett    else
97210284Sjmallett        return 2;
98210284Sjmallett}
99210284Sjmallett
100210284Sjmallett/**
101210284Sjmallett * Initialize the DMA engines for use
102210284Sjmallett *
103210284Sjmallett * @return Zero on success, negative on failure
104210284Sjmallett */
105210284Sjmallettint cvmx_dma_engine_initialize(void)
106210284Sjmallett{
107210284Sjmallett    int engine;
108210284Sjmallett
109210284Sjmallett    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
110210284Sjmallett    {
111210284Sjmallett        cvmx_cmd_queue_result_t result;
112210284Sjmallett        result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
113210284Sjmallett                                           0, CVMX_FPA_OUTPUT_BUFFER_POOL,
114210284Sjmallett                                           CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
115210284Sjmallett        if (result != CVMX_CMD_QUEUE_SUCCESS)
116210284Sjmallett            return -1;
117215990Sjmallett        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
118215990Sjmallett        {
119215990Sjmallett            cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
120215990Sjmallett            dmax_ibuff_saddr.u64 = 0;
121215990Sjmallett            dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
122210284Sjmallett            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
123215990Sjmallett        }
124215990Sjmallett        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
125215990Sjmallett        {
126215990Sjmallett            cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
127215990Sjmallett            dpi_dmax_ibuff_saddr.u64 = 0;
128215990Sjmallett            dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
129215990Sjmallett            dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
130215990Sjmallett            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
131215990Sjmallett        }
132210284Sjmallett        else
133210284Sjmallett        {
134215990Sjmallett            uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
135210284Sjmallett            if (engine)
136215990Sjmallett                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
137210284Sjmallett            else
138215990Sjmallett                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
139210284Sjmallett        }
140210284Sjmallett    }
141210284Sjmallett
142215990Sjmallett    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
143210284Sjmallett    {
144210284Sjmallett        cvmx_npei_dma_control_t dma_control;
145210284Sjmallett        dma_control.u64 = 0;
146210284Sjmallett        if (cvmx_dma_engine_get_num() >= 5)
147210284Sjmallett            dma_control.s.dma4_enb = 1;
148210284Sjmallett        dma_control.s.dma3_enb = 1;
149210284Sjmallett        dma_control.s.dma2_enb = 1;
150210284Sjmallett        dma_control.s.dma1_enb = 1;
151210284Sjmallett        dma_control.s.dma0_enb = 1;
152210284Sjmallett        dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
153210284Sjmallett        //dma_control.s.dwb_denb = 1;
154210284Sjmallett        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
155210284Sjmallett        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
156210284Sjmallett        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
157210284Sjmallett        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
158210284Sjmallett        /* As a workaround for errata PCIE-811 we only allow a single
159210284Sjmallett            outstanding DMA read over PCIe at a time. This limits performance,
160210284Sjmallett            but works in all cases. If you need higher performance, remove
161210284Sjmallett            this code and implement the more complicated workaround documented
162210284Sjmallett            in the errata. This only affects CN56XX pass 2.0 chips */
163210284Sjmallett        if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
164210284Sjmallett        {
165210284Sjmallett            cvmx_npei_dma_pcie_req_num_t pcie_req_num;
166210284Sjmallett            pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
167210284Sjmallett            pcie_req_num.s.dma_cnt = 1;
168210284Sjmallett            cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
169210284Sjmallett        }
170210284Sjmallett    }
171215990Sjmallett    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
172215990Sjmallett    {
173215990Sjmallett        cvmx_dpi_engx_buf_t dpi_engx_buf;
174232812Sjmallett        cvmx_dpi_dma_engx_en_t dpi_dma_engx_en;
175215990Sjmallett        cvmx_dpi_dma_control_t dma_control;
176215990Sjmallett        cvmx_dpi_ctl_t dpi_ctl;
177215990Sjmallett
178215990Sjmallett        /* Give engine 0-4 1KB, and 5 3KB. This gives the packet engines better
179215990Sjmallett            performance. Total must not exceed 8KB */
180215990Sjmallett        dpi_engx_buf.u64 = 0;
181215990Sjmallett        dpi_engx_buf.s.blks = 2;
182215990Sjmallett        cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
183215990Sjmallett        cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
184215990Sjmallett        cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
185215990Sjmallett        cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
186215990Sjmallett        cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
187215990Sjmallett        dpi_engx_buf.s.blks = 6;
188215990Sjmallett        cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);
189215990Sjmallett
190215990Sjmallett        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
191215990Sjmallett        dma_control.s.pkt_hp = 1;
192215990Sjmallett        dma_control.s.pkt_en = 1;
193215990Sjmallett        dma_control.s.dma_enb = 0x1f;
194232812Sjmallett        dma_control.s.dwb_denb = cvmx_helper_cfg_opt_get(CVMX_HELPER_CFG_OPT_USE_DWB);
195215990Sjmallett        dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
196215990Sjmallett        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
197215990Sjmallett        dma_control.s.o_mode = 1;
198215990Sjmallett        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
199232812Sjmallett        /* When dma_control[pkt_en] = 1, engine 5 is used for packets and is not
200232812Sjmallett           available for DMA. */
201232812Sjmallett        dpi_dma_engx_en.u64 = cvmx_read_csr(CVMX_DPI_DMA_ENGX_EN(5));
202232812Sjmallett        dpi_dma_engx_en.s.qen = 0;
203232812Sjmallett        cvmx_write_csr(CVMX_DPI_DMA_ENGX_EN(5), dpi_dma_engx_en.u64);
204215990Sjmallett        dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
205215990Sjmallett        dpi_ctl.s.en = 1;
206215990Sjmallett        cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
207215990Sjmallett    }
208210284Sjmallett    else
209210284Sjmallett    {
210210284Sjmallett        cvmx_npi_dma_control_t dma_control;
211210284Sjmallett        dma_control.u64 = 0;
212210284Sjmallett        //dma_control.s.dwb_denb = 1;
213210284Sjmallett        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
214210284Sjmallett        dma_control.s.o_add1 = 1;
215210284Sjmallett        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
216210284Sjmallett        dma_control.s.hp_enb = 1;
217210284Sjmallett        dma_control.s.lp_enb = 1;
218210284Sjmallett        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
219210284Sjmallett        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
220210284Sjmallett    }
221210284Sjmallett
222210284Sjmallett    return 0;
223210284Sjmallett}
224232812Sjmallett#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
225232812SjmallettEXPORT_SYMBOL(cvmx_dma_engine_initialize);
226232812Sjmallett#endif
227210284Sjmallett
228210284Sjmallett/**
229232812Sjmallett * Shutdown all DMA engines. The engines must be idle when this
230210284Sjmallett * function is called.
231210284Sjmallett *
232210284Sjmallett * @return Zero on success, negative on failure
233210284Sjmallett */
234210284Sjmallettint cvmx_dma_engine_shutdown(void)
235210284Sjmallett{
236210284Sjmallett    int engine;
237210284Sjmallett
238210284Sjmallett    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
239210284Sjmallett    {
240210284Sjmallett        if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
241210284Sjmallett        {
242210284Sjmallett            cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
243210284Sjmallett            return -1;
244210284Sjmallett        }
245210284Sjmallett    }
246210284Sjmallett
247215990Sjmallett    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
248210284Sjmallett    {
249210284Sjmallett        cvmx_npei_dma_control_t dma_control;
250210284Sjmallett        dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
251210284Sjmallett        if (cvmx_dma_engine_get_num() >= 5)
252210284Sjmallett            dma_control.s.dma4_enb = 0;
253210284Sjmallett        dma_control.s.dma3_enb = 0;
254210284Sjmallett        dma_control.s.dma2_enb = 0;
255210284Sjmallett        dma_control.s.dma1_enb = 0;
256210284Sjmallett        dma_control.s.dma0_enb = 0;
257210284Sjmallett        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
258210284Sjmallett        /* Make sure the disable completes */
259210284Sjmallett        cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
260210284Sjmallett    }
261215990Sjmallett    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
262215990Sjmallett    {
263215990Sjmallett        cvmx_dpi_dma_control_t dma_control;
264215990Sjmallett        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
265215990Sjmallett        dma_control.s.dma_enb = 0;
266215990Sjmallett        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
267215990Sjmallett        /* Make sure the disable completes */
268215990Sjmallett        cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
269215990Sjmallett    }
270210284Sjmallett    else
271210284Sjmallett    {
272210284Sjmallett        cvmx_npi_dma_control_t dma_control;
273210284Sjmallett        dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
274210284Sjmallett        dma_control.s.hp_enb = 0;
275210284Sjmallett        dma_control.s.lp_enb = 0;
276210284Sjmallett        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
277210284Sjmallett        /* Make sure the disable completes */
278210284Sjmallett        cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
279210284Sjmallett    }
280210284Sjmallett
281210284Sjmallett    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
282210284Sjmallett    {
283210284Sjmallett        cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
284215990Sjmallett        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
285210284Sjmallett            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
286215990Sjmallett        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
287215990Sjmallett            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
288210284Sjmallett        else
289210284Sjmallett        {
290210284Sjmallett            if (engine)
291210284Sjmallett                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
292210284Sjmallett            else
293210284Sjmallett                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
294210284Sjmallett        }
295210284Sjmallett    }
296210284Sjmallett
297210284Sjmallett    return 0;
298210284Sjmallett}
299232812Sjmallett#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
300232812SjmallettEXPORT_SYMBOL(cvmx_dma_engine_shutdown);
301232812Sjmallett#endif
302210284Sjmallett
303210284Sjmallett/**
304232812Sjmallett * Submit a series of DMA command to the DMA engines.
305210284Sjmallett *
306215990Sjmallett * @param engine  Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
307210284Sjmallett * @param header  Command header
308210284Sjmallett * @param num_buffers
309210284Sjmallett *                The number of data pointers
310232812Sjmallett * @param buffers Command data pointers
311210284Sjmallett *
312210284Sjmallett * @return Zero on success, negative on failure
313210284Sjmallett */
314210284Sjmallettint cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
315210284Sjmallett{
316210284Sjmallett    cvmx_cmd_queue_result_t result;
317210284Sjmallett    int cmd_count = 1;
318210284Sjmallett    uint64_t cmds[num_buffers + 1];
319210284Sjmallett
320210284Sjmallett    if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
321210284Sjmallett    {
322210284Sjmallett        /* Check for Errata PCIe-604 */
323210284Sjmallett        if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
324210284Sjmallett        {
325210284Sjmallett            cvmx_dprintf("DMA engine submit too large\n");
326210284Sjmallett            return -1;
327210284Sjmallett        }
328210284Sjmallett    }
329210284Sjmallett
330210284Sjmallett    cmds[0] = header.u64;
331210284Sjmallett    while (num_buffers--)
332210284Sjmallett    {
333210284Sjmallett        cmds[cmd_count++] = buffers->u64;
334210284Sjmallett        buffers++;
335210284Sjmallett    }
336210284Sjmallett
337210284Sjmallett    /* Due to errata PCIE-13315, it is necessary to have the queue lock while we
338210284Sjmallett        ring the doorbell for the DMA engines. This prevents doorbells from
339210284Sjmallett        possibly arriving out of order with respect to the command queue
340210284Sjmallett        entries */
341210284Sjmallett    __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
342210284Sjmallett    result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
343210284Sjmallett    /* This SYNCWS is needed since the command queue didn't do locking, which
344210284Sjmallett        normally implies the SYNCWS. This one makes sure the command queue
345210284Sjmallett        updates make it to L2 before we ring the doorbell */
346210284Sjmallett    CVMX_SYNCWS;
347210284Sjmallett    /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
348210284Sjmallett    if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
349210284Sjmallett    {
350215990Sjmallett        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
351210284Sjmallett        {
352210284Sjmallett            /* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
353210284Sjmallett            cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
354210284Sjmallett        }
355215990Sjmallett        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
356215990Sjmallett            cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
357210284Sjmallett        else
358210284Sjmallett        {
359210284Sjmallett            if (engine)
360210284Sjmallett                cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
361210284Sjmallett            else
362210284Sjmallett                cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
363210284Sjmallett        }
364210284Sjmallett    }
365210284Sjmallett    /* Here is the unlock for the above errata workaround */
366210284Sjmallett    __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
367210284Sjmallett    return result;
368210284Sjmallett}
369210284Sjmallett
370210284Sjmallett
371210284Sjmallett/**
372210284Sjmallett * @INTERNAL
373210284Sjmallett * Function used by cvmx_dma_engine_transfer() to build the
374210284Sjmallett * internal address list.
375210284Sjmallett *
376210284Sjmallett * @param buffers Location to store the list
377210284Sjmallett * @param address Address to build list for
378210284Sjmallett * @param size    Length of the memory pointed to by address
379210284Sjmallett *
380210284Sjmallett * @return Number of internal pointer chunks created
381210284Sjmallett */
382210284Sjmallettstatic inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
383210284Sjmallett{
384210284Sjmallett    int segments = 0;
385210284Sjmallett    while (size)
386210284Sjmallett    {
387210284Sjmallett        /* Each internal chunk can contain a maximum of 8191 bytes */
388210284Sjmallett        int chunk = size;
389210284Sjmallett        if (chunk > 8191)
390210284Sjmallett            chunk = 8191;
391210284Sjmallett        buffers[segments].u64 = 0;
392210284Sjmallett        buffers[segments].internal.size = chunk;
393210284Sjmallett        buffers[segments].internal.addr = address;
394210284Sjmallett        address += chunk;
395210284Sjmallett        size -= chunk;
396210284Sjmallett        segments++;
397210284Sjmallett    }
398210284Sjmallett    return segments;
399210284Sjmallett}
400210284Sjmallett
401210284Sjmallett
402210284Sjmallett/**
403210284Sjmallett * @INTERNAL
404210284Sjmallett * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
405210284Sjmallett * list.
406210284Sjmallett * @param buffers Location to store the list
407210284Sjmallett * @param address Address to build list for
408210284Sjmallett * @param size    Length of the memory pointed to by address
409210284Sjmallett *
410210284Sjmallett * @return Number of PCI / PCIe address chunks created. The number of words used
411210284Sjmallett *         will be segments + (segments-1)/4 + 1.
412210284Sjmallett */
413210284Sjmallettstatic inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
414210284Sjmallett{
415210284Sjmallett    const int MAX_SIZE = 65535;
416210284Sjmallett    int segments = 0;
417210284Sjmallett    while (size)
418210284Sjmallett    {
419210284Sjmallett        /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
420210284Sjmallett            up to 4 addresses. This then repeats if more data is needed */
421210284Sjmallett        buffers[0].u64 = 0;
422210284Sjmallett        if (size <= MAX_SIZE)
423210284Sjmallett        {
424210284Sjmallett            /* Only one more segment needed */
425210284Sjmallett            buffers[0].pcie_length.len0 = size;
426210284Sjmallett            buffers[1].u64 = address;
427210284Sjmallett            segments++;
428210284Sjmallett            break;
429210284Sjmallett        }
430210284Sjmallett        else if (size <= MAX_SIZE * 2)
431210284Sjmallett        {
432210284Sjmallett            /* Two more segments needed */
433210284Sjmallett            buffers[0].pcie_length.len0 = MAX_SIZE;
434210284Sjmallett            buffers[0].pcie_length.len1 = size - MAX_SIZE;
435210284Sjmallett            buffers[1].u64 = address;
436210284Sjmallett            address += MAX_SIZE;
437210284Sjmallett            buffers[2].u64 = address;
438210284Sjmallett            segments+=2;
439210284Sjmallett            break;
440210284Sjmallett        }
441210284Sjmallett        else if (size <= MAX_SIZE * 3)
442210284Sjmallett        {
443210284Sjmallett            /* Three more segments needed */
444210284Sjmallett            buffers[0].pcie_length.len0 = MAX_SIZE;
445210284Sjmallett            buffers[0].pcie_length.len1 = MAX_SIZE;
446210284Sjmallett            buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
447210284Sjmallett            buffers[1].u64 = address;
448210284Sjmallett            address += MAX_SIZE;
449210284Sjmallett            buffers[2].u64 = address;
450210284Sjmallett            address += MAX_SIZE;
451210284Sjmallett            buffers[3].u64 = address;
452210284Sjmallett            segments+=3;
453210284Sjmallett            break;
454210284Sjmallett        }
455210284Sjmallett        else if (size <= MAX_SIZE * 4)
456210284Sjmallett        {
457210284Sjmallett            /* Four more segments needed */
458210284Sjmallett            buffers[0].pcie_length.len0 = MAX_SIZE;
459210284Sjmallett            buffers[0].pcie_length.len1 = MAX_SIZE;
460210284Sjmallett            buffers[0].pcie_length.len2 = MAX_SIZE;
461210284Sjmallett            buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
462210284Sjmallett            buffers[1].u64 = address;
463210284Sjmallett            address += MAX_SIZE;
464210284Sjmallett            buffers[2].u64 = address;
465210284Sjmallett            address += MAX_SIZE;
466210284Sjmallett            buffers[3].u64 = address;
467210284Sjmallett            address += MAX_SIZE;
468210284Sjmallett            buffers[4].u64 = address;
469210284Sjmallett            segments+=4;
470210284Sjmallett            break;
471210284Sjmallett        }
472210284Sjmallett        else
473210284Sjmallett        {
474210284Sjmallett            /* Five or more segments are needed */
475210284Sjmallett            buffers[0].pcie_length.len0 = MAX_SIZE;
476210284Sjmallett            buffers[0].pcie_length.len1 = MAX_SIZE;
477210284Sjmallett            buffers[0].pcie_length.len2 = MAX_SIZE;
478210284Sjmallett            buffers[0].pcie_length.len3 = MAX_SIZE;
479210284Sjmallett            buffers[1].u64 = address;
480210284Sjmallett            address += MAX_SIZE;
481210284Sjmallett            buffers[2].u64 = address;
482210284Sjmallett            address += MAX_SIZE;
483210284Sjmallett            buffers[3].u64 = address;
484210284Sjmallett            address += MAX_SIZE;
485210284Sjmallett            buffers[4].u64 = address;
486210284Sjmallett            address += MAX_SIZE;
487210284Sjmallett            size -= MAX_SIZE*4;
488210284Sjmallett            buffers += 5;
489210284Sjmallett            segments+=4;
490210284Sjmallett        }
491210284Sjmallett    }
492210284Sjmallett    return segments;
493210284Sjmallett}
494210284Sjmallett
495210284Sjmallett
496210284Sjmallett/**
497210284Sjmallett * Build the first and last pointers based on a DMA engine header
498210284Sjmallett * and submit them to the engine. The purpose of this function is
499210284Sjmallett * to simplify the building of DMA engine commands by automatically
500210284Sjmallett * converting a simple address and size into the apropriate internal
501210284Sjmallett * or PCI / PCIe address list. This function does not support gather lists,
502210284Sjmallett * so you will need to build your own lists in that case.
503210284Sjmallett *
504215990Sjmallett * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
505210284Sjmallett * @param header DMA Command header. Note that the nfst and nlst fields do not
506210284Sjmallett *               need to be filled in. All other fields must be set properly.
507210284Sjmallett * @param first_address
508210284Sjmallett *               Address to use for the first pointers. In the case of INTERNAL,
509210284Sjmallett *               INBOUND, and OUTBOUND this is an Octeon memory address. In the
510210284Sjmallett *               case of EXTERNAL, this is the source PCI / PCIe address.
511210284Sjmallett * @param last_address
512210284Sjmallett *               Address to use for the last pointers. In the case of EXTERNAL,
513210284Sjmallett *               INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
514210284Sjmallett *               case of INTERNAL, this is the Octeon memory destination address.
515210284Sjmallett * @param size   Size of the transfer to perform.
516210284Sjmallett *
517210284Sjmallett * @return Zero on success, negative on failure
518210284Sjmallett */
519210284Sjmallettint cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
520210284Sjmallett                             uint64_t first_address, uint64_t last_address,
521210284Sjmallett                             int size)
522210284Sjmallett{
523210284Sjmallett    cvmx_dma_engine_buffer_t buffers[32];
524210284Sjmallett    int words = 0;
525210284Sjmallett
526210284Sjmallett    switch (header.s.type)
527210284Sjmallett    {
528210284Sjmallett        case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
529210284Sjmallett            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
530210284Sjmallett            words += header.s.nfst;
531210284Sjmallett            header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
532210284Sjmallett            words += header.s.nlst;
533210284Sjmallett            break;
534210284Sjmallett        case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
535210284Sjmallett        case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
536210284Sjmallett            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
537210284Sjmallett            words += header.s.nfst;
538210284Sjmallett            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
539210284Sjmallett            words +=  header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
540210284Sjmallett            break;
541210284Sjmallett        case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
542210284Sjmallett            header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
543210284Sjmallett            words +=  header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
544210284Sjmallett            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
545210284Sjmallett            words +=  header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
546210284Sjmallett            break;
547210284Sjmallett    }
548210284Sjmallett    return cvmx_dma_engine_submit(engine, header, words, buffers);
549210284Sjmallett}
550232812Sjmallett#ifdef CVMX_BUILD_FOR_LINUX_KERNEL
551232812SjmallettEXPORT_SYMBOL(cvmx_dma_engine_transfer);
552210284Sjmallett#endif
553232812Sjmallett#endif
554