cvmx-dma-engine.c revision 210284
/***********************license start***************
 *  Copyright (c) 2003-2008 Cavium Networks (support@cavium.com). All rights
 *  reserved.
 *
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are
 *  met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *
 *      * Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials provided
 *        with the distribution.
 *
 *      * Neither the name of Cavium Networks nor the names of
 *        its contributors may be used to endorse or promote products
 *        derived from this software without specific prior written
 *        permission.
 *
 *  TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
 *  AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS
 *  OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
 *  RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
 *  REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
 *  DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
 *  OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
 *  PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET
 *  POSSESSION OR CORRESPONDENCE TO DESCRIPTION.  THE ENTIRE RISK ARISING OUT
 *  OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
 *
 *
 *  For any questions regarding licensing please contact marketing@caviumnetworks.com
 *
 ***********************license end**************************************/


/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 41586 $<hr>
 */
#include "executive-config.h"
#include "cvmx-config.h"
#include "cvmx.h"
#include "cvmx-cmd-queue.h"
#include "cvmx-dma-engine.h"

#ifdef CVMX_ENABLE_PKO_FUNCTIONS

/**
 * Return the number of DMA engines supported by this chip
 *
 * @return Number of DMA engines
 */
int cvmx_dma_engine_get_num(void)
{
    if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
            return 4;
        else
            return 5;
    }
    else
        return 2;
}

/**
 * Initialize the DMA engines for use
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_initialize(void)
{
    cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_result_t result;
        result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
                                           0, CVMX_FPA_OUTPUT_BUFFER_POOL,
                                           CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
        if (result != CVMX_CMD_QUEUE_SUCCESS)
            return -1;
        dmax_ibuff_saddr.u64 = 0;
        dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
        if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, dmax_ibuff_saddr.u64);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, dmax_ibuff_saddr.u64);
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = 0;
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 1;
        dma_control.s.dma3_enb = 1;
        dma_control.s.dma2_enb = 1;
        dma_control.s.dma1_enb = 1;
        dma_control.s.dma0_enb = 1;
        dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* As a workaround for errata PCIE-811 we only allow a single
            outstanding DMA read over PCIe at a time. This limits performance,
            but works in all cases. If you need higher performance, remove
            this code and implement the more complicated workaround documented
            in the errata. This only affects CN56XX pass 2.0 chips */
        if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
        {
            cvmx_npei_dma_pcie_req_num_t pcie_req_num;
            pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
            pcie_req_num.s.dma_cnt = 1;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
        }
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = 0;
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.o_add1 = 1;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.hp_enb = 1;
        dma_control.s.lp_enb = 1;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
    }

    return 0;
}
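
/*
 * Usage sketch (illustrative only, not part of this file): the command
 * queues above allocate from CVMX_FPA_OUTPUT_BUFFER_POOL, so that FPA
 * pool must be populated before cvmx_dma_engine_initialize() is called.
 * A typical boot-time sequence, assuming the application has already
 * set up the FPA hardware, might look like:
 *
 *     if (cvmx_dma_engine_initialize())
 *         cvmx_dprintf("DMA engine init failed\n");
 *     else
 *         cvmx_dprintf("Using %d DMA engines\n", cvmx_dma_engine_get_num());
 */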


/**
 * Shutdown all DMA engines. The engines must be idle when this
 * function is called.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_shutdown(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
        {
            cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
            return -1;
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 0;
        dma_control.s.dma3_enb = 0;
        dma_control.s.dma2_enb = 0;
        dma_control.s.dma1_enb = 0;
        dma_control.s.dma0_enb = 0;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
        dma_control.s.hp_enb = 0;
        dma_control.s.lp_enb = 0;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
    }

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
        if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
        }
    }

    return 0;
}
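
/*
 * Usage sketch (illustrative only): cvmx_dma_engine_shutdown() fails if
 * any command queue is non-empty, so callers typically stop submitting
 * and wait for the engines to drain first. Assuming, as the idle check
 * above implies, that cvmx_cmd_queue_length() drops to zero once the
 * hardware has consumed all commands, a caller might do:
 *
 *     int engine;
 *     for (engine = 0; engine < cvmx_dma_engine_get_num(); engine++)
 *         while (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
 *             ; // spin until this engine's queue drains
 *     cvmx_dma_engine_shutdown();
 */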


/**
 * Submit a series of DMA commands to the DMA engines.
 *
 * @param engine  Engine to submit to (0-4)
 * @param header  Command header
 * @param num_buffers
 *                The number of data pointers
 * @param buffers Command data pointers
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
{
    cvmx_cmd_queue_result_t result;
    int cmd_count = 1;
    uint64_t cmds[num_buffers + 1];

    if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
    {
        /* Check for Errata PCIe-604 */
        if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
        {
            cvmx_dprintf("DMA engine submit too large\n");
            return -1;
        }
    }

    cmds[0] = header.u64;
    while (num_buffers--)
    {
        cmds[cmd_count++] = buffers->u64;
        buffers++;
    }

    /* Due to errata PCIE-13315, it is necessary to have the queue lock while we
        ring the doorbell for the DMA engines. This prevents doorbells from
        possibly arriving out of order with respect to the command queue
        entries */
    __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
    /* This SYNCWS is needed since the command queue didn't do locking, which
        normally implies the SYNCWS. This one makes sure the command queue
        updates make it to L2 before we ring the doorbell */
    CVMX_SYNCWS;
    /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
    if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
    {
        if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        {
            /* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
            cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
        }
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
        }
    }
    /* Here is the unlock for the above errata workaround */
    __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    return result;
}
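
/*
 * Usage sketch (illustrative only): most callers should use
 * cvmx_dma_engine_transfer() below; building the list by hand is mainly
 * for gather-style commands. A hypothetical internal-memory copy of 1024
 * bytes from "src" to "dest" (both hypothetical pointers) could be
 * queued roughly as follows. Only the type/nfst/nlst header fields and
 * the internal.size/internal.addr buffer fields are used by this file;
 * check cvmx-dma-engine.h for any other header fields your transfer
 * requires:
 *
 *     cvmx_dma_engine_header_t header;
 *     cvmx_dma_engine_buffer_t buffers[2];
 *     header.u64 = 0;
 *     header.s.type = CVMX_DMA_ENGINE_TRANSFER_INTERNAL;
 *     header.s.nfst = 1;                // one first (source) pointer
 *     header.s.nlst = 1;                // one last (destination) pointer
 *     buffers[0].u64 = 0;
 *     buffers[0].internal.size = 1024;
 *     buffers[0].internal.addr = cvmx_ptr_to_phys(src);
 *     buffers[1].u64 = 0;
 *     buffers[1].internal.size = 1024;
 *     buffers[1].internal.addr = cvmx_ptr_to_phys(dest);
 *     if (cvmx_dma_engine_submit(0, header, 2, buffers))
 *         cvmx_dprintf("DMA submit failed\n");
 */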


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the
 * internal address list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of internal pointer chunks created
 */
static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    int segments = 0;
    while (size)
    {
        /* Each internal chunk can contain a maximum of 8191 bytes */
        int chunk = size;
        if (chunk > 8191)
            chunk = 8191;
        buffers[segments].u64 = 0;
        buffers[segments].internal.size = chunk;
        buffers[segments].internal.addr = address;
        address += chunk;
        size -= chunk;
        segments++;
    }
    return segments;
}
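
/*
 * Worked example: for a 20000 byte region, the loop above emits chunks
 * of 8191, 8191, and 3618 bytes, filling buffers[0..2] and returning 3.
 * Each segment consumes exactly one command word.
 */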


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
 * list.
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of PCI / PCIe address chunks created. The number of words used
 *         will be segments + (segments-1)/4 + 1.
 */
static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    const int MAX_SIZE = 65535;
    int segments = 0;
    while (size)
    {
        /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
            up to 4 addresses. This then repeats if more data is needed */
        buffers[0].u64 = 0;
        if (size <= MAX_SIZE)
        {
            /* Only one more segment needed */
            buffers[0].pcie_length.len0 = size;
            buffers[1].u64 = address;
            segments++;
            break;
        }
        else if (size <= MAX_SIZE * 2)
        {
            /* Two more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = size - MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            segments += 2;
            break;
        }
        else if (size <= MAX_SIZE * 3)
        {
            /* Three more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            segments += 3;
            break;
        }
        else if (size <= MAX_SIZE * 4)
        {
            /* Four more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            segments += 4;
            break;
        }
        else
        {
            /* Five or more segments are needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            address += MAX_SIZE;
            size -= MAX_SIZE * 4;
            buffers += 5;
            segments += 4;
        }
    }
    return segments;
}
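
/*
 * Worked example: a 200000 byte transfer needs 4 segments (200000 is
 * between 3*65535 = 196605 and 4*65535 = 262140), so the function
 * returns 4 after writing one length dword followed by four address
 * words. Per the formula in the comment above, that consumes
 * 4 + (4-1)/4 + 1 = 5 command words.
 */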


/**
 * Build the first and last pointers based on a DMA engine header
 * and submit them to the engine. The purpose of this function is
 * to simplify the building of DMA engine commands by automatically
 * converting a simple address and size into the appropriate internal
 * or PCI / PCIe address list. This function does not support gather lists,
 * so you will need to build your own lists in that case.
 *
 * @param engine Engine to submit to (0-4)
 * @param header DMA Command header. Note that the nfst and nlst fields do not
 *               need to be filled in. All other fields must be set properly.
 * @param first_address
 *               Address to use for the first pointers. In the case of INTERNAL,
 *               INBOUND, and OUTBOUND this is an Octeon memory address. In the
 *               case of EXTERNAL, this is the source PCI / PCIe address.
 * @param last_address
 *               Address to use for the last pointers. In the case of EXTERNAL,
 *               INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
 *               case of INTERNAL, this is the Octeon memory destination address.
 * @param size   Size of the transfer to perform.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
                             uint64_t first_address, uint64_t last_address,
                             int size)
{
    cvmx_dma_engine_buffer_t buffers[32];
    int words = 0;

    switch (header.s.type)
    {
        case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
            words += header.s.nlst;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
        case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
            words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
    }
    return cvmx_dma_engine_submit(engine, header, words, buffers);
}
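
/*
 * Usage sketch (illustrative only): queue an outbound transfer that
 * copies "size" bytes from local memory at "local_addr" to the PCI /
 * PCIe bus address "pcie_addr". All three names are hypothetical, and
 * only the type/nfst/nlst header fields are referenced by this file;
 * consult cvmx-dma-engine.h for the remaining fields the hardware
 * requires (completion notification, ports, etc.):
 *
 *     cvmx_dma_engine_header_t header;
 *     header.u64 = 0;
 *     header.s.type = CVMX_DMA_ENGINE_TRANSFER_OUTBOUND;
 *     // Fill in the remaining header fields per cvmx-dma-engine.h here.
 *     if (cvmx_dma_engine_transfer(0, header, local_addr, pcie_addr, size))
 *         cvmx_dprintf("DMA transfer failed\n");
 */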

#endif