/***********************license start***************
 * Copyright (c) 2003-2010 Cavium Networks (support@cavium.com). All rights
 * reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *   * Neither the name of Cavium Networks nor the names of
 *     its contributors may be used to endorse or promote products
 *     derived from this software without specific prior written
 *     permission.
 *
 * This Software, including technical data, may be subject to U.S. export control
 * laws, including the U.S. Export Administration Act and its associated
 * regulations, and may be subject to export or import regulations in other
 * countries.
 *
 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
 * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
 * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
 * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
 * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
 * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
 * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
 * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
 * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
 ***********************license end**************************************/


/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 50126 $<hr>
 */
#include "executive-config.h"
#include "cvmx-config.h"
#include "cvmx.h"
#include "cvmx-cmd-queue.h"
#include "cvmx-dma-engine.h"

#ifdef CVMX_ENABLE_PKO_FUNCTIONS

/**
 * Return the number of DMA engines supported by this chip
 *
 * @return Number of DMA engines
 */
int cvmx_dma_engine_get_num(void)
{
    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
            return 4;
        else
            return 5;
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        return 8;
    else
        return 2;
}

/**
 * Initialize the DMA engines for use
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_initialize(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_result_t result;
        result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
                                           0, CVMX_FPA_OUTPUT_BUFFER_POOL,
                                           CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
        if (result != CVMX_CMD_QUEUE_SUCCESS)
            return -1;
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
            dmax_ibuff_saddr.u64 = 0;
            dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
        {
            cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
            dpi_dmax_ibuff_saddr.u64 = 0;
            dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
            dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
        }
        else
        {
            uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = 0;
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 1;
        dma_control.s.dma3_enb = 1;
        dma_control.s.dma2_enb = 1;
        dma_control.s.dma1_enb = 1;
        dma_control.s.dma0_enb = 1;
        dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* As a workaround for errata PCIE-811 we only allow a single
            outstanding DMA read over PCIe at a time. This limits performance,
            but works in all cases. If you need higher performance, remove
            this code and implement the more complicated workaround documented
            in the errata. This only affects CN56XX pass 2.0 chips */
        if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
        {
            cvmx_npei_dma_pcie_req_num_t pcie_req_num;
            pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
            pcie_req_num.s.dma_cnt = 1;
            cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
        }
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_engx_buf_t dpi_engx_buf;
        cvmx_dpi_dma_control_t dma_control;
        cvmx_dpi_ctl_t dpi_ctl;

        /* Give engines 0-4 1KB each and engine 5 3KB. This gives the packet
            engines better performance. The total must not exceed 8KB */
        dpi_engx_buf.u64 = 0;
        dpi_engx_buf.s.blks = 2;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
        dpi_engx_buf.s.blks = 6;
        cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);

        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.pkt_hp = 1;
        dma_control.s.pkt_en = 1;
        dma_control.s.dma_enb = 0x1f;
        dma_control.s.dwb_denb = 1;
        dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.o_mode = 1;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
        dpi_ctl.s.en = 1;
        cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = 0;
        //dma_control.s.dwb_denb = 1;
        //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
        dma_control.s.o_add1 = 1;
        dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
        dma_control.s.hp_enb = 1;
        dma_control.s.lp_enb = 1;
        dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
    }

    return 0;
}
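
/* Usage note: a minimal bring-up sketch (hedged; not part of the original
   file). cvmx_dma_engine_initialize() allocates its command queue buffers
   from CVMX_FPA_OUTPUT_BUFFER_POOL, so that pool must already be set up by
   the application's normal executive configuration before this is called. */
#if 0
static void example_dma_engine_bringup(void)
{
    if (cvmx_dma_engine_initialize() != 0)
        cvmx_dprintf("DMA engine initialization failed\n");
}
#endif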


/**
 * Shutdown all DMA engines. The engines must be idle when this
 * function is called.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_shutdown(void)
{
    int engine;

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
        {
            cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
            return -1;
        }
    }

    if (octeon_has_feature(OCTEON_FEATURE_NPEI))
    {
        cvmx_npei_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
        if (cvmx_dma_engine_get_num() >= 5)
            dma_control.s.dma4_enb = 0;
        dma_control.s.dma3_enb = 0;
        dma_control.s.dma2_enb = 0;
        dma_control.s.dma1_enb = 0;
        dma_control.s.dma0_enb = 0;
        cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
    }
    else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
    {
        cvmx_dpi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
        dma_control.s.dma_enb = 0;
        cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
    }
    else
    {
        cvmx_npi_dma_control_t dma_control;
        dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
        dma_control.s.hp_enb = 0;
        dma_control.s.lp_enb = 0;
        cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
        /* Make sure the disable completes */
        cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
    }

    for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
    {
        cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
            cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
        }
    }

    return 0;
}


/**
 * Submit a series of DMA commands to the DMA engines.
 *
 * @param engine  Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header  Command header
 * @param num_buffers
 *                The number of data pointers
 * @param buffers Command data pointers
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
{
    cvmx_cmd_queue_result_t result;
    int cmd_count = 1;
    uint64_t cmds[num_buffers + 1];

    if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
    {
        /* Check for Errata PCIe-604 */
        if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
        {
            cvmx_dprintf("DMA engine submit too large\n");
            return -1;
        }
    }

    cmds[0] = header.u64;
    while (num_buffers--)
    {
        cmds[cmd_count++] = buffers->u64;
        buffers++;
    }

    /* Due to errata PCIE-13315, it is necessary to hold the queue lock while we
        ring the doorbell for the DMA engines. This prevents doorbells from
        possibly arriving out of order with respect to the command queue
        entries */
    __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
    /* This SYNCWS is needed since the command queue didn't do locking, which
        normally implies the SYNCWS. This one makes sure the command queue
        updates make it to L2 before we ring the doorbell */
    CVMX_SYNCWS;
    /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
    if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
    {
        if (octeon_has_feature(OCTEON_FEATURE_NPEI))
        {
            /* DMA doorbells are 32-bit writes in little endian space. This means we need to xor the address with 4 */
            cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
        }
        else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
            cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
        else
        {
            if (engine)
                cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
            else
                cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
        }
    }
    /* Here is the unlock for the above errata workaround */
    __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
    return result;
}
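
/* Usage note: a minimal sketch (hedged; not from the original file) of
   submitting a single-segment internal memory copy directly through
   cvmx_dma_engine_submit(). It only uses header and buffer fields that
   appear elsewhere in this file; a real caller must also set any other
   required header fields (see cvmx-dma-engine.h), and "size" must fit in
   one 8191 byte internal pointer. */
#if 0
static int example_submit_internal_copy(void *dest, const void *src, int size)
{
    cvmx_dma_engine_header_t header;
    cvmx_dma_engine_buffer_t buffers[2];

    header.u64 = 0;
    header.s.type = CVMX_DMA_ENGINE_TRANSFER_INTERNAL;
    header.s.nfst = 1;  /* one first (source) pointer */
    header.s.nlst = 1;  /* one last (destination) pointer */

    buffers[0].u64 = 0;
    buffers[0].internal.size = size;
    buffers[0].internal.addr = cvmx_ptr_to_phys((void *)src);
    buffers[1].u64 = 0;
    buffers[1].internal.size = size;
    buffers[1].internal.addr = cvmx_ptr_to_phys(dest);

    return cvmx_dma_engine_submit(0, header, 2, buffers);
}
#endif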


/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the
 * internal address list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of internal pointer chunks created
 */
static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    int segments = 0;
    while (size)
    {
        /* Each internal chunk can contain a maximum of 8191 bytes */
        int chunk = size;
        if (chunk > 8191)
            chunk = 8191;
        buffers[segments].u64 = 0;
        buffers[segments].internal.size = chunk;
        buffers[segments].internal.addr = address;
        address += chunk;
        size -= chunk;
        segments++;
    }
    return segments;
}
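
/* Worked example of the chunking above: a 20000 byte region splits into
   internal pointers of 8191, 8191, and 3618 bytes, so the function stores
   three buffer words and returns 3. */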

/**
 * @INTERNAL
 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
 * list.
 *
 * @param buffers Location to store the list
 * @param address Address to build list for
 * @param size    Length of the memory pointed to by address
 *
 * @return Number of PCI / PCIe address chunks created. The number of words used
 *         will be segments + (segments-1)/4 + 1.
 */
static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
{
    const int MAX_SIZE = 65535;
    int segments = 0;
    while (size)
    {
        /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
            up to 4 addresses. This then repeats if more data is needed */
        buffers[0].u64 = 0;
        if (size <= MAX_SIZE)
        {
            /* Only one more segment needed */
            buffers[0].pcie_length.len0 = size;
            buffers[1].u64 = address;
            segments++;
            break;
        }
        else if (size <= MAX_SIZE * 2)
        {
            /* Two more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = size - MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            segments+=2;
            break;
        }
        else if (size <= MAX_SIZE * 3)
        {
            /* Three more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            segments+=3;
            break;
        }
        else if (size <= MAX_SIZE * 4)
        {
            /* Four more segments needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            segments+=4;
            break;
        }
        else
        {
            /* Five or more segments are needed */
            buffers[0].pcie_length.len0 = MAX_SIZE;
            buffers[0].pcie_length.len1 = MAX_SIZE;
            buffers[0].pcie_length.len2 = MAX_SIZE;
            buffers[0].pcie_length.len3 = MAX_SIZE;
            buffers[1].u64 = address;
            address += MAX_SIZE;
            buffers[2].u64 = address;
            address += MAX_SIZE;
            buffers[3].u64 = address;
            address += MAX_SIZE;
            buffers[4].u64 = address;
            address += MAX_SIZE;
            size -= MAX_SIZE*4;
            buffers += 5;
            segments+=4;
        }
    }
    return segments;
}
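
/* Worked example of the word count formula above: a 200000 byte region needs
   four segments (65535 + 65535 + 65535 + 3395 bytes), which occupy one length
   dword plus four address words: segments + (segments-1)/4 + 1 =
   4 + 0 + 1 = 5 words. */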

/**
 * Build the first and last pointers based on a DMA engine header
 * and submit them to the engine. The purpose of this function is
 * to simplify the building of DMA engine commands by automatically
 * converting a simple address and size into the appropriate internal
 * or PCI / PCIe address list. This function does not support gather lists,
 * so you will need to build your own lists in that case.
 *
 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
 * @param header DMA Command header. Note that the nfst and nlst fields do not
 *               need to be filled in. All other fields must be set properly.
 * @param first_address
 *               Address to use for the first pointers. In the case of INTERNAL,
 *               INBOUND, and OUTBOUND this is an Octeon memory address. In the
 *               case of EXTERNAL, this is the source PCI / PCIe address.
 * @param last_address
 *               Address to use for the last pointers. In the case of EXTERNAL,
 *               INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
 *               case of INTERNAL, this is the Octeon memory destination address.
 * @param size   Size of the transfer to perform.
 *
 * @return Zero on success, negative on failure
 */
int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
                             uint64_t first_address, uint64_t last_address,
                             int size)
{
    cvmx_dma_engine_buffer_t buffers[32];
    int words = 0;

    switch (header.s.type)
    {
        case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
            words += header.s.nlst;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
        case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
            header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
            words += header.s.nfst;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
        case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
            header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
            words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
            header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
            words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
            break;
    }
    return cvmx_dma_engine_submit(engine, header, words, buffers);
}
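
/* Usage note: a minimal sketch (hedged; not from the original file) of an
   OUTBOUND transfer, copying local memory out to a PCI / PCIe bus address.
   Only the header "type" field is set here; per the comment above, all other
   header fields must be set properly for real use (see cvmx-dma-engine.h). */
#if 0
static int example_outbound_transfer(void *local_buffer, uint64_t pcie_address, int size)
{
    cvmx_dma_engine_header_t header;
    header.u64 = 0;
    header.s.type = CVMX_DMA_ENGINE_TRANSFER_OUTBOUND;
    /* nfst/nlst are computed by cvmx_dma_engine_transfer() from "size" */
    return cvmx_dma_engine_transfer(0, header,
                                    cvmx_ptr_to_phys(local_buffer),
                                    pcie_address, size);
}
#endif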

#endif /* CVMX_ENABLE_PKO_FUNCTIONS */