1/*-
2 * Copyright 2003-2011 Netlogic Microsystems (Netlogic). All rights
3 * reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in
13 *    the documentation and/or other materials provided with the
14 *    distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY Netlogic Microsystems ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * NETLOGIC_BSD */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32#include <sys/types.h>
33#include <sys/systm.h>
34
35#include <machine/cpufunc.h>
36#include <mips/nlm/hal/mips-extns.h>
37#include <mips/nlm/hal/haldefs.h>
38#include <mips/nlm/hal/iomap.h>
39#include <mips/nlm/hal/fmn.h>
40
/*
 * Per-queue spill depth, in FMN messages.
 *
 * An XLP hardware queue can take up to 16K FMN messages as spill.
 * Configuring all 16K, however, blows the total spill memory required
 * up to 192MB for a single-chip configuration and 768MB for a four-chip
 * configuration.  Hence, for now, the per-queue spill is set up as 1K
 * FMN messages.  With 12 bytes per single-entry FMN message and 1024
 * hardware queues, the total spill memory needed becomes
 * (1 * 1024) messages * 12 bytes * 1024 queues = 12MB.  For the
 * four-chip configuration the memory needed is 12MB * 4 = 48MB.
 */
uint64_t nlm_cms_spill_total_messages = 1024;	/* 1K messages per queue */
51
/*
 * Total number of FMN stations.
 *
 * An XLP832 has the following FMN stations:
 *   CPU    stations: 8
 *   PCIE0  stations: 1
 *   PCIE1  stations: 1
 *   PCIE2  stations: 1
 *   PCIE3  stations: 1
 *   GDX    stations: 1
 *   CRYPTO stations: 1
 *   RSA    stations: 1
 *   CMP    stations: 1
 *   POE    stations: 1
 *   NAE    stations: 1
 *   ==================
 *   Total          : 18 stations per chip
 *
 * Across all 4 nodes that gives 18 * 4 = 72 FMN stations.
 */
uint32_t nlm_cms_total_stations = 4 /* xlp_num_nodes */ * 18 /* per chip */;
70
71/**
72 * Takes inputs as node, queue_size and maximum number of queues.
73 * Calculates the base, start & end and returns the same for a
74 * defined qid.
75 *
 * The output queues are maintained in the internal output buffer,
 * which is an on-chip SRAM structure. In the actual hardware
 * implementation, it is a structure which consists
 * of eight banks of 4096-entry x message-width SRAMs. The SRAM
 * implementation is designed to run at 1GHz with a 1-cycle read/write
 * access. A read/write transaction can be initiated for each bank
 * every cycle for a total of eight accesses per cycle. Successive
 * entries of the same output queue are placed in successive banks.
 * This is done to spread different read & write accesses to the same or
 * different output queues over as many different banks as possible so that
 * they can be scheduled concurrently. Spreading the accesses over as many
 * banks as possible to maximize the internal concurrency is important for
 * achieving the desired peak throughput. This is done by the h/w
 * implementation itself.
90 *
91 * Output queues are allocated from this internal output buffer by
92 * software. The total capacity of the output buffer is 32K-entry.
93 * Each output queue can be sized from 32-entry to 1024-entry in
94 * increments of 32-entry. This is done by specifying a Start & a
95 * End pointer: pointers to the first & last 32-entry chunks allocated
96 * to the output queue.
97 *
98 * To optimize the storage required for 1024 OQ pointers, the upper 5-bits
99 * are shared by the Start & the End pointer. The side-effect of this
100 * optimization is that an OQ can't cross a 1024-entry boundary. Also, the
101 * lower 5-bits don't need to be specified in the Start & the End pointer
102 * as the allocation is in increments of 32-entries.
103 *
104 * Queue occupancy is tracked by a Head & a Tail pointer. Tail pointer
105 * indicates the location to which next entry will be written & Head
106 * pointer indicates the location from which next entry will be read. When
107 * these pointers reach the top of the allocated space (indicated by the
108 * End pointer), they are reset to the bottom of the allocated space
109 * (indicated by the Start pointer).
110 *
111 * Output queue pointer information:
112 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
113 *
114 *   14               10 9              5 4                 0
115 *   ------------------
116 *   | base ptr       |
117 *   ------------------
118 *                       ----------------
119 *                       | start ptr    |
120 *                       ----------------
121 *                       ----------------
122 *                       | end   ptr    |
123 *                       ----------------
124 *                       ------------------------------------
125 *                       |           head ptr               |
126 *                       ------------------------------------
127 *                       ------------------------------------
128 *                       |           tail ptr               |
129 *                       ------------------------------------
130 * Note:
131 * A total of 1024 segments can sit on one software-visible "bank"
132 * of internal SRAM. Each segment contains 32 entries. Also note
133 * that sw-visible "banks" are not the same as the actual internal
134 * 8-bank implementation of hardware. It is an optimization of
135 * internal access.
136 *
137 */
138
139void nlm_cms_setup_credits(uint64_t base, int destid, int srcid, int credit)
140{
141	uint64_t val;
142
143	val = (((uint64_t)credit << 24) | (destid << 12) | (srcid << 0));
144	nlm_write_cms_reg(base, CMS_OUTPUTQ_CREDIT_CFG, val);
145
146}
147
148/*
149 * base		- CMS module base address for this node.
150 * qid		- is the output queue id otherwise called as vc id
151 * spill_base   - is the 40-bit physical address of spill memory. Must be
152		  4KB aligned.
153 * nsegs	- No of segments where a "1" indicates 4KB. Spill size must be
154 *                a multiple of 4KB.
155 */
156int nlm_cms_alloc_spill_q(uint64_t base, int qid, uint64_t spill_base,
157				int nsegs)
158{
159	uint64_t queue_config;
160	uint32_t spill_start;
161
162	if (nsegs > CMS_MAX_SPILL_SEGMENTS_PER_QUEUE) {
163		return 1;
164	}
165
166	queue_config = nlm_read_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)));
167
168	spill_start = ((spill_base >> 12) & 0x3F);
169	/* Spill configuration */
170	queue_config = (((uint64_t)CMS_SPILL_ENA << 62) |
171				(((spill_base >> 18) & 0x3FFFFF) << 27) |
172				(spill_start + nsegs - 1) << 21 |
173				(spill_start << 15));
174
175	nlm_write_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)),queue_config);
176
177	return 0;
178}
179
/*
 * Return the raw CMS_OUTPUTQ_CONFIG register value of output queue 'qid'.
 *
 * base - CMS register base handed to nlm_read_cms_reg().
 * qid  - output queue id used to select the register.
 */
uint64_t
nlm_cms_get_onchip_queue(uint64_t base, int qid)
{
	uint64_t queue_config;

	queue_config = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	return (queue_config);
}
184
/*
 * OR 'val' into the CMS_OUTPUTQ_CONFIG register of output queue 'qid'.
 *
 * The register is read, combined with 'val' and written back, so bits
 * already set in the register stay set; this routine never clears a bit.
 */
void
nlm_cms_set_onchip_queue(uint64_t base, int qid, uint64_t val)
{
	uint64_t merged;

	merged = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	merged = merged | val;
	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), merged);
}
193
/*
 * Program the per-queue level interrupt fields of an output queue.
 *
 * base     - CMS register base.
 * qid      - output queue id selecting the CMS_OUTPUTQ_CONFIG register.
 * sub_type - written at bit 54 (the 2-bit field at bits 54..55 is
 *            cleared first).
 * intr_val - written at bit 56 (the 3-bit field at bits 56..58 is
 *            cleared first).
 *
 * All other register bits are written back as read. The arguments are
 * not masked, so out-of-range values reach neighbouring bits.
 */
void
nlm_cms_per_queue_level_intr(uint64_t base, int qid, int sub_type,
    int intr_val)
{
	const uint64_t field_mask = (0x3ULL << 54) | (0x7ULL << 56);
	uint64_t new_fields, cfg;

	new_fields = ((uint64_t)intr_val << 56) | ((uint64_t)sub_type << 54);

	cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	cfg = (cfg & ~field_mask) | new_fields;
	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), cfg);
}
208
/*
 * Program the per-queue timer interrupt fields of an output queue.
 *
 * base     - CMS register base.
 * qid      - output queue id selecting the CMS_OUTPUTQ_CONFIG register.
 * sub_type - written at bit 49 (the 2-bit field at bits 49..50 is
 *            cleared first).
 * intr_val - written at bit 51 (the 3-bit field at bits 51..53 is
 *            cleared first).
 *
 * All other register bits are written back as read. The arguments are
 * not masked, so out-of-range values reach neighbouring bits.
 */
void
nlm_cms_per_queue_timer_intr(uint64_t base, int qid, int sub_type,
    int intr_val)
{
	const uint64_t field_mask = (0x3ULL << 49) | (0x7ULL << 51);
	uint64_t new_fields, cfg;

	new_fields = ((uint64_t)intr_val << 51) | ((uint64_t)sub_type << 49);

	cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	cfg = (cfg & ~field_mask) | new_fields;
	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), cfg);
}
223
/*
 * Report whether an interrupt has been generated for output queue 'qid'.
 *
 * Returns 1 when bit 59 of the queue's CMS_OUTPUTQ_CONFIG register is
 * set, 0 otherwise.
 */
int
nlm_cms_outputq_intr_check(uint64_t base, int qid)
{
	uint64_t cfg;

	cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	if ((cfg & (1ULL << 59)) != 0)
		return (1);
	return (0);
}
232
/*
 * Write the CMS_OUTPUTQ_CONFIG register of output queue 'qid' back with
 * bit 59 set; every other bit is written back as read.
 *
 * NOTE(review): going by the function name, bit 59 is presumably a
 * write-one-to-clear interrupt status bit -- confirm against the
 * hardware documentation.
 */
void
nlm_cms_outputq_clr_intr(uint64_t base, int qid)
{
	uint64_t cfg;

	cfg = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), cfg | (1ULL << 59));
}
240
241void nlm_cms_illegal_dst_error_intr(uint64_t base, int en)
242{
243	uint64_t val;
244
245	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
246	val |= (en<<8);
247	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
248}
249
250void nlm_cms_timeout_error_intr(uint64_t base, int en)
251{
252	uint64_t val;
253
254	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
255	val |= (en<<7);
256	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
257}
258
259void nlm_cms_biu_error_resp_intr(uint64_t base, int en)
260{
261	uint64_t val;
262
263	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
264	val |= (en<<6);
265	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
266}
267
268void nlm_cms_spill_uncorrectable_ecc_error_intr(uint64_t base, int en)
269{
270	uint64_t val;
271
272	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
273	val |= (en<<5) | (en<<3);
274	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
275}
276
277void nlm_cms_spill_correctable_ecc_error_intr(uint64_t base, int en)
278{
279	uint64_t val;
280
281	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
282	val |= (en<<4) | (en<<2);
283	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
284}
285
286void nlm_cms_outputq_uncorrectable_ecc_error_intr(uint64_t base, int en)
287{
288	uint64_t val;
289
290	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
291	val |= (en<<1);
292	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
293}
294
295void nlm_cms_outputq_correctable_ecc_error_intr(uint64_t base, int en)
296{
297	uint64_t val;
298
299	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
300	val |= (en<<0);
301	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
302}
303
304uint64_t nlm_cms_network_error_status(uint64_t base)
305{
306	return nlm_read_cms_reg(base, CMS_MSG_ERR);
307}
308
/*
 * Extract the 4-bit field at bits 12..15 of 'err' (the error code,
 * going by the function name).
 *
 * err - an error status word, e.g. as returned by
 *       nlm_cms_network_error_status().
 */
int
nlm_cms_get_net_error_code(uint64_t err)
{
	const uint64_t code_field = (err & (0xfULL << 12)) >> 12;

	return ((int)code_field);
}
313
/*
 * Extract the 9-bit field at bits 32..40 of 'err' (the error syndrome,
 * going by the function name).
 *
 * err - an error status word, e.g. as returned by
 *       nlm_cms_network_error_status().
 */
int
nlm_cms_get_net_error_syndrome(uint64_t err)
{
	const uint64_t syndrome_field = (err & (0x1ffULL << 32)) >> 32;

	return ((int)syndrome_field);
}
318
/*
 * Extract the 15-bit field at bits 44..58 of 'err' (the RAM index,
 * going by the function name).
 *
 * err - an error status word, e.g. as returned by
 *       nlm_cms_network_error_status().
 */
int
nlm_cms_get_net_error_ramindex(uint64_t err)
{
	const uint64_t ramindex_field = (err & (0x7fffULL << 44)) >> 44;

	return ((int)ramindex_field);
}
323
/*
 * Extract the 12-bit field at bits 16..27 of 'err' (the output queue,
 * going by the function name).
 *
 * err - an error status word, e.g. as returned by
 *       nlm_cms_network_error_status().
 */
int
nlm_cms_get_net_error_outputq(uint64_t err)
{
	const uint64_t outputq_field = (err & (0xfffULL << 16)) >> 16;

	return ((int)outputq_field);
}
328
329/*========================= FMN Tracing related APIs ================*/
330
331void nlm_cms_trace_setup(uint64_t base, int en, uint64_t trace_base,
332				uint64_t trace_limit, int match_dstid_en,
333				int dst_id, int match_srcid_en, int src_id,
334				int wrap)
335{
336	uint64_t val;
337
338	nlm_write_cms_reg(base, CMS_TRACE_BASE_ADDR, trace_base);
339	nlm_write_cms_reg(base, CMS_TRACE_LIMIT_ADDR, trace_limit);
340
341	val = nlm_read_cms_reg(base, CMS_TRACE_CONFIG);
342	val |= (((uint64_t)match_dstid_en << 39) |
343		((dst_id & 0xfff) << 24) |
344		(match_srcid_en << 23) |
345		((src_id & 0xfff) << 8) |
346		(wrap << 1) |
347		(en << 0));
348	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
349}
350
351void nlm_cms_endian_byte_swap (uint64_t base, int en)
352{
353	nlm_write_cms_reg(base, CMS_MSG_ENDIAN_SWAP, en);
354}
355