1/*- 2 * Copyright 2003-2011 Netlogic Microsystems (Netlogic). All rights 3 * reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in 13 * the documentation and/or other materials provided with the 14 * distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY Netlogic Microsystems ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * NETLOGIC_BSD */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD$"); 32#include <sys/types.h> 33#include <sys/systm.h> 34 35#include <machine/cpufunc.h> 36#include <mips/nlm/hal/mips-extns.h> 37#include <mips/nlm/hal/haldefs.h> 38#include <mips/nlm/hal/iomap.h> 39#include <mips/nlm/hal/fmn.h> 40 41/* XLP can take upto 16K of FMN messages per hardware queue, as spill. 42* But, configuring all 16K causes the total spill memory to required 43* to blow upto 192MB for single chip configuration, and 768MB in four 44* chip configuration. Hence for now, we will setup the per queue spill 45* as 1K FMN messages. With this, the total spill memory needed for 1024 46* hardware queues (with 12bytes per single entry FMN message) becomes 47* (1*1024)*12*1024queues = 12MB. For the four chip config, the memory 48* needed = 12 * 4 = 48MB. 49*/ 50uint64_t nlm_cms_spill_total_messages = 1 * 1024; 51 52/* On a XLP832, we have the following FMN stations: 53* CPU stations: 8 54* PCIE0 stations: 1 55* PCIE1 stations: 1 56* PCIE2 stations: 1 57* PCIE3 stations: 1 58* GDX stations: 1 59* CRYPTO stations: 1 60* RSA stations: 1 61* CMP stations: 1 62* POE stations: 1 63* NAE stations: 1 64* ================== 65* Total : 18 stations per chip 66* 67* For all 4 nodes, there are 18*4 = 72 FMN stations 68*/ 69uint32_t nlm_cms_total_stations = 18 * 4 /*xlp_num_nodes*/; 70 71/** 72 * Takes inputs as node, queue_size and maximum number of queues. 73 * Calculates the base, start & end and returns the same for a 74 * defined qid. 75 * 76 * The output queues are maintained in the internal output buffer 77 * which is a on-chip SRAM structure. For the actial hardware 78 * internal implementation, It is a structure which consists 79 * of eight banks of 4096-entry x message-width SRAMs. The SRAM 80 * implementation is designed to run at 1GHz with a 1-cycle read/write 81 * access. A read/write transaction can be initiated for each bank 82 * every cycle for a total of eight accesses per cycle. Successive 83 * entries of the same output queue are placed in successive banks. 84 * This is done to spread different read & write accesses to same/different 85 * output queue over as many different banks as possible so that they 86 * can be scheduled concurrently. Spreading the accesses to as many banks 87 * as possible to maximize the concurrency internally is important for 88 * achieving the desired peak throughput. This is done by h/w implementation 89 * itself. 90 * 91 * Output queues are allocated from this internal output buffer by 92 * software. The total capacity of the output buffer is 32K-entry. 93 * Each output queue can be sized from 32-entry to 1024-entry in 94 * increments of 32-entry. This is done by specifying a Start & a 95 * End pointer: pointers to the first & last 32-entry chunks allocated 96 * to the output queue. 97 * 98 * To optimize the storage required for 1024 OQ pointers, the upper 5-bits 99 * are shared by the Start & the End pointer. The side-effect of this 100 * optimization is that an OQ can't cross a 1024-entry boundary. Also, the 101 * lower 5-bits don't need to be specified in the Start & the End pointer 102 * as the allocation is in increments of 32-entries. 103 * 104 * Queue occupancy is tracked by a Head & a Tail pointer. Tail pointer 105 * indicates the location to which next entry will be written & Head 106 * pointer indicates the location from which next entry will be read. When 107 * these pointers reach the top of the allocated space (indicated by the 108 * End pointer), they are reset to the bottom of the allocated space 109 * (indicated by the Start pointer). 110 * 111 * Output queue pointer information: 112 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 113 * 114 * 14 10 9 5 4 0 115 * ------------------ 116 * | base ptr | 117 * ------------------ 118 * ---------------- 119 * | start ptr | 120 * ---------------- 121 * ---------------- 122 * | end ptr | 123 * ---------------- 124 * ------------------------------------ 125 * | head ptr | 126 * ------------------------------------ 127 * ------------------------------------ 128 * | tail ptr | 129 * ------------------------------------ 130 * Note: 131 * A total of 1024 segments can sit on one software-visible "bank" 132 * of internal SRAM. Each segment contains 32 entries. Also note 133 * that sw-visible "banks" are not the same as the actual internal 134 * 8-bank implementation of hardware. It is an optimization of 135 * internal access. 136 * 137 */ 138 139void nlm_cms_setup_credits(uint64_t base, int destid, int srcid, int credit) 140{ 141 uint64_t val; 142 143 val = (((uint64_t)credit << 24) | (destid << 12) | (srcid << 0)); 144 nlm_write_cms_reg(base, CMS_OUTPUTQ_CREDIT_CFG, val); 145 146} 147 148/* 149 * base - CMS module base address for this node. 150 * qid - is the output queue id otherwise called as vc id 151 * spill_base - is the 40-bit physical address of spill memory. Must be 152 4KB aligned. 153 * nsegs - No of segments where a "1" indicates 4KB. Spill size must be 154 * a multiple of 4KB. 155 */ 156int nlm_cms_alloc_spill_q(uint64_t base, int qid, uint64_t spill_base, 157 int nsegs) 158{ 159 uint64_t queue_config; 160 uint32_t spill_start; 161 162 if (nsegs > CMS_MAX_SPILL_SEGMENTS_PER_QUEUE) { 163 return 1; 164 } 165 166 queue_config = nlm_read_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid))); 167 168 spill_start = ((spill_base >> 12) & 0x3F); 169 /* Spill configuration */ 170 queue_config = (((uint64_t)CMS_SPILL_ENA << 62) | 171 (((spill_base >> 18) & 0x3FFFFF) << 27) | 172 (spill_start + nsegs - 1) << 21 | 173 (spill_start << 15)); 174 175 nlm_write_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)),queue_config); 176 177 return 0; 178} 179 180uint64_t nlm_cms_get_onchip_queue (uint64_t base, int qid) 181{ 182 return nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid)); 183} 184 185void nlm_cms_set_onchip_queue (uint64_t base, int qid, uint64_t val) 186{ 187 uint64_t rdval; 188 189 rdval = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid)); 190 rdval |= val; 191 nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), rdval); 192} 193 194void nlm_cms_per_queue_level_intr(uint64_t base, int qid, int sub_type, 195 int intr_val) 196{ 197 uint64_t val; 198 199 val = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid)); 200 201 val &= ~((0x7ULL << 56) | (0x3ULL << 54)); 202 203 val |= (((uint64_t)sub_type<<54) | 204 ((uint64_t)intr_val<<56)); 205 206 nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), val); 207} 208 209void nlm_cms_per_queue_timer_intr(uint64_t base, int qid, int sub_type, 210 int intr_val) 211{ 212 uint64_t val; 213 214 val = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid)); 215 216 val &= ~((0x7ULL << 51) | (0x3ULL << 49)); 217 218 val |= (((uint64_t)sub_type<<49) | 219 ((uint64_t)intr_val<<51)); 220 221 nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), val); 222} 223 224/* returns 1 if interrupt has been generated for this output queue */ 225int nlm_cms_outputq_intr_check(uint64_t base, int qid) 226{ 227 uint64_t val; 228 val = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid)); 229 230 return ((val >> 59) & 0x1); 231} 232 233void nlm_cms_outputq_clr_intr(uint64_t base, int qid) 234{ 235 uint64_t val; 236 val = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid)); 237 val |= (1ULL<<59); 238 nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), val); 239} 240 241void nlm_cms_illegal_dst_error_intr(uint64_t base, int en) 242{ 243 uint64_t val; 244 245 val = nlm_read_cms_reg(base, CMS_MSG_CONFIG); 246 val |= (en<<8); 247 nlm_write_cms_reg(base, CMS_MSG_CONFIG, val); 248} 249 250void nlm_cms_timeout_error_intr(uint64_t base, int en) 251{ 252 uint64_t val; 253 254 val = nlm_read_cms_reg(base, CMS_MSG_CONFIG); 255 val |= (en<<7); 256 nlm_write_cms_reg(base, CMS_MSG_CONFIG, val); 257} 258 259void nlm_cms_biu_error_resp_intr(uint64_t base, int en) 260{ 261 uint64_t val; 262 263 val = nlm_read_cms_reg(base, CMS_MSG_CONFIG); 264 val |= (en<<6); 265 nlm_write_cms_reg(base, CMS_MSG_CONFIG, val); 266} 267 268void nlm_cms_spill_uncorrectable_ecc_error_intr(uint64_t base, int en) 269{ 270 uint64_t val; 271 272 val = nlm_read_cms_reg(base, CMS_MSG_CONFIG); 273 val |= (en<<5) | (en<<3); 274 nlm_write_cms_reg(base, CMS_MSG_CONFIG, val); 275} 276 277void nlm_cms_spill_correctable_ecc_error_intr(uint64_t base, int en) 278{ 279 uint64_t val; 280 281 val = nlm_read_cms_reg(base, CMS_MSG_CONFIG); 282 val |= (en<<4) | (en<<2); 283 nlm_write_cms_reg(base, CMS_MSG_CONFIG, val); 284} 285 286void nlm_cms_outputq_uncorrectable_ecc_error_intr(uint64_t base, int en) 287{ 288 uint64_t val; 289 290 val = nlm_read_cms_reg(base, CMS_MSG_CONFIG); 291 val |= (en<<1); 292 nlm_write_cms_reg(base, CMS_MSG_CONFIG, val); 293} 294 295void nlm_cms_outputq_correctable_ecc_error_intr(uint64_t base, int en) 296{ 297 uint64_t val; 298 299 val = nlm_read_cms_reg(base, CMS_MSG_CONFIG); 300 val |= (en<<0); 301 nlm_write_cms_reg(base, CMS_MSG_CONFIG, val); 302} 303 304uint64_t nlm_cms_network_error_status(uint64_t base) 305{ 306 return nlm_read_cms_reg(base, CMS_MSG_ERR); 307} 308 309int nlm_cms_get_net_error_code(uint64_t err) 310{ 311 return ((err >> 12) & 0xf); 312} 313 314int nlm_cms_get_net_error_syndrome(uint64_t err) 315{ 316 return ((err >> 32) & 0x1ff); 317} 318 319int nlm_cms_get_net_error_ramindex(uint64_t err) 320{ 321 return ((err >> 44) & 0x7fff); 322} 323 324int nlm_cms_get_net_error_outputq(uint64_t err) 325{ 326 return ((err >> 16) & 0xfff); 327} 328 329/*========================= FMN Tracing related APIs ================*/ 330 331void nlm_cms_trace_setup(uint64_t base, int en, uint64_t trace_base, 332 uint64_t trace_limit, int match_dstid_en, 333 int dst_id, int match_srcid_en, int src_id, 334 int wrap) 335{ 336 uint64_t val; 337 338 nlm_write_cms_reg(base, CMS_TRACE_BASE_ADDR, trace_base); 339 nlm_write_cms_reg(base, CMS_TRACE_LIMIT_ADDR, trace_limit); 340 341 val = nlm_read_cms_reg(base, CMS_TRACE_CONFIG); 342 val |= (((uint64_t)match_dstid_en << 39) | 343 ((dst_id & 0xfff) << 24) | 344 (match_srcid_en << 23) | 345 ((src_id & 0xfff) << 8) | 346 (wrap << 1) | 347 (en << 0)); 348 nlm_write_cms_reg(base, CMS_MSG_CONFIG, val); 349} 350 351void nlm_cms_endian_byte_swap (uint64_t base, int en) 352{ 353 nlm_write_cms_reg(base, CMS_MSG_ENDIAN_SWAP, en); 354} 355