1/*  *********************************************************************
2    *  SB1250 Board Support Package
3    *
4    *  DRAM Startup Module  		      File: sb1250_draminit.c
5    *
6    *  This module contains code to initialize and start the DRAM
7    *  controller on the SB1250.
8    *
9    *  This is the fancy new init module, written in "C".
10    *
11    *  Author:  Mitch Lichtenberg
12    *
13    *********************************************************************
14    *
15    *  Copyright 2000,2001,2002,2003
16    *  Broadcom Corporation. All rights reserved.
17    *
18    *  This software is furnished under license and may be used and
19    *  copied only in accordance with the following terms and
20    *  conditions.  Subject to these conditions, you may download,
21    *  copy, install, use, modify and distribute modified or unmodified
22    *  copies of this software in source and/or binary form.  No title
23    *  or ownership is transferred hereby.
24    *
25    *  1) Any source code used, modified or distributed must reproduce
26    *     and retain this copyright notice and list of conditions
27    *     as they appear in the source file.
28    *
29    *  2) No right is granted to use any trade name, trademark, or
30    *     logo of Broadcom Corporation.  The "Broadcom Corporation"
31    *     name may not be used to endorse or promote products derived
32    *     from this software without the prior written permission of
33    *     Broadcom Corporation.
34    *
35    *  3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR
36    *     IMPLIED WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED
37    *     WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
38    *     PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT
39    *     SHALL BROADCOM BE LIABLE FOR ANY DAMAGES WHATSOEVER, AND IN
40    *     PARTICULAR, BROADCOM SHALL NOT BE LIABLE FOR DIRECT, INDIRECT,
41    *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
42    *     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
43    *     GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44    *     BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
45    *     OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
46    *     TORT (INCLUDING NEGLIGENCE OR OTHERWISE), EVEN IF ADVISED OF
47    *     THE POSSIBILITY OF SUCH DAMAGE.
48    ********************************************************************* */
49
50/*
51 * This code can be linked into non-CFE, non-SB1250 things like SOCVIEW, a JTAG
52 * tool.  In that case it's not even running on a 1250, but we can
53 * borrow the code to generate timing values for us.
54 *
55 * The _MCSTANDALONE_ ifdef is normally turned *off* for firmware use,
56 * but programs like "memconfig" (CFE host tool) or SOCVIEW use it
57 * to allow us to run the memory initialization outside a 1250.
58 */
59
60#ifdef _MCSTANDALONE_
61#include <stdio.h>
62#include <string.h>
63#else
64#include "sbmips.h"
65#endif
66
67#include "sb1250_regs.h"
68#include "sb1250_mc.h"
69#include "sb1250_smbus.h"
70#include "sb1250_scd.h"
71
72/*
73 * Uncomment to use data mover to zero memory
74 * Note: this is not a good idea in Pass1, since we'll
75 * be running cacheable noncoherent at this point in the
76 * CFE init sequence.
77 */
78/* #define _DMZERO_ */
79
80#ifdef _DMZERO_
81#include "sb1250_dma.h"
82#endif
83
84/*  *********************************************************************
85    *  Magic Constants
86    ********************************************************************* */
87
88/*
89 * This constant represents the "round trip" time of your board.
90 * Measured from the pins on the BCM1250, it is the time from the
91 * rising edge of the MCLK pin to the rising edge of the DQS coming
92 * back from the memory.
93 *
94 * It is used in the calculation of which cycle responses are expected
95 * from the memory for a given request.  The units are in tenths of
96 * nanoseconds.
97 */
98
99#define DEFAULT_MEMORY_ROUNDTRIP_TIME	25		/* 2.5ns (default) */
100#define DEFAULT_MEMORY_ROUNDTRIP_TIME_FCRAM	20	/* 2.0ns for FCRAM */
101
102
/*
 * DLL scale (numerator/denominator) and offset constants, one set per
 * silicon pass.  sb1250_auto_timing loads the PASS1 set into
 * dllScaleNum/dllScaleDenom/dllOffset for pass-1 BCM1250 parts and the
 * PASS2 set for everything else (pass2+ BCM1250, or BCM112x).
 * The two sets currently hold identical values.
 */
103#define PASS1_DLL_SCALE_NUMERATOR	30		/* 30/400 = 0.075 */
104#define PASS1_DLL_SCALE_DENOMINATOR	400
105#define PASS1_DLL_OFFSET		63		/* 63/400 = 0.1575 */
106
107#define PASS2_DLL_SCALE_NUMERATOR	30		/* 30/400 = 0.075 */
108#define PASS2_DLL_SCALE_DENOMINATOR	400
109#define PASS2_DLL_OFFSET		63		/* 63/400 = 0.1575 */
110
111
112/*
113 * The constants below were created by careful measurement of
114 * BCM1250 parts.  The units are in tenths of nanoseconds
115 * to be compatible with the rest of the calculations in sb1250_auto_timing.
116 */
117
118#define SB1250_MIN_R2W_TIME		30	/* 3.0 ns */
119#define SB1250_MIN_DQS_MARGIN		25	/* 2.5 ns; initial minDqsMargin in sb1250_auto_timing */
120#define SB1250_WINDOW_OPEN_OFFSET	18	/* 1.8 ns; subtracted when computing the n01/n02/n12 "open" points */
121#define SB1250_CLOSE_01_OFFSET		34	/* 3.4 ns; subtracted when computing n01_close */
122#define SB1250_CLOSE_02_OFFSET		22	/* 2.2 ns; subtracted when computing n02_close */
123#define SB1250_CLOSE_12_OFFSET		24	/* 2.4 ns; subtracted when computing n12_close */
124
125
126#define BURSTLEN			4		/* always 4 per burst */
127
128/*  *********************************************************************
129    *  Basic types
130    ********************************************************************* */
131
132#ifdef _CFE_
133#include "lib_types.h"
134#else
135typedef unsigned char uint8_t;
136typedef unsigned short uint16_t;
137typedef unsigned int uint32_t;
138typedef unsigned long long uint64_t;
139#endif
140
141/*
142 * For SOCVIEW and non-CFE, non-MIPS stuff, make sure the "port"
143 * data type is 64 bits.  Otherwise we take our cue from 'long'
144 * which will be pointer-sized.
145 */
146
147#if defined(_MCSTANDALONE_)
148typedef long long sbport_t;
149#else
150typedef long sbport_t;
151#endif
152
153#ifdef _CFE_
154#include "bsp_config.h"
155#endif
156
157#define TRUE 1
158#define FALSE 0
159
160/*  *********************************************************************
161    *  Configuration
162    ********************************************************************* */
163
164/*
165 * This module needs to be compiled with mips64 to ensure that 64-bit
166 * values are in 64-bit registers and that reads/writes of 64-bit numbers
167 * are done with the ld/sd instructions.
168 */
169#if !defined(__mips64) && !defined(_MCSTANDALONE_)
170#error "This module MUST be compiled with __mips64.  See the comments for details."
171#endif
172
173/*
174 * Configure some stuff here if not running under the firmware.
175 */
176
177#ifndef _CFE_
178#define CFG_DRAM_ECC		0
179#define CFG_DRAM_SMBUS_CHANNEL	0
180#define CFG_DRAM_SMBUS_BASE	0x54
181#define CFG_DRAM_BLOCK_SIZE	32
182#endif
183
184
185/*
186 * Clock configuration parameters, except for the MCLK ratio
187 * which is set according to the value of the PLL divide ratio.
188 */
189
/*
 * Default clock-configuration field values: address/DQO/DQI skews and
 * address/data/clock drive strengths, OR-ed together.
 *
 * The _PASS1 variant differs from the regular one in its address skew
 * (0x0F instead of 0x08) and clock drive (0 instead of 0xF).
 *
 * Each replacement list is wrapped in parentheses so the macro acts as
 * a single value wherever it is used; without them an expression such
 * as (x & V_MC_CLKCONFIG_VALUE) would bind '&' to the first field only.
 */
#define V_MC_CLKCONFIG_VALUE_PASS1  (V_MC_ADDR_SKEW(0x0F) | \
                                     V_MC_DQO_SKEW(0x8) | \
                                     V_MC_DQI_SKEW(0x8) | \
                                     V_MC_ADDR_DRIVE(0xF) | \
                                     V_MC_DATA_DRIVE(0xF) | \
                                     V_MC_CLOCK_DRIVE(0))

#define V_MC_CLKCONFIG_VALUE        (V_MC_ADDR_SKEW(0x08) | \
                                     V_MC_DQO_SKEW(0x8) | \
                                     V_MC_DQI_SKEW(0x8) | \
                                     V_MC_ADDR_DRIVE(0xF) | \
                                     V_MC_DATA_DRIVE(0xF) | \
                                     V_MC_CLOCK_DRIVE(0xF))
203
204/*
205 * These belong in some SB1250-specific file I'm sure.
206 */
207
208#define MC_CHANNELS	2		/* we have two channels */
209#define MC_CHIPSELS	4		/* and four chipsels per channel */
210
211
212/*  *********************************************************************
213    *  Reference Clock
214    ********************************************************************* */
215
216#ifdef _MAGICWID_
217  /*
218   * You really don't want to know about this.  During testing, we futz
219   * with the 100mhz clock and store the actual speed of the clock
220   * in the PromICE so we can make the calculations work out correctly
221   * (and automatically)
222   */
223  #define SB1250_REFCLK (*((uint64_t *) PHYS_TO_K1(0x1FC00018)))
224  #undef K_SMB_FREQ_100KHZ
225  #define K_SMB_FREQ_100KHZ ((SB1250_REFCLK*10)/8)
226#else
227  /*
228   * If non-CFE, non-MIPS, make the refclk an input parameter.
229   */
230  #if defined(_MCSTANDALONE_)
231    int sb1250_refclk = 100;
232    int dram_cas_latency;
233    int dram_tMemClk;
234    #define SB1250_REFCLK sb1250_refclk
235  #endif
236#endif
237
238/*
239 * Define our reference clock.  The default is 100MHz unless
240 * overridden.  You can override this in your bsp_config.h file.
241 */
242
243#ifdef SB1250_REFCLK_HZ
244   #define SB1250_REFCLK ((SB1250_REFCLK_HZ)/1000000)
245#endif
246
247#ifndef SB1250_REFCLK
248  #define SB1250_REFCLK	100		/* speed of refclk, in Mhz */
249#endif
250
251/*  *********************************************************************
252    *  Macros
253    ********************************************************************* */
254
255/*
256 * For the general case, reads/writes to MC CSRs are just pointer
257 * references.  In SOCVIEW and other non-CFE, non-MIPS programs, we hook the
258 * read/write calls to let us supply the data from somewhere else.
259 */
260
#if defined(_MCSTANDALONE_)
  /* Standalone build: addresses are passed through unchanged and CSR
     accesses are routed to externally supplied hook functions. */
  #define PHYS_TO_K1(x) (x)
  #define WRITECSR(csr,val) sbwritecsr(csr,val)
  #define READCSR(csr)      sbreadcsr(csr)
  extern void sbwritecsr(uint64_t,uint64_t);
  extern uint64_t sbreadcsr(uint64_t);
  extern void sbdelay(void);
#else	/* normal case */
  /* CSR accesses are volatile 64-bit loads/stores through the given
     address.  Both expansions are parenthesized as a whole so they
     stay a single expression no matter what surrounds the macro call. */
  #define WRITECSR(csr,val) (*((volatile uint64_t *) (csr)) = (val))
  #define READCSR(csr) (*((volatile uint64_t *) (csr)))
#endif

/* max(a,b) - the larger of a and b.  Note: the winning argument is
   evaluated twice, so do not pass expressions with side effects. */
#define max(a,b) ((a) > (b) ? (a) : (b))
274
275/*  *********************************************************************
276    *  JEDEC values
277    ********************************************************************* */
278
279
280#define JEDEC_SDRAM_MRVAL_CAS15 0x52	/* 4-byte bursts, sequential, CAS 1.5 */
281#define JEDEC_SDRAM_MRVAL_CAS2	0x22	/* 4-byte bursts, sequential, CAS 2 */
282#define JEDEC_SDRAM_MRVAL_CAS25	0x62	/* 4-byte bursts, sequential, CAS 2.5 */
283#define JEDEC_SDRAM_MRVAL_CAS3	0x32	/* 4-byte bursts, sequential, CAS 3 */
284#define JEDEC_SDRAM_MRVAL_CAS35 0x72    /* 4-byte bursts, sequential, CAS 3.5 */
285#define JEDEC_SDRAM_MRVAL_RESETDLL 0x100
286#define JEDEC_SDRAM_EMRVAL	0x00
287#define JEDEC_SDRAM_EMRVAL_DS_REDUCED	0x02
288
289#define FCRAM_MRVAL		0x32
290#define FCRAM_EMRVAL		0
291
292#define SGRAM_MRVAL		0x32	/* 4-byte bursts, sequential, CAS 3 */
293#define SGRAM_MRVAL_RESETDLL	0x400
294#define SGRAM_EMRVAL		0x02
295
/*
 * DECTO10THS(x) - decode a BCD-style byte, as found in JEDEC SPDs,
 * into an ordinary integer.  The high nibble is the tens digit and
 * the low nibble is the units digit, so 0x75 (meaning "7.5ns")
 * becomes 75 decimal, i.e. a count of tenths of nanoseconds, which
 * is the unit many of the timing calculations work in.
 */

#define DECTO10THS(x) (((x) & 0x0F) + (10 * ((x) >> 4)))
304
305/*  *********************************************************************
306    *  Configuration parameter values
307    ********************************************************************* */
308
309#ifndef CFG_DRAM_MIN_tMEMCLK
310#define CFG_DRAM_MIN_tMEMCLK	DRT10(8,0)	/* 8 ns, 125Mhz */
311#endif
312
313#ifndef CFG_DRAM_INTERLEAVE
314#define CFG_DRAM_INTERLEAVE	0
315#endif
316
317#ifndef CFG_DRAM_SMBUS_CHANNEL
318#define CFG_DRAM_SMBUS_CHANNEL	0
319#endif
320
321#ifndef CFG_DRAM_SMBUS_BASE
322#define CFG_DRAM_SMBUS_BASE	0x54
323#endif
324
325#ifndef CFG_DRAM_ECC
326#define CFG_DRAM_ECC		0
327#endif
328
329#ifndef CFG_DRAM_BLOCK_SIZE
330#define CFG_DRAM_BLOCK_SIZE	32
331#endif
332
333#ifndef CFG_DRAM_CSINTERLEAVE
334#define CFG_DRAM_CSINTERLEAVE	0
335#endif
336
337/*  *********************************************************************
338    *  Memory region sizes (SB1250-specific)
339    ********************************************************************* */
340
/*
 * Start location and size of each of the four memory regions.
 * Sizes are expressed in megabytes (REGION3's 508*1024 is 508 GB).
 * NOTE(review): the _LOC values appear to be start addresses in the
 * same megabyte units (0x0800 = 2 GB, 0x0C00 = 3 GB, 0x1000 = 4 GB);
 * confirm against the code that consumes them.
 */
341#define REGION0_LOC	0x0000
342#define REGION0_SIZE	256
343
344#define REGION1_LOC	0x0800
345#define REGION1_SIZE	512
346
347#define REGION2_LOC	0x0C00
348#define REGION2_SIZE	256
349
350#define REGION3_LOC	0x1000
351#define REGION3_SIZE	(508*1024)		/* 508 GB! */
352
353/*  *********************************************************************
354    *  Global Data structure
355    *
356    *  This is a hideous hack.  We're going to actually use "memory"
357    *  before it is configured.  The L1 DCache will be clean before
358    *  we get here, so we'll just locate this structure in memory
359    *  (at 0, for example) and "hope" we don't need to evict anything.
360    *  If we keep the data below 256 cache lines, we'll only use one way
361    *  of each cache line.  That's 8K, more than enough.
362    *
363    *  This data structure needs to be used both for our data and the
364    *  "C" stack, so be careful when you edit it!
365    ********************************************************************* */
366
/*
 * Per-chip-select data: device geometry, CS_* flag bits (presence,
 * timing availability and the selected CAS latency), and the raw SPD
 * bytes that sb1250_auto_timing derives the timing values from.
 */
367typedef struct csdata_s {		/* Geometry information from table */
368    uint8_t rows;			/* or SMBus */
369    uint8_t cols;
370    uint8_t banks;
371    uint8_t flags;			/* CS_xxx bits */
372
373    uint8_t spd_dramtype;		/* SPD[2] */
374    uint8_t spd_tCK_25;			/* SPD[9]  tCK @ CAS 2.5 */
375    uint8_t spd_tCK_20;			/* SPD[23] tCK @ CAS 2.0 */
376    uint8_t spd_tCK_10;			/* SPD[25] tCK @ CAS 1.0 */
377    uint8_t spd_rfsh;			/* SPD[12] Refresh Rate */
378    uint8_t spd_caslatency;		/* SPD[18] CAS Latencies Supported */
379    uint8_t spd_attributes;		/* SPD[21] Attributes */
380    uint8_t spd_tRAS;			/* SPD[30] */
381    uint8_t spd_tRP;			/* SPD[27] */
382    uint8_t spd_tRRD;			/* SPD[28] */
383    uint8_t spd_tRCD;			/* SPD[29] */
384    uint8_t spd_tRFC;			/* SPD[42] */
385    uint8_t spd_tRC;			/* SPD[41] */
386
387} csdata_t;				/* total size: 17 bytes (4 geometry/flag + 13 SPD) */
388
/*
 * Bit assignments for csdata_t.flags.
 *
 * The CAS latency field holds the latency in half cycles shifted left
 * by CS_CASLAT_SHIFT, which is how sb1250_auto_timing stores it:
 * e.g. 2.5 cycles = 5 half cycles = 0x50.
 */
389#define CS_PRESENT 1			/* chipsel is present (in use) */
390#define CS_AUTO_TIMING 2		/* chipsel has timing information */
391
392#define CS_CASLAT_10	0x20		/* upper four bits are the CAS latency */
393#define CS_CASLAT_15	0x30		/* we selected.  bits 7..5 are the */
394#define CS_CASLAT_20	0x40		/* whole number and bit 4 is the */
395#define CS_CASLAT_25	0x50		/* fraction. */
396#define CS_CASLAT_30	0x60		/* CAS 3.0 */
397#define CS_CASLAT_MASK	0xF0		/* covers the whole CAS latency field */
398#define CS_CASLAT_SHIFT 4		/* bit position of the CAS latency field */
399
400typedef struct mcdata_s {		/* Information per memory controller */
401    uint32_t cfgcsint; 			/* try to interleave this many CS bits */
402    uint32_t csint;			/* # of chip select interleave bits */
403    uint16_t mintmemclk;		/* minimum tMemClk (tenths of ns, compared against SPD tCK) */
404    uint16_t roundtrip;			/* Round trip time from CLK to returned DQS at BCM1250 pin */
405    uint32_t dramtype;			/* DRAM Type */
406    uint32_t pagepolicy;		/* Page policy */
407    uint32_t blksize;			/* Block Size */
408    uint32_t flags;			/* ECC enabled */
409    uint16_t tCK;			/* tCK for manual timing */
410    uint16_t rfsh;			/* refresh rate for manual timing */
411    uint64_t clkconfig;			/* default clock config */
412    uint64_t mantiming;			/* manual timing */
413    csdata_t csdata[MC_CHIPSELS];	/* per-chip-select data, 17 bytes each */
414} mcdata_t;				/* Total size: 48 + 17*4 = 116 bytes of fields; sizeof is typically 120 once padded to 8-byte alignment */
415
/*
 * Top-level working data for DRAM init: global flags and totals plus
 * one mcdata_t per memory channel.
 */
416typedef struct initdata_s {
417    uint64_t dscr[4];			/* Data Mover descriptor (one cache line)*/
418    uint32_t flags;			/* various flags */
419    uint32_t inuse;			/* indicates MC is in use */
420    uint32_t pintbit;			/* port interleave bit */
421    uint16_t firstchan;			/* first channel */
422    uint16_t soctype;			/* SOC type */
423    uint64_t ttlbytes;			/* total bytes */
424    mcdata_t mc[MC_CHANNELS];		/* data per memory controller */
425} initdata_t;				/* Total size: 56 + 2*sizeof(mcdata_t); with mcdata_t at 116 bytes of fields (typically padded to 120) that is 296 bytes */
426
/* NOTE(review): presumably bit values for initdata_t.flags -- confirm against the code that sets/tests them */
427#define M_MCINIT_TRYPINTLV 1		/* Try to do port interleaving */
428#define M_MCINIT_PINTLV	   2		/* Actually do port interleaving */
429
430
/* Work area: initdata structure plus enough working stack to run the
   DRAM init routine.  We round the initdata structure up to a 1K boundary,
   and throw in an extra 1K for stack space.

   This **MUST** evaluate to a compile-time constant.

   The whole expression is parenthesized so the macro behaves as a single
   value in any context; without the outer parentheses, uses such as
   (x / WORK_AREA_SIZE) or (x % WORK_AREA_SIZE) would apply the operator
   to the 1K-block count only and then multiply the result by 1024.  */
#define WORK_AREA_SIZE ((((sizeof(initdata_t) + 1023) / 1024) + 1) * 1024)
437
438
439/*  *********************************************************************
440    *  Configuration data structure
441    ********************************************************************* */
442
443#include "sb1250_draminit.h"
444#include "jedec.h"
445
446/*  *********************************************************************
447    *  Initialized data
448    *
449    *  WARNING WARNING WARNING!
450    *
451    *  This module is *very magical*!   We are using the cache as
452    *  SRAM, and we're running as relocatable code *before* the code
453    *  is relocated and *before* the GP register is set up.
454    *
455    *  Therefore, there should be NO data declared in the data
456    *  segment - all data must be allocated in the .text segment
457    *  and references to this data must be calculated by an inline
458    *  assembly stub.
459    *
460    *  If you grep the disassembly of this file, you should not see
461    *  ANY references to the GP register.
462    ********************************************************************* */
463
464
/*
 * Printable names of the init-table record types.  Only compiled when
 * _MCSTANDALONE_NOISY_ is defined.
 * NOTE(review): presumably indexed by the MCR_* record type codes from
 * sb1250_draminit.h -- confirm the order matches those values.
 */
465#ifdef _MCSTANDALONE_NOISY_
466static char *sb1250_rectypes[] = {"MCR_GLOBALS","MCR_CHCFG","MCR_TIMING",
467				  "MCR_CLKCFG","MCR_GEOM","MCR_CFG",
468				  "MCR_MANTIMING"};
469#endif
470
471/*  *********************************************************************
472    *  Module Description
473    *
474    *  This module attempts to initialize the DRAM controllers on
475    *  the SB1250.  Each DRAM controller can control four chip
476    *  selects, or two double-sided DDR SDRAM DIMMs.  Therefore, at
477    *  most four DIMMs can be attached.
478    *
479    *  We will assume that all of the DIMMs are connected to the same
480    *  SMBUS serial bus, and are addressed sequentially starting from
481    *  module 0.   The first two DIMMs will be assigned to memory
482    *  controller #0 and the second two DIMMs will be assigned to
483    *  memory controller #1.
484    *
485    *  There is one serial ROM per DIMM, and we will assume that the
486    *  front and back of the DIMM are the same memory configuration.
487    *  The first DIMM will be configured for CS0 and CS1, and the
488    *  second DIMM will be configured for CS2 and CS3.   If the DIMM
489    *  has only one side, it will be assigned to CS0 or CS2.
490    *
491    *  No interleaving will be configured by this routine, but it
492    *  should not be difficult to modify it should that be necessary.
493    *
494    *  This entire routine needs to run from registers (no read/write
495    *  data is allowed).
496    *
497    *  The steps to initialize the DRAM controller are:
498    *
499    *      * Read the SPD, verify DDR SDRAMs or FCRAMs
500    *      * Obtain #rows, #cols, #banks, and module size
501    *      * Calculate row, column, and bank masks
502    *      * Calculate chip selects
503    *      * Calculate timing register.  Note that we assume that
504    *        all banks will use the same timing.
505    *      * Repeat for each DRAM.
506    *
507    *  DIMM0 -> MCTL0 : CS0, CS1	SPD Addr = 0x54
508    *  DIMM1 -> MCTL0 : CS2, CS3	SPD Addr = 0x55
509    *  DIMM2 -> MCTL1 : CS0, CS1	SPD Addr = 0x56
510    *  DIMM3 -> MCTL1 : CS2, CS3	SPD Addr = 0x57
511    *
512    *  DRAM Controller registers are programmed in the following order:
513    *
514    *  	   MC_CS_INTERLEAVE
515    *  	   MC_CS_ATTR
516    *  	   MC_TEST_DATA, MC_TEST_ECC
517    *
518    *  	   MC_CSx_ROWS, MC_CSx_COLS
519    *      (repeated for each bank)
520    *
521    *  	   MC_CS_START, MC_CS_END
522    *
523    *  	   MC_CLOCK_CFG
524    *      (delay)
525    *  	   MC_TIMING
526    *  	   MC_CONFIG
527    *      (delay)
528    *  	   MC_DRAMMODE
529    *      (delay after each mode setting ??)
530    *
531    *  Once the registers are initialized, the DRAM is activated by
532    *  sending it the following sequence of commands:
533    *
534    *       PRE (precharge)
535    *       EMRS (extended mode register set)
536    *       MRS (mode register set)
537    *       PRE (precharge)
538    *       AR (auto-refresh)
539    *       AR (auto-refresh again)
540    *       MRS (mode register set)
541    *
542    *  then wait 200 memory clock cycles without accessing DRAM.
543    *
544    *  Following initialization, the ECC bits must be cleared.  This
545    *  can be accomplished by disabling ECC checking on both memory
546    *  controllers, and then zeroing all memory via the mapping
547    *  in xkseg.
548    ********************************************************************* */
549
550/*  *********************************************************************
551    *
552    * Address Bit Assignment Algorithm:
553    *
554    * Good performance can be achieved by taking the following steps
555    * when assigning address bits to the row, column, and interleave
556    * masks.  You will need to know the following:
557    *
558    *    - The number of rows, columns, and banks on the memory devices
559    *    - The block size (larger tends to be better for sequential
560    *      access)
561    *    - Whether you will interleave chip-selects
562    *    - Whether you will be using both memory controllers and want
563    *      to interleave between them
564    *
565    * By choosing the masks carefully you can maximize the number of
566    * open SDRAM banks and reduce access times for nearby and sequential
567    * accesses.
568    *
569    * The diagram below depicts a physical address and the order
570    * that the bits should be placed into the masks.  Start with the
571    * least significant bit and assign bits to the row, column, bank,
572    * and interleave registers in the following order:
573    *
574    *         <------------Physical Address--------------->
575    * Bits:	RRRRRRR..R  CCCCCCCC..C  NN  BB  P  CC  xx000
576    * Step:	    7           6        5   4   3  2     1
577    *
578    * Where:
579    *     R = Row Address Bit     (MC_CSX_ROW register)
580    *     C = Column Address Bit  (MC_CSX_COL register)
581    *     N = Chip Select         (MC_CS_INTERLEAVE)
582    *                             (when interleaving via chip selects)
583    *     B = Bank Bit            (MC_CSX_BA register)
584    *     P = Port Select bit     (MC_CONFIG register)
585    *                             (when interleaving memory channels)
586    *     x = Does not matter     (MC_CSX_COL register)
587    *                             (internally driven by controller)
588    *     0 = must be zero
589    *
590    * When an address bit is "assigned" it is set in one of the masks
591    * in the MC_CSX_ROW, MC_CSX_COL, MC_CSX_BA, or MC_CS_INTERLEAVE
592    * registers.
593    *
594    *
595    * 1. The bottom 3 bits are ignored and should be set to zero.
596    *    The next two bits are also ignored, but are considered
597    *    to be column bits, so they should be taken from the
598    *    total column bits supported by the device.
599    *
600    * 2. The next two bits are used for column interleave.  For
601    *    32-byte blocks (and no column interleave), do not use
602    *    any column bits.  For 64-byte blocks, use one column
603    *    bit, and for 128 byte blocks, use two column bits.  Subtract
604    *    the column bits assigned in this step from the total.
605    *
606    * 3. If you are using both memory controllers and wish to interleave
607    *    between them, assign one bit for the controller interleave. The
608    *    bit number is assigned in the MC_CONFIG register.
609    *
610    * 4. These bits represent the bank bits on the memory device.
611    *    If the device has 4 banks, assign 2 bits in the MC_CSX_BA
612    *    register.
613    *
614    * 5. If you are interleaving via chip-selects, set one or two
615    *    bits in the MC_CS_INTERLEAVE register for the bits that will
616    *    be interleaved.
617    *
618    * 6. The remaining column bits are assigned in the MC_CSX_COL
619    *    register.
620    *
621    * 7. The row bits are assigned in the MC_CSX_ROW register.
622    *
623    ********************************************************************* */
624
625
626
627/*  *********************************************************************
628    *  sb1250_find_timingcs(mc)
629    *
630    *  For a given memory controller, choose the chip select whose
631    *  timing values will be used to base the TIMING and MCLOCK_CFG
632    *  registers on.
633    *
634    *  Input parameters:
635    *  	   mc - memory controller
636    *
637    *  Return value:
638    *  	   chip select index, or -1 if no active chip selects.
639    ********************************************************************* */
640
641
642static int sb1250_find_timingcs(mcdata_t *mc)
643{
644    int idx;
645
646    /* for now, the first one with data is the one we pick */
647
648    for (idx = 0; idx < MC_CHIPSELS; idx++) {
649	if (mc->csdata[idx].flags & CS_PRESENT) return idx;
650	}
651
652    return -1;
653}
654
655/*  *********************************************************************
656    *  sb1250_auto_timing(mcidx,mc,tdata)
657    *
658    *  Program the memory controller's timing registers based on the
659    *  timing information stored with the chip select data.  For DIMMs
660    *  this information comes from the SPDs, otherwise it was entered
661    *  from the datasheets into the tables in the init modules.
662    *
663    *  Input parameters:
664    *  	   mcidx - memory controller index (0 or 1)
    *  	   mc - memory controller data (mcdata_t)
665    *  	   tdata - chip select data (csdata_t) supplying the timing values
666    *
667    *  Return value:
668    *  	   nothing
669    ********************************************************************* */
670
671static void sb1250_auto_timing(int mcidx,mcdata_t *mc,csdata_t *tdata)
672{
673    unsigned int res;
674
675    unsigned int plldiv;
676    unsigned int clk_ratio;
677    unsigned int refrate;
678    unsigned int ref_freq;
679    unsigned int caslatency;
680
681    unsigned int spd_tCK_25;
682    unsigned int spd_tCK_20;
683    unsigned int spd_tCK_10;
684    unsigned int tCpuClk;
685    unsigned int tMemClk;
686
687    unsigned int w2rIdle,r2wIdle,r2rIdle;
688    unsigned int tCrD,tCrDh,tFIFO;
689    unsigned int tCwD;
690    unsigned int tRAS;
691    unsigned int tWR,tWTR;
692    unsigned int tRP,tRRD,tRCD,tRC,tRCw,tRCr,tCwCr,tRFC;
693    unsigned int tRFC_max;
694
695    uint64_t timing1;
696    uint64_t mclkcfg;
697    sbport_t base;
698    uint64_t sysrev;
699
700    /* Timing window variables */
701
702    int addrSkew,dqiSkew,dqoSkew,clkDrive;
703    int n01_open,n02_open,n12_open;
704    int n01_close,n02_close,n12_close;
705    int dqsArrival;
706    int addrAdjust,dqiAdjust,dqoAdjust;
707    int minDqsMargin;
708    int dllScaleNum,dllScaleDenom,dllOffset;
709
710
711    /*
712     * We need our cpu clock for all sorts of things.
713     */
714
715    sysrev = READCSR(PHYS_TO_K1(A_SCD_SYSTEM_REVISION));
716#if defined(_VERILOG_) || defined(_FUNCSIM_)
717    plldiv = 16;		/* 800MHz CPU for RTL simulation */
718#else
719    plldiv = G_SYS_PLL_DIV(READCSR(PHYS_TO_K1(A_SCD_SYSTEM_CFG)));
720#endif
721    if (plldiv == 0) {
722	/* XXX: should be common macro, also defaulted by boards' *_devs.c.  */
723	plldiv = 6;
724	}
725
726    tRFC_max = 15;
727    if ((G_SYS_REVISION(sysrev) >= K_SYS_REVISION_BCM1250_C3) ||
728	(G_SYS_REVISION(sysrev) >= K_SYS_REVISION_BCM112x_B0)) {
729	tRFC_max = 31;
730	}
731
732    /*
733     * Compute tCpuClk, in picoseconds to avoid rounding errors.
734     *
735     * Calculation:
736     *     tCpuClk = 1/fCpuClk
737     *             = 1/(100MHz * plldiv/2)
738     *             = 2/(100MHz*plldiv)
739     *             = 2/(100*plldiv) us
740     *		   = 20/plldiv ns
741     *             = 2000000/plldiv 10ths of ns
742     *
743     * If SB1250_REFCLK is in MHz, then:
744     *           2/(SB1250_REFCLK*plldiv) us
745     *         = 2000/(SB1250_REFCLK*plldiv) ns
746     *         = 2000000/(SB1250_REFCLK*plldiv) ps
747     *
748     * However, we want to round the result to the nearest integer,
749     * so we double the numerator (to 4000000) to get one more bit
750     * of precision in the quotient, then add one and scale it back down
751     */
752
753    /* tCpuClk is in picoseconds */
754    tCpuClk = ((4000000/(SB1250_REFCLK*plldiv))+1)/2;
755
756    spd_tCK_25 = DECTO10THS(tdata->spd_tCK_25);
757    spd_tCK_20 = DECTO10THS(tdata->spd_tCK_20);
758    spd_tCK_10 = DECTO10THS(tdata->spd_tCK_10);
759
760    /*
761     * Compute the target tMemClk, in units of tenths of nanoseconds
762     * to be similar to the JEDEC SPD values.  This will be
763     *
764     *     MAX(MIN_tMEMCLK,spd_tCK_25)
765     */
766
767    tMemClk = spd_tCK_25;
768    if (mc->mintmemclk > tMemClk) tMemClk = mc->mintmemclk;
769
770    /*
771     * Now compute our clock ratio (the amount we'll divide tCpuClk by
772     * to get as close as possible to tMemClk without exceeding it
773     *
774     * It's (tMemClk*100) here because tCpuClk is in picoseconds
775     */
776
777    clk_ratio = ((tMemClk*100) + tCpuClk - 1) / tCpuClk;
778    if (clk_ratio < 4) clk_ratio = 4;
779    if (clk_ratio > 9) clk_ratio = 9;
780
781    /*
782     * BCM112x A1 parts do not function properly with MC ratio 5.
783     * (This is fixed in A2 parts.)  On BCM112x before A2, When
784     * that ratio would be used, back off to 6.
785     */
786    if ((SYS_SOC_TYPE(sysrev) == K_SYS_SOC_TYPE_BCM1120 ||
787         SYS_SOC_TYPE(sysrev) == K_SYS_SOC_TYPE_BCM1125 ||
788         SYS_SOC_TYPE(sysrev) == K_SYS_SOC_TYPE_BCM1125H) &&
789        G_SYS_REVISION(sysrev) < K_SYS_REVISION_BCM112x_A2 &&
790	clk_ratio == 5) {
791	clk_ratio = 6;
792	}
793
794    /*
795     * Now, recompute tMemClk using the new clk_ratio.  This gives us
796     * the actual tMemClk that the memory controller will generate
797     *
798     * Calculation:
799     *      fMemClk = SB1250_REFCLK * plldiv / (2 * clk_ratio) Mhz
800     *
801     *      tMemClk = 1/fMemClk us
802     *              = (2 * clk_ratio) / (SB1250_REFCLK * plldiv) us
803     *              = 10000 * (2 * clk_ratio) / (SB1250_REFCLK * plldiv) 0.1ns
804     *
805     * The resulting tMemClk is in tenths of nanoseconds so we
806     * can compare it with the SPD values.  The x10000 converts
807     * us to 0.1ns
808     */
809
810new_ratio:
811    tMemClk = (10000 * 2 * clk_ratio)/(SB1250_REFCLK * plldiv);
812
813    /* Calculate the refresh rate */
814
815    switch (tdata->spd_rfsh & JEDEC_RFSH_MASK) {
816	case JEDEC_RFSH_64khz:	ref_freq = 64;  break;
817	case JEDEC_RFSH_256khz:	ref_freq = 256; break;
818	case JEDEC_RFSH_128khz:	ref_freq = 128; break;
819	case JEDEC_RFSH_32khz:	ref_freq = 32;  break;
820	case JEDEC_RFSH_8khz:	ref_freq = 16;  break;
821	default: 		ref_freq = 8;   break;
822	}
823
824    /*
825     * Compute the target refresh value, in Khz/16.  We know
826     * the rate that the DIMMs expect (in Khz, above).  So we need
827     * to calculate what the MemClk is divided by to get that value.
828     * There is an internal divide-by-16 in the 1250 in the refresh
829     * generation.
830     *
831     * Calculation:
832     *     refrate = (plldiv/2)*SB1250_REFCLK*1000 Khz /(ref_freq*16*clk_ratio)
833     */
834
835    refrate = ((plldiv * SB1250_REFCLK * 1000 / 2) / (ref_freq*16*clk_ratio)) - 1;
836
837    /*
838     * Calculate CAS Latency in half cycles.  The low bit indicates
839     * half a cycle, so 2 (0010) = 1 cycle and 3 (0011) = 1.5 cycles
840     */
841
842    res = tdata->spd_caslatency;
843    if (res & JEDEC_CASLAT_35) caslatency = (3 << 1) + 1;	/* 3.5 */
844    else if (res & JEDEC_CASLAT_30) caslatency = (3 << 1);	/* 3.0 */
845    else if (res & JEDEC_CASLAT_25) caslatency = (2 << 1) + 1;	/* 2.5 */
846    else if (res & JEDEC_CASLAT_20) caslatency = (2 << 1);	/* 2.0 */
847    else if (res & JEDEC_CASLAT_15) caslatency = (1 << 1) + 1;	/* 1.5 */
848    else caslatency = (1 << 1);					/* 1.0 */
849
850    if ((spd_tCK_10 != 0) && (spd_tCK_10 <= tMemClk)) {
851	caslatency -= (1 << 1);				/* subtract 1.0 */
852	}
853    else if ((spd_tCK_20 != 0) && (spd_tCK_20 <= tMemClk)) {
854	caslatency -= 1;				/* subtract 0.5 */
855	}
856
857    /*
858     * Store the CAS latency in the chip select info
859     */
860
861    tdata->flags &= ~CS_CASLAT_MASK;
862    tdata->flags |= (((caslatency << CS_CASLAT_SHIFT)) & CS_CASLAT_MASK);
863#ifdef _MCSTANDALONE_
864    dram_cas_latency = caslatency;
865    dram_tMemClk = tMemClk;
866#endif
867
868    /*
869     * Now, on to the timing parameters.
870     */
871
872    w2rIdle = 1;	/* Needs to be set on all parts. */
873    r2rIdle = 0;
874
875    /* ======================================================================== */
876
877    /*
878     * New "Window" calculations
879     */
880
881    n01_open = -SB1250_WINDOW_OPEN_OFFSET;
882    n02_open = -SB1250_WINDOW_OPEN_OFFSET;
883    n12_open = tMemClk/2 - SB1250_WINDOW_OPEN_OFFSET;
884    n01_close = tMemClk - SB1250_CLOSE_01_OFFSET;
885    n02_close = 3*tMemClk/2 - SB1250_CLOSE_02_OFFSET;
886    n12_close = 7*tMemClk/4 - SB1250_CLOSE_12_OFFSET;
887    minDqsMargin = SB1250_MIN_DQS_MARGIN;
888
889    if (SYS_SOC_TYPE(sysrev) == K_SYS_SOC_TYPE_BCM1250 &&
890	G_SYS_REVISION(sysrev) >= K_SYS_REVISION_PASS1 &&
891	G_SYS_REVISION(sysrev) < K_SYS_REVISION_PASS2) {
892	/* pass1 bcm1250 */
893	dllScaleNum = PASS1_DLL_SCALE_NUMERATOR;
894	dllScaleDenom = PASS1_DLL_SCALE_DENOMINATOR;
895	dllOffset = PASS1_DLL_OFFSET;
896	}
897    else {
898	/* pass2+ BCM1250, or BCM112x */
899	dllScaleNum = PASS2_DLL_SCALE_NUMERATOR;
900	dllScaleDenom = PASS2_DLL_SCALE_DENOMINATOR;
901	dllOffset = PASS2_DLL_OFFSET;
902	}
903
904
905    /*
906     * Get fields out of the clock config register
907     */
908
909    dqiSkew =  (int) G_MC_DQI_SKEW(mc->clkconfig);
910    dqoSkew =  (int) G_MC_DQO_SKEW(mc->clkconfig);
911    addrSkew = (int) G_MC_ADDR_SKEW(mc->clkconfig);
912    clkDrive = (int) G_MC_CLOCK_DRIVE(mc->clkconfig);
913
914    /*
915     * get initial values for tCrD and dqsArrival
916     */
917
918    tCrD = (caslatency >> 1);
919    dqsArrival = mc->roundtrip;
920    if (caslatency & 1) {
921	dqsArrival += tMemClk/2;
922	}
923
924    /*
925     * need to adjust for settings of skew values.
926     * can either add to dqsArrival or subtract from
927     * all the windows.
928     */
929
930    addrAdjust = (addrSkew - 8) * ((int)tMemClk * dllScaleNum - dllOffset) / (8 * dllScaleDenom);
931    dqiAdjust  = (dqiSkew - 8)  * ((int)tMemClk * dllScaleNum - dllOffset) / (8 * dllScaleDenom);
932    dqsArrival += addrAdjust + dqiAdjust;
933
934    /* for pass 2, dqoAdjust applies only to n12_Close */
935    dqoAdjust  = (dqoSkew - 8)  * (tMemClk * dllScaleNum - dllOffset) / (8 * dllScaleDenom);
936    n12_close += dqoAdjust;
937
938    /*
939     * adjust window for clock drive strength
940     * Don't be tempted to turn this into an array.  It will break the
941     * relocation stuff!
942     */
943    switch (clkDrive) {
944	case 0:   dqsArrival += 10; break;
945	case 1:   dqsArrival += 4;  break;
946	case 2:   dqsArrival += 3;  break;
947	case 3:   dqsArrival += 2;  break;
948	case 4:   dqsArrival += 2;  break;
949	case 5:   dqsArrival += 1;  break;
950	case 6:   dqsArrival += 1;  break;
951	case 7:   dqsArrival += 1;  break;
952	case 8:   dqsArrival += 8;  break;
953	case 9:   dqsArrival += 2;  break;
954	case 0xa: dqsArrival += 1;  break;
955	case 0xb: break;
956	case 0xc: break;
957	case 0xd: break;
958	case 0xe: break;
959	case 0xf: break;
960	default:
961	    /* shouldn't get here */
962	    break;
963	}
964
965    while ((n02_close - dqsArrival < minDqsMargin) &&
966	   (n12_close - dqsArrival < minDqsMargin)) {
967	/* very late DQS arrival; shift latency by one tick */
968#ifdef _MCSTANDALONE_NOISY_
969	printf("DRAM: Very late DQS arrival, shift latency one tick\n");
970#endif
971	++tCrD;
972	dqsArrival -= tMemClk;
973	}
974
975    if ((dqsArrival - n01_open  >= minDqsMargin) &&
976	(n01_close - dqsArrival >= minDqsMargin)) {
977	/* use n,0,1 */
978	tCrDh = 0;
979	tFIFO = 1;
980#ifdef _MCSTANDALONE_NOISY_
981	printf("DRAM: DQS arrival in n,0,1 window\n");
982#endif
983	}
984    else if ((dqsArrival - n02_open  >= minDqsMargin) &&
985	     (n02_close - dqsArrival >= minDqsMargin)) {
986	/* use n,0,2 */
987	tCrDh = 0;
988	tFIFO = 2;
989#ifdef _MCSTANDALONE_NOISY_
990	printf("DRAM: DQS arrival in n,0,2 window\n");
991#endif
992	}
993    else if ((dqsArrival - n12_open  >= minDqsMargin) &&
994	     (n12_close - dqsArrival >= minDqsMargin)) {
995	/* use n,1,2 */
996	tCrDh = 1;
997	tFIFO = 2;
998#ifdef _MCSTANDALONE_NOISY_
999	printf("DRAM: DQS arrival in n,1,2 window\n");
1000#endif
1001	}
1002    else {
1003	/*
1004	 * minDqsMargin is probably set too high
1005	 * try using n,0,2
1006	 */
1007	tCrDh = 0;
1008	tFIFO = 2;
1009#ifdef _MCSTANDALONE_NOISY_
1010	printf("DRAM: Default: DQS arrival in n,0,2 window\n");
1011#endif
1012	}
1013
1014    r2wIdle = ((tMemClk - dqsArrival) < SB1250_MIN_R2W_TIME);
1015
1016    /*
1017     * Pass1 BCM112x parts do not function properly with
1018     * M_MC_r2wIDLE_TWOCYCLES clear, so we set r2wIdle here for them
1019     * so that that flag will be set later.
1020     */
1021    if ((SYS_SOC_TYPE(sysrev) == K_SYS_SOC_TYPE_BCM1120 ||
1022         SYS_SOC_TYPE(sysrev) == K_SYS_SOC_TYPE_BCM1125 ||
1023         SYS_SOC_TYPE(sysrev) == K_SYS_SOC_TYPE_BCM1125H) &&
1024	1 /* XXXCGD: When fixed, check revision! */) {
1025	r2wIdle = 1;
1026	}
1027
1028    /*
1029     * Above stuff just calculated tCrDh, tCrD, and tFIFO
1030     */
1031
1032    /* ======================================================================== */
1033
1034    /* Recompute tMemClk as a fixed-point 6.2 value */
1035
1036    tMemClk = (4000 * 2 *  clk_ratio) / (SB1250_REFCLK * plldiv);
1037
1038    /*
1039     * With the actual tMemClk in hand, calculate tRAS, tRC, tRP, tRRD, and tRCD
1040     */
1041
1042    tRAS = ( ((unsigned int)(tdata->spd_tRAS))*4 + tMemClk-1) / tMemClk;
1043
1044    tRC =  ( ((unsigned int)(tdata->spd_tRC))*4  + tMemClk-1) / tMemClk;
1045
1046    tRP =  ( ((unsigned int)(tdata->spd_tRP))    + tMemClk-1) / tMemClk;
1047    tRRD = ( ((unsigned int)(tdata->spd_tRRD))   + tMemClk-1) / tMemClk;
1048    tRCD = ( ((unsigned int)(tdata->spd_tRCD))   + tMemClk-1) / tMemClk;
1049
1050    /* tWR is the write recovery time, a constant of 15ns for DDR DIMMs. */
1051
1052    tWR  = ( ((unsigned int) 15)*4               + tMemClk-1) / tMemClk;
1053
1054    /*
1055     * tWTR should be 1 tick unless we're actually using
1056     * CAS Latency 1.5 (unlikely) or memory runs faster than
1057     * 166MHz (tCK = 6.0ns or less)
1058     *
1059     * CAS Latency is stored in "halves", so 3 means "1.5"
1060     */
1061
1062    tWTR = 1;
1063    if ((caslatency == 3) || (spd_tCK_25 <= 60)) tWTR = 2;
1064
1065    /*
1066     * Check for registered DIMMs, or if we are "forcing" registered
1067     * DIMMs, as might be the case of regular unregistered DIMMs
1068     * behind an external register.
1069     */
1070
1071    switch (mc->dramtype) {
1072	case FCRAM:
1073	    /* For FCRAMs, tCwD is always caslatency - 1  */
1074	    tCwD = (caslatency >> 1) - 1;
1075	    tRCD = 1;		/* always 1 for FCRAM */
1076	    tRP = 0;		/* always 0 for FCRAM */
1077	    tWR = 1;		/* always 1 for FCRAM */
1078	    tWTR = 0;		/* Must be 0 or 1.  Undecided on which for FCRAM. */
1079	                        /* Used in tCwCr below.  */
1080	    break;
1081	default:
1082	    /* Otherwise calculate based on registered attribute */
1083	    if ((tdata->spd_attributes & JEDEC_ATTRIB_REG) ||
1084		(mc->flags & MCFLG_FORCEREG)) {  	/* registered DIMM */
1085		tCwD = 2;
1086		tCrD++;
1087		}
1088	    else {			/* standard unbuffered DIMM */
1089		tCwD = 1;
1090		}
1091	    break;
1092	}
1093
1094    /*
1095     * Okay, using this info, figure out tRCw,tRCr,tCwCr.
1096     */
1097
1098    tRCw = max(tRC, tRP + max(tRAS, tRCD + tCwD + BURSTLEN/2 + tWR));
1099    tRCr = max(tRC, tRP + max(tRAS, tRCD + BURSTLEN/2));
1100    tCwCr = tRCw - (tRCD + tCwD + 2 + tWTR);
1101
1102    /*
1103     * Calculate tRFC if the SPD did not specify it.  Use the DIMM's
1104     * actual rated speed, spd_tCK_25.  Remember that spd_tCK_25 is in
1105     * tenths of nanoseconds, and tMemClk is in fixed-6.2 format,
1106     * but the SPD value itself is in nanoseconds (no tenths).
1107     *
1108     * Use the value from the first expression below that matches:
1109     *
1110     *    100Mhz or less  [10.0ns or more]  -- tRFC = 80ns
1111     *    133Mhz or less  [7.5ns or more]   -- tRFC = 75ns
1112     *    166Mhz or less  [6.0ns or more]   -- tRFC = 72ns
1113     *    All others:                       -- tRFC = 70ns
1114     *
1115     * Special case for gigabit parts: always use 120ns [see JEDEC spec]
1116     *
1117     * Note: the calculation may cause tRFC to overflow the 4-bit field
1118     * that hardware uses for it on parts prior to  BCM1250C3.  If that
1119     * happens, reduce memory speed and try again.  Hopefully we won't go
1120     * into a loop.
1121     */
1122
1123    if (tdata->spd_tRFC == 0) {
1124	unsigned int calcRFC;	/* in nanoseconds */
1125
1126	if (tdata->rows >= 14) {	/* Gigabit parts have >= 14 rows */
1127	    calcRFC = 120;
1128	    }
1129	else {
1130	    if (spd_tCK_25 >= 100)     calcRFC = 80;	/* 100MHz */
1131	    else if (spd_tCK_25 >= 75) calcRFC = 75;	/* 133MHz */
1132	    else if (spd_tCK_25 >= 60) calcRFC = 72;	/* 166MHz */
1133	    else calcRFC = 70;				/* Others */
1134	    }
1135
1136	tRFC = (calcRFC*4 + tMemClk-1) / tMemClk;
1137	}
1138    else {
1139	tRFC = ( ((unsigned int) tdata->spd_tRFC)*4 + tMemClk-1) / tMemClk;
1140	}
1141
1142    /*
1143     * If tRFC will not fit in our field then we need to slow
1144     * the memory down.
1145     */
1146    if (tRFC > tRFC_max) {
1147#ifdef _MCSTANDALONE_NOISY_
1148        printf("DRAM: tRFC too big (%d > %d), reducing memory speed\n",
1149	       tRFC, tRFC_max);
1150#endif
1151        clk_ratio++;
1152        goto new_ratio;			/* yikes! */
1153        }
1154
1155
1156    /*
1157     * Finally, put it all together in the timing register.
1158     */
1159
1160
1161    timing1 = V_MC_tRCD(tRCD) |
1162	V_MC_tCrD(tCrD) |
1163	(tCrDh ? M_tCrDh : 0) |
1164	V_MC_tRP(tRP) |
1165	V_MC_tRRD(tRRD) |
1166	V_MC_tRCw(tRCw - 1) |
1167	V_MC_tRCr(tRCr - 1) |
1168	V_MC_tCwCr(tCwCr) |
1169	V_MC_tRFC(tRFC & 0xf) |
1170	((tRFC & 0x10) != 0 ? M_MC_tRFC_PLUS16 : 0) |
1171	V_MC_tFIFO(tFIFO) |
1172	V_MC_tCwD(tCwD) |
1173	(w2rIdle ? M_MC_w2rIDLE_TWOCYCLES : 0) |
1174	(r2wIdle ? M_MC_r2wIDLE_TWOCYCLES : 0) |
1175	(r2rIdle ? M_MC_r2rIDLE_TWOCYCLES : 0);
1176
1177    mclkcfg = V_MC_CLK_RATIO(clk_ratio) |
1178	V_MC_REF_RATE(refrate);
1179
1180    /* Merge in drive strengths from the MC structure */
1181    mclkcfg |= mc->clkconfig;
1182
1183    base = PHYS_TO_K1(A_MC_BASE(mcidx));
1184    WRITECSR(base+R_MC_TIMING1,timing1);
1185
1186#ifdef _VERILOG_
1187    /* Smash in some defaults for Verilog simulation */
1188    mclkcfg &= ~(M_MC_CLK_RATIO | M_MC_DLL_DEFAULT | M_MC_REF_RATE);
1189    mclkcfg |= V_MC_CLK_RATIO_3X | V_MC_REF_RATE(K_MC_REF_RATE_200MHz) |
1190	V_MC_DLL_DEFAULT(0x18);
1191#endif
1192
1193    WRITECSR(base+R_MC_MCLK_CFG,mclkcfg);
1194
1195}
1196
1197
1198/*  *********************************************************************
1199    *  SB1250_MANUAL_TIMING(mcidx,mc)
1200    *
1201    *  Program the timing registers, for the case of user-specified
1202    *  timing parameters (don't calculate values based on datasheet
1203    *  values, just stuff the info into the MC registers)
1204    *
1205    *  Input parameters:
1206    *  	   mcidx - memory controller index
1207    *  	   mc - memory controller data
1208    *
1209    *  Return value:
1210    *  	   nothing
1211    ********************************************************************* */
1212
1213static void sb1250_manual_timing(int mcidx,mcdata_t *mc)
1214{
1215    unsigned int plldiv;
1216    unsigned int clk_ratio;
1217    unsigned int refrate;
1218    unsigned int ref_freq;
1219    unsigned int tCpuClk;
1220    unsigned int tMemClk;
1221
1222    uint64_t timing1;
1223    uint64_t mclkcfg;
1224
1225    sbport_t base;
1226
1227    /*
1228     * We need our cpu clock for all sorts of things.
1229     */
1230
1231#if defined(_VERILOG_) || defined(_FUNCSIM_)
1232    plldiv = 16;		/* 800MHz CPU for RTL simulation */
1233#else
1234    plldiv = G_SYS_PLL_DIV(READCSR(PHYS_TO_K1(A_SCD_SYSTEM_CFG)));
1235#endif
1236    if (plldiv == 0) {
1237	/* XXX: should be common macro, also defaulted by boards' *_devs.c.  */
1238	plldiv = 6;
1239	}
1240
1241    /* See comments in auto_timing for details */
1242    tCpuClk = 2000000/(SB1250_REFCLK*plldiv);	/* tCpuClk is in picoseconds */
1243
1244    /* Compute MAX(MIN_tMEMCLK,spd_tCK_25) */
1245    tMemClk = DECTO10THS(mc->tCK);
1246    if (mc->mintmemclk > tMemClk) tMemClk = mc->mintmemclk;
1247
1248    clk_ratio = ((tMemClk*100) + tCpuClk - 1) / tCpuClk;
1249    if (clk_ratio < 4) clk_ratio = 4;
1250    if (clk_ratio > 9) clk_ratio = 9;
1251
1252    /* recompute tMemClk using the new clk_ratio */
1253
1254    tMemClk = (10000 * 2 * clk_ratio)/(SB1250_REFCLK * plldiv);
1255
1256    /* Calculate the refresh rate */
1257
1258    switch (mc->rfsh & JEDEC_RFSH_MASK) {
1259	case JEDEC_RFSH_64khz:	ref_freq = 64;  break;
1260	case JEDEC_RFSH_256khz:	ref_freq = 256; break;
1261	case JEDEC_RFSH_128khz:	ref_freq = 128; break;
1262	case JEDEC_RFSH_32khz:	ref_freq = 32;  break;
1263	case JEDEC_RFSH_8khz:	ref_freq = 16;  break;
1264	default: 		ref_freq = 8;   break;
1265	}
1266
1267    refrate = ((plldiv * SB1250_REFCLK * 1000 / 2) / (ref_freq*16*clk_ratio)) - 1;
1268
1269    timing1 = mc->mantiming;
1270    mclkcfg = V_MC_CLK_RATIO(clk_ratio) |
1271	V_MC_REF_RATE(refrate);
1272
1273    /* Merge in drive strengths from the MC structure */
1274    mclkcfg |= mc->clkconfig;
1275
1276    base = PHYS_TO_K1(A_MC_BASE(mcidx));
1277    WRITECSR(base+R_MC_TIMING1,timing1);
1278
1279#ifdef _VERILOG_
1280    /* Smash in some defaults for Verilog simulation */
1281    mclkcfg &= ~(M_MC_CLK_RATIO | M_MC_DLL_DEFAULT | M_MC_REF_RATE);
1282    mclkcfg |= V_MC_CLK_RATIO_3X | V_MC_REF_RATE(K_MC_REF_RATE_200MHz) |
1283	V_MC_DLL_DEFAULT(0x18);
1284#endif
1285
1286    WRITECSR(base+R_MC_MCLK_CFG,mclkcfg);
1287
1288}
1289
1290
1291
1292/*  *********************************************************************
1293    *  Default DRAM init table
1294    *
1295    *  This is just here to make SB1250 BSPs easier to write.
1296    *  If you've hooked up standard JEDEC SDRAMs in a standard
1297    *  way with all your SPD ROMs on one SMBus channel,
1298    *  This table is for you.
1299    *
1300    *  Otherwise, copy it into your board_init.S file and
1301    *  modify it, and return a pointer to the table from
1302    *  the board_draminfo routine.
1303    *
1304    *  (See the CFE manual for more details)
1305    ********************************************************************* */
1306
1307#if !defined(_MCSTANDALONE_)		/* no tables in the non-CFE, non-MIPS version */
1308
1309#ifdef _VERILOG_
1310static const mc_initrec_t draminittab_11xx[5] /*__attribute__ ((section(".text"))) */ = {
1311
1312    /*
1313     * Globals: No interleaving
1314     */
1315
1316    DRAM_GLOBALS(MC_NOPORTINTLV),
1317
1318    /*
1319     * Memory channel 0:  manually configure for verilog runs
1320     * Configure chip select 0.
1321     */
1322
1323    DRAM_CHAN_CFG(MC_CHAN0, 80, JEDEC, CASCHECK, BLKSIZE32, NOCSINTLV, ECCDISABLE, 0),
1324
1325    DRAM_CS_GEOM(MC_CS0, 12, 8, 2),
1326    DRAM_CS_TIMING(DRT10(7,5), JEDEC_RFSH_64khz, JEDEC_CASLAT_25, 0,  45, DRT4(20,0), DRT4(15,0),  DRT4(20,0),  0, 0),
1327
1328    DRAM_EOT
1329};
1330
1331static const mc_initrec_t draminittab_12xx[5] /* __attribute__ ((section(".text"))) */ = {
1332
1333    /*
1334     * Globals: No interleaving
1335     */
1336
1337    DRAM_GLOBALS(MC_NOPORTINTLV),
1338
1339    /*
1340     * Memory channel 0:  manually configure for verilog runs
1341     * Configure chip select 0.
1342     */
1343
1344    DRAM_CHAN_CFG(MC_CHAN0, 80, JEDEC, CASCHECK, BLKSIZE32, NOCSINTLV, ECCDISABLE, 0),
1345
1346    DRAM_CS_GEOM(MC_CS0, 12, 8, 2),
1347    DRAM_CS_TIMING(DRT10(7,5), JEDEC_RFSH_64khz, JEDEC_CASLAT_25, 0,  45, DRT4(20,0), DRT4(15,0),  DRT4(20,0),  0, 0),
1348
1349    DRAM_EOT
1350};
1351
1352
1353#else
1354#define DEVADDR (CFG_DRAM_SMBUS_BASE)
1355#define DEFCHAN (CFG_DRAM_SMBUS_CHANNEL)
1356static const mc_initrec_t draminittab_12xx[8] /* __attribute__ ((section(".text"))) */ = {
1357
1358    /*
1359     * Global data: Interleave mode from bsp_config.h
1360     */
1361
1362    DRAM_GLOBALS(CFG_DRAM_INTERLEAVE),	   		/* do port interleaving if possible */
1363
1364    /*
1365     * Memory channel 0: Configure via SMBUS, Automatic Timing
1366     * Assumes SMBus device numbers are arranged such
1367     * that the first two addresses are CS0,1 and CS2,3 on MC0
1368     * and the second two addresses are CS0,1 and CS2,3 on MC1
1369     */
1370
1371    DRAM_CHAN_CFG(MC_CHAN0, CFG_DRAM_MIN_tMEMCLK, DRAM_TYPE_SPD, CASCHECK, CFG_DRAM_BLOCK_SIZE, CFG_DRAM_CSINTERLEAVE, CFG_DRAM_ECC, 0),
1372
1373    DRAM_CS_SPD(MC_CS0, 0, DEFCHAN, DEVADDR+0),
1374    DRAM_CS_SPD(MC_CS2, 0, DEFCHAN, DEVADDR+1),
1375
1376    /*
1377     * Memory channel 1: Configure via SMBUS
1378     */
1379
1380    DRAM_CHAN_CFG(MC_CHAN1, CFG_DRAM_MIN_tMEMCLK, DRAM_TYPE_SPD, CASCHECK, CFG_DRAM_BLOCK_SIZE, CFG_DRAM_CSINTERLEAVE, CFG_DRAM_ECC, 0),
1381
1382    DRAM_CS_SPD(MC_CS0, 0, DEFCHAN, DEVADDR+2),
1383    DRAM_CS_SPD(MC_CS2, 0, DEFCHAN, DEVADDR+3),
1384
1385   /*
1386    * End of Table
1387    */
1388
1389    DRAM_EOT
1390
1391};
1392
1393static const mc_initrec_t draminittab_11xx[5] /*__attribute__ ((section(".text"))) */ = {
1394
1395    /*
1396     * Global data: Interleave mode from bsp_config.h
1397     */
1398
1399    DRAM_GLOBALS(0),	   		/* no port interleaving on 11xx */
1400
1401    /*
1402     * Memory channel 1: Configure via SMBUS
1403     */
1404
1405    DRAM_CHAN_CFG(MC_CHAN1, CFG_DRAM_MIN_tMEMCLK, DRAM_TYPE_SPD, CASCHECK, CFG_DRAM_BLOCK_SIZE, CFG_DRAM_CSINTERLEAVE, CFG_DRAM_ECC, 0),
1406
1407    DRAM_CS_SPD(MC_CS0, 0, DEFCHAN, DEVADDR+0),
1408    DRAM_CS_SPD(MC_CS2, 0, DEFCHAN, DEVADDR+1),
1409
1410   /*
1411    * End of Table
1412    */
1413
1414    DRAM_EOT
1415
1416};
1417#endif
1418
1419#endif
1420
1421
1422/*  *********************************************************************
1423    *  SB1250_SMBUS_INIT()
1424    *
1425    *  Initialize SMBUS channel
1426    *
1427    *  Input parameters:
1428    *  	   chan - SMBus channel number, 0 or 1
1429    *
1430    *  Return value:
1431    *  	   smbus_base - KSEG1 address of SMBus channel
1432    *
1433    *  Registers used:
1434    *  	   tmp0
1435    ********************************************************************* */
1436
1437static sbport_t sb1250_smbus_init(int chan)
1438{
1439    sbport_t base;
1440
1441    base = PHYS_TO_K1(A_SMB_BASE(chan));
1442
1443    WRITECSR(base+R_SMB_FREQ,K_SMB_FREQ_100KHZ);
1444    WRITECSR(base+R_SMB_CONTROL,0);
1445
1446    return base;
1447}
1448
1449
1450/*  *********************************************************************
1451    *  SB1250_SMBUS_WAITREADY()
1452    *
1453    *  Wait for SMBUS channel to be ready.
1454    *
1455    *  Input parameters:
1456    *  	   smbus_base - SMBus channel base (K1seg addr)
1457    *
1458    *  Return value:
1459    *  	   ret0 - 0 if no error occured, else -1
1460    *
1461    *  Registers used:
1462    *  	   tmp0,tmp1
1463    ********************************************************************* */
1464
1465static int sb1250_smbus_waitready(sbport_t base)
1466{
1467    uint64_t status;
1468
1469    /*
1470     * Wait for busy bit to clear
1471     */
1472
1473    for (;;) {
1474	status = READCSR(base+R_SMB_STATUS);
1475	if (!(status & M_SMB_BUSY)) break;
1476	}
1477
1478    /*
1479     * Isolate error bit and clear error status
1480     */
1481
1482    status &= M_SMB_ERROR;
1483    WRITECSR(base+R_SMB_STATUS,status);
1484
1485    /*
1486     * Return status
1487     */
1488
1489    return (status) ? -1 : 0;
1490}
1491
1492
1493
1494/*  *********************************************************************
1495    *  SB1250_SMBUS_READBYTE()
1496    *
1497    *  Read a byte from a serial ROM attached to an SMBus channel
1498    *
1499    *  Input parameters:
1500    *      base - SMBus channel base address (K1seg addr)
1501    *  	   dev - address of device on SMBUS
1502    *      offset - address of byte within device on SMBUS
1503    *
1504    *  Return value:
1505    *  	   byte from device (-1 indicates an error)
1506    ********************************************************************* */
1507
1508
1509static int sb1250_smbus_readbyte(sbport_t base,unsigned int dev,unsigned int offset)
1510{
1511    int res;
1512
1513    /*
1514     * Wait for channel to be ready
1515     */
1516
1517    res = sb1250_smbus_waitready(base);
1518    if (res < 0) return res;
1519
1520    /*
1521     * Set up a READ BYTE command.  This command has no associated
1522     * data field, the command code is the data
1523     */
1524
1525    WRITECSR(base+R_SMB_CMD,offset);
1526    WRITECSR(base+R_SMB_START,dev | V_SMB_TT(K_SMB_TT_CMD_RD1BYTE));
1527
1528    /*
1529     * Wait for the command to complete
1530     */
1531
1532    res = sb1250_smbus_waitready(base);
1533    if (res < 0) return res;
1534
1535    /*
1536     * Return the data byte
1537     */
1538
1539    return (int) ((READCSR(base+R_SMB_DATA)) & 0xFF);
1540}
1541
1542
1543/*  *********************************************************************
1544    *  SB1250_DRAM_GETINFO
1545    *
1546    *  Process a single init table entry and move data into the
1547    *  memory controller's data structure.
1548    *
1549    *  Input parameters:
1550    *	   smbase - points to base of SMbus device to read from
1551    *  	   mc - memory controller data
1552    *      init - pointer to current user init table entry
1553    *
1554    *  Return value:
1555    *  	   nothing
1556    ********************************************************************* */
1557
1558static void sb1250_dram_getinfo(unsigned int smbchan,
1559				unsigned int smbdev,
1560				mcdata_t *mc,
1561				int chipsel)
1562
1563{
1564    int res;
1565    unsigned char spd[JEDEC_SPD_SIZE];
1566    int idx;
1567    csdata_t *cs = &(mc->csdata[chipsel]);
1568    sbport_t smbase;
1569
1570    smbase = sb1250_smbus_init(smbchan);
1571
1572    /*
1573     * Read just the memory type to see if the RAM is present.
1574     */
1575
1576    res = sb1250_smbus_readbyte(smbase,smbdev,JEDEC_SPD_MEMTYPE);
1577
1578    if ((res < 0) || ((res != JEDEC_MEMTYPE_DDRSDRAM) &&
1579		      (res != JEDEC_MEMTYPE_DDRSDRAM2) &&
1580	              (res != SPD_MEMTYPE_FCRAM))) {
1581	return;			/* invalid or no memory installed */
1582	}
1583
1584    /*
1585     * Now go back and read everything.
1586     */
1587
1588    res = 0;
1589    for (idx = 0; idx < JEDEC_SPD_SIZE; idx++) {
1590	res = sb1250_smbus_readbyte(smbase,smbdev,idx);
1591	if (res < 0) break;
1592	spd[idx] = res;
1593	}
1594
1595    if (res < 0) return;		/* some SMBus error */
1596
1597
1598    cs->rows = spd[JEDEC_SPD_ROWS];
1599    cs->cols = spd[JEDEC_SPD_COLS];
1600
1601    /*
1602     * Determine how many bits the banks represent.  Unlike
1603     * the rows/columns, the bank byte says how *many* banks
1604     * there are, not how many bits represent banks
1605     */
1606
1607    switch (spd[JEDEC_SPD_BANKS]) {
1608	case 2:					/* 2 banks = 1 bits */
1609	    cs->banks = 1;
1610	    break;
1611
1612	case 4:					/* 4 banks = 2 bits */
1613	    cs->banks = 2;
1614	    break;
1615
1616	case 8:					/* 8 banks = 3 bits */
1617	    cs->banks = 3;
1618	    break;
1619
1620	case 16:				/* 16 banks = 4 bits */
1621	    cs->banks = 4;
1622	    break;
1623
1624	default:				/* invalid bank count */
1625	    return;
1626	}
1627
1628
1629    /*
1630     * Read timing parameters from the DIMM.  By this time we kind of trust
1631     */
1632
1633    cs->spd_dramtype   = spd[JEDEC_SPD_MEMTYPE];
1634    cs->spd_tCK_25     = spd[JEDEC_SPD_tCK25];
1635    cs->spd_tCK_20     = spd[JEDEC_SPD_tCK20];
1636    cs->spd_tCK_10     = spd[JEDEC_SPD_tCK10];
1637    cs->spd_rfsh       = spd[JEDEC_SPD_RFSH];
1638    cs->spd_caslatency = spd[JEDEC_SPD_CASLATENCIES];
1639    cs->spd_attributes = spd[JEDEC_SPD_ATTRIBUTES];
1640    cs->spd_tRAS       = spd[JEDEC_SPD_tRAS];
1641    cs->spd_tRP        = spd[JEDEC_SPD_tRP];
1642    cs->spd_tRRD       = spd[JEDEC_SPD_tRRD];
1643    cs->spd_tRCD       = spd[JEDEC_SPD_tRCD];
1644    cs->spd_tRFC       = spd[JEDEC_SPD_tRFC];
1645    cs->spd_tRC        = spd[JEDEC_SPD_tRC];
1646
1647    /*
1648     * Okay, we got all the required data.  mark this CS present.
1649     */
1650
1651    cs->flags = CS_PRESENT | CS_AUTO_TIMING;
1652
1653    /*
1654     * If the module width is not 72 for any DIMM, disable ECC for this
1655     * channel.  All DIMMs must support ECC for us to enable it.
1656     */
1657
1658    if (spd[JEDEC_SPD_WIDTH] != 72) mc->flags &= ~MCFLG_ECC_ENABLE;
1659
1660    /*
1661     * If it was a double-sided DIMM, also mark the odd chip select
1662     * present.
1663     */
1664
1665    if ((spd[JEDEC_SPD_SIDES] == 2) && !(mc->flags & MCFLG_BIGMEM)) {
1666	csdata_t *oddcs = &(mc->csdata[chipsel | 1]);
1667
1668	oddcs->rows  = cs->rows;
1669	oddcs->cols  = cs->cols;
1670	oddcs->banks = cs->banks;
1671	oddcs->flags = CS_PRESENT;
1672
1673	oddcs->spd_dramtype   = spd[JEDEC_SPD_MEMTYPE];
1674	oddcs->spd_tCK_25     = spd[JEDEC_SPD_tCK25];
1675	oddcs->spd_tCK_20     = spd[JEDEC_SPD_tCK20];
1676	oddcs->spd_tCK_10     = spd[JEDEC_SPD_tCK10];
1677	oddcs->spd_rfsh       = spd[JEDEC_SPD_RFSH];
1678	oddcs->spd_caslatency = spd[JEDEC_SPD_CASLATENCIES];
1679	oddcs->spd_attributes = spd[JEDEC_SPD_ATTRIBUTES];
1680	oddcs->spd_tRAS       = spd[JEDEC_SPD_tRAS];
1681	oddcs->spd_tRP        = spd[JEDEC_SPD_tRP];
1682	oddcs->spd_tRRD       = spd[JEDEC_SPD_tRRD];
1683	oddcs->spd_tRCD       = spd[JEDEC_SPD_tRCD];
1684	oddcs->spd_tRFC       = spd[JEDEC_SPD_tRFC];
1685	oddcs->spd_tRC        = spd[JEDEC_SPD_tRC];
1686	}
1687    else if ((spd[JEDEC_SPD_SIDES] > 2) && !(mc->flags & MCFLG_BIGMEM)
1688	     && (chipsel == 0)) {
1689
1690	/* More than 2 chip selects on a single DIMM. Start from cs 1 */
1691	csdata_t *loopcs;
1692	int i;
1693	for (i=chipsel+1;i<spd[JEDEC_SPD_SIDES];i++) {
1694	    loopcs = &(mc->csdata[i]);
1695
1696	    loopcs->rows  = cs->rows;
1697	    loopcs->cols  = cs->cols;
1698	    loopcs->banks = cs->banks;
1699	    loopcs->flags = CS_PRESENT;
1700
1701	    loopcs->spd_dramtype   = spd[JEDEC_SPD_MEMTYPE];
1702	    loopcs->spd_tCK_25     = spd[JEDEC_SPD_tCK25];
1703	    loopcs->spd_tCK_20     = spd[JEDEC_SPD_tCK20];
1704	    loopcs->spd_tCK_10     = spd[JEDEC_SPD_tCK10];
1705	    loopcs->spd_rfsh       = spd[JEDEC_SPD_RFSH];
1706	    loopcs->spd_caslatency = spd[JEDEC_SPD_CASLATENCIES];
1707	    loopcs->spd_attributes = spd[JEDEC_SPD_ATTRIBUTES];
1708	    loopcs->spd_tRAS       = spd[JEDEC_SPD_tRAS];
1709	    loopcs->spd_tRP        = spd[JEDEC_SPD_tRP];
1710	    loopcs->spd_tRRD       = spd[JEDEC_SPD_tRRD];
1711	    loopcs->spd_tRCD       = spd[JEDEC_SPD_tRCD];
1712	    loopcs->spd_tRFC       = spd[JEDEC_SPD_tRFC];
1713	    loopcs->spd_tRC        = spd[JEDEC_SPD_tRC];
1714	    }
1715	}
1716}
1717
1718
1719/*  *********************************************************************
1720    *  SB1250_DRAM_READPARAMS(d,init)
1721    *
1722    *  Read all the parameters from the user parameter table and
1723    *  digest them into our local data structure.  This routine basically
1724    *  walks the table and calls the routine above to handle each
1725    *  entry.
1726    *
1727    *  Input parameters:
1728    *  	   d - our data structure (our RAM data)
1729    *  	   init - pointer to user config table
1730    *
1731    *  Return value:
1732    *  	   nothing
1733    ********************************************************************* */
1734
1735static void sb1250_dram_readparams(initdata_t *d,const draminittab_t *init)
1736{
1737    mcdata_t *mc;
1738    csdata_t *cs;
1739    uint64_t sysrev;
1740
1741    sysrev = READCSR(PHYS_TO_K1(A_SCD_SYSTEM_REVISION));
1742
1743    /*
1744     * Assume we're starting on the first channel.  We should have a CHCFG record
1745     * to set the initial channel number, this is just in case.
1746     */
1747    mc = &(d->mc[d->firstchan]);
1748
1749    /* Default clock config unless overridden */
1750    if (G_SYS_REVISION(sysrev) >= K_SYS_REVISION_PASS1 &&
1751	G_SYS_REVISION(sysrev) < K_SYS_REVISION_PASS2) {
1752	/* pass1 */
1753	mc->clkconfig = V_MC_CLKCONFIG_VALUE_PASS1;
1754	}
1755    else {
1756	/* pass2 */
1757	mc->clkconfig = V_MC_CLKCONFIG_VALUE;
1758	}
1759
1760    cs = &(mc->csdata[0]);
1761
1762    while (init->mcr.mcr_type != MCR_EOT) {
1763
1764#ifdef _MCSTANDALONE_NOISY_
1765	printf("DRAM: Processing record '%s'\n",sb1250_rectypes[init->mcr.mcr_type]);
1766#endif
1767
1768	switch (init->mcr.mcr_type) {
1769
1770	    case MCR_GLOBALS:
1771		if (init->gbl.gbl_intlv_ch) d->flags |= M_MCINIT_TRYPINTLV;
1772		break;
1773
1774	    case MCR_CHCFG:
1775		mc = &(d->mc[init->cfg.cfg_chan]);
1776		mc->mintmemclk = DECTO10THS(init->cfg.cfg_mintmemclk);
1777		mc->dramtype   = init->cfg.cfg_dramtype;
1778		mc->pagepolicy = init->cfg.cfg_pagepolicy;
1779		mc->blksize    = init->cfg.cfg_blksize;
1780		mc->cfgcsint   = init->cfg.cfg_intlv_cs;
1781
1782		/* Default clock config unless overridden */
1783		if (G_SYS_REVISION(sysrev) >= K_SYS_REVISION_PASS1 &&
1784		    G_SYS_REVISION(sysrev) < K_SYS_REVISION_PASS2) {
1785		    /* pass1 */
1786		    mc->clkconfig = V_MC_CLKCONFIG_VALUE_PASS1;
1787		    }
1788		else {
1789		    /* pass2 */
1790		    mc->clkconfig = V_MC_CLKCONFIG_VALUE;
1791		    }
1792
1793		mc->flags = (init->cfg.cfg_ecc & MCFLG_ECC_ENABLE) |
1794		    (init->cfg.cfg_flags & (MCFLG_FORCEREG | MCFLG_BIGMEM | MCFLG_DS_REDUCED));
1795		mc->roundtrip  = DECTO10THS(init->cfg.cfg_roundtrip);
1796		if (mc->roundtrip == 0 && mc->dramtype != DRAM_TYPE_SPD) {
1797		    /*
1798		     * Only set default roundtrip if mem type is specified, else wait
1799		     * to get type from SPD
1800		     */
1801		    mc->roundtrip = (mc->dramtype == FCRAM) ?
1802			DEFAULT_MEMORY_ROUNDTRIP_TIME_FCRAM : DEFAULT_MEMORY_ROUNDTRIP_TIME;
1803		    }
1804		if (mc->dramtype == FCRAM) mc->pagepolicy = CLOSED; 	/*FCRAM must be closed page policy*/
1805		cs = &(mc->csdata[0]);
1806		break;
1807
1808	    case MCR_TIMING:
1809		cs->spd_tCK_25 = init->tmg.tmg_tCK;
1810		cs->spd_tCK_20 = 0;
1811		cs->spd_tCK_10 = 0;
1812		cs->spd_rfsh = init->tmg.tmg_rfsh;
1813		cs->spd_caslatency = init->tmg.tmg_caslatency;
1814		cs->spd_attributes = init->tmg.tmg_attributes;
1815		cs->spd_tRAS = init->tmg.tmg_tRAS;
1816		cs->spd_tRP = init->tmg.tmg_tRP;
1817		cs->spd_tRRD = init->tmg.tmg_tRRD;
1818		cs->spd_tRCD = init->tmg.tmg_tRCD;
1819		cs->spd_tRFC = init->tmg.tmg_tRFC;
1820		cs->spd_tRC = init->tmg.tmg_tRC;
1821		break;
1822
1823	    case MCR_CLKCFG:
1824		mc->clkconfig =
1825		    V_MC_ADDR_SKEW((uint64_t)(init->clk.clk_addrskew)) |
1826		    V_MC_DQO_SKEW((uint64_t)(init->clk.clk_dqoskew)) |
1827		    V_MC_DQI_SKEW((uint64_t)(init->clk.clk_dqiskew)) |
1828		    V_MC_ADDR_DRIVE((uint64_t)(init->clk.clk_addrdrive)) |
1829		    V_MC_DATA_DRIVE((uint64_t)(init->clk.clk_datadrive)) |
1830		    V_MC_CLOCK_DRIVE((uint64_t)(init->clk.clk_clkdrive));
1831		break;
1832
1833	    case MCR_GEOM:
1834		cs = &(mc->csdata[init->geom.geom_csel]);
1835		cs->rows = init->geom.geom_rows;
1836		cs->cols = init->geom.geom_cols;
1837		cs->banks = init->geom.geom_banks;
1838		cs->flags |= CS_PRESENT;
1839		break;
1840
1841	    case MCR_SPD:
1842		cs = &(mc->csdata[init->spd.spd_csel]);
1843		sb1250_dram_getinfo(init->spd.spd_smbuschan,
1844				    init->spd.spd_smbusdev,
1845				    mc,
1846				    init->spd.spd_csel);
1847
1848		if (mc->dramtype == DRAM_TYPE_SPD) {
1849		    /* Use the DRAM type we get from the SPD */
1850		    if (cs->spd_dramtype == SPD_MEMTYPE_FCRAM){
1851			mc->dramtype = FCRAM;
1852			mc->pagepolicy = CLOSED;
1853			if (mc->roundtrip == 0) mc->roundtrip = DEFAULT_MEMORY_ROUNDTRIP_TIME_FCRAM;
1854			}
1855		    else {
1856			mc->dramtype = JEDEC;
1857			if (mc->roundtrip == 0) mc->roundtrip = DEFAULT_MEMORY_ROUNDTRIP_TIME;
1858			}
1859		    }
1860		/*
1861		 * The line below lets you put a MCR_MANTIMING record
1862		 * before an MCR_SPD to work around some missing information
1863		 * on certain DIMMs.   Normally you have only one or the
1864		 * other.
1865		 */
1866		mc->rfsh = cs->spd_rfsh;
1867		/* XXX flags ignored */
1868		break;
1869
1870	    case MCR_MANTIMING:
1871		/* Manual timing - pick record up as bytes because we cannot
1872		   guarantee the alignment of the "mtm_timing" field in our
1873		   structure -- each row is 12 bytes, not good */
1874		mc->rfsh = (uint16_t) init->mtm.mtm_rfsh;	/* units: JEDEC refresh value */
1875		mc->tCK  = (uint16_t) init->mtm.mtm_tCK;	/* units: BCD, like SPD */
1876		mc->mantiming =
1877		    (((uint64_t) init->mtm.mtm_timing[0]) << 56) |
1878		    (((uint64_t) init->mtm.mtm_timing[1]) << 48) |
1879		    (((uint64_t) init->mtm.mtm_timing[2]) << 40) |
1880		    (((uint64_t) init->mtm.mtm_timing[3]) << 32) |
1881		    (((uint64_t) init->mtm.mtm_timing[4]) << 24) |
1882		    (((uint64_t) init->mtm.mtm_timing[5]) << 16) |
1883		    (((uint64_t) init->mtm.mtm_timing[6]) << 8) |
1884		    (((uint64_t) init->mtm.mtm_timing[7]) << 0);
1885		break;
1886
1887	    default:
1888		break;
1889	    }
1890
1891	init++;
1892	}
1893
1894    /*
1895     * Okay, now we've internalized all the data from the SPDs
1896     * and/or the init table.
1897     */
1898
1899}
1900
1901
1902
/*  *********************************************************************
    *  SB1250_DRAMINIT_DELAY
    *
    *  This little routine delays at least 200 microseconds.  It
    *  zeroes CP0 register 9 and then spins until the value read back
    *  from that register reaches DRAMINIT_DELAY_CNT.  Note that the
    *  previous contents of CP0 register 9 are lost.
    *
    *  Input parameters:
    *  	   nothing
    *
    *  Return value:
    *  	   nothing.
    *
    *  Registers used:
    *  	   $8,$9 (declared as clobbers so the compiler will not keep
    *  	   live values in them if this routine gets inlined)
    ********************************************************************* */

/* 200 microseconds = 5KHz, so delay 1GHz/5Khz = 200,000 cycles */

/*
 * The count is a string because it is pasted directly into the
 * assembler template below.
 */
#ifdef _FASTEMUL_
#define DRAMINIT_DELAY_CNT	"50"
#else
#define DRAMINIT_DELAY_CNT	 "(1000000000/5000)"
#endif

#if defined(_MCSTANDALONE_)
#define DRAMINIT_DELAY() sbdelay()		/* not running on a 1250, no delays */
#else
#define DRAMINIT_DELAY() sb1250_draminit_delay()

static void sb1250_draminit_delay(void)
{
    /*
     * Extended asm (note the clobber list): the template overwrites
     * general registers $8 and $9, and the compiler has to be told,
     * otherwise an inlined copy could trash values the caller is
     * holding in those registers.
     *
     *   li   $9,count     - loop limit
     *   mtc0 $0,$9        - clear CP0 register 9
     *   mfc0 $8,$9        - sample CP0 register 9
     *   ssnop ; ssnop     - spacing between the read and the compare
     *   blt  $8,$9,1b     - keep polling until the limit is reached
     */
    __asm __volatile (
	  "     li $9," DRAMINIT_DELAY_CNT " ; "
	  "     mtc0 $0,$9 ; "
	  "1:   mfc0 $8,$9 ; "
	  "     .set push ; .set mips64 ; ssnop ; ssnop ; .set pop ;"
	  "     blt $8,$9,1b ;"
	  : /* no outputs */
	  : /* no inputs */
	  : "$8","$9");
}
#endif
1940
1941
1942
1943
/*  *********************************************************************
    *  MAKEDRAMMASK(width,pos)
    *
    *  Create a 64-bit mask for the DRAM config registers.  This is
    *  simply another name for _SB_MAKEMASK, used by the row/column/
    *  bank/chip-select mask setup code below.
    *
    *  Input parameters:
    *  	   width - number of '1' bits to set
    *  	   pos - position (from the right) of the first '1' bit
    *
    *  Return value:
    *  	   mask with specified width at specified position
    ********************************************************************* */

#define MAKEDRAMMASK(width,pos) _SB_MAKEMASK(width,pos)
1958
1959
1960/*  *********************************************************************
1961    *  SB1250_JEDEC_INITCMDS
1962    *
1963    *  Issue the sequence of DRAM init commands (JEDEC SDRAMs)
1964    *
1965    *  Input parameters:
1966    *      mcnum - memory controller index (0/1)
1967    *  	   mc - pointer to data for this memory controller
1968    *	   csel - which chip select to send init commands for
1969    *      lmbank - for largemem systems, the cs qualifiers to be
1970    *		    output on CS[2:3]
1971    *      tdata - chip select to use as a template for timing data
1972    *
1973    *  Return value:
1974    *  	   nothing
1975    ********************************************************************* */
1976
1977static void sb1250_jedec_initcmds(int mcnum,mcdata_t *mc,int csel,
1978				  int lmbank,csdata_t *tdata)
1979{
1980    uint64_t csmask;
1981    sbport_t cmd;
1982    sbport_t mode;
1983    uint64_t modebits;
1984    uint64_t casbits;
1985
1986    csmask = M_MC_CS0 << csel;		/* convert chip select # to mask */
1987
1988    if (mc->flags & MCFLG_BIGMEM) {
1989	/*
1990	 * so that the banks will all get their precharge signals,
1991	 * put the CS qualifiers out on CS[2:3].
1992	 */
1993	csmask |= (uint64_t)(lmbank << 6);
1994	}
1995
1996    cmd  = (sbport_t) PHYS_TO_K1(A_MC_REGISTER(mcnum,R_MC_DRAMCMD));
1997    mode = (sbport_t) PHYS_TO_K1(A_MC_REGISTER(mcnum,R_MC_DRAMMODE));
1998
1999    /*
2000     * Using the data in the timing template, figure out which
2001     * CAS latency command to issue.
2002     */
2003
2004    switch (tdata->flags & CS_CASLAT_MASK) {
2005	case CS_CASLAT_30:
2006	    casbits = V_MC_MODE(JEDEC_SDRAM_MRVAL_CAS3);
2007	    break;
2008	default:
2009	case CS_CASLAT_25:
2010	    casbits = V_MC_MODE(JEDEC_SDRAM_MRVAL_CAS25);
2011	    break;
2012	case CS_CASLAT_20:
2013	    casbits = V_MC_MODE(JEDEC_SDRAM_MRVAL_CAS2);
2014	    break;
2015	case CS_CASLAT_15:
2016	    casbits = V_MC_MODE(JEDEC_SDRAM_MRVAL_CAS15);
2017	    break;
2018	}
2019
2020    /*
2021     * Set up for doing mode register writes to the SDRAMs
2022     *
2023     * First time, we set bit 8 to reset the DLL
2024     */
2025
2026    modebits = V_MC_EMODE(JEDEC_SDRAM_EMRVAL) |
2027	     V_MC_MODE(JEDEC_SDRAM_MRVAL_RESETDLL) |
2028	     V_MC_DRAM_TYPE(JEDEC);
2029
2030    /*
2031     * Check to see if we need to set reduced drive strength.
2032     */
2033    if (mc->flags & MCFLG_DS_REDUCED)
2034	modebits |= V_MC_EMODE(JEDEC_SDRAM_EMRVAL_DS_REDUCED);
2035
2036    /*
2037     * Writing to mode regsiter will start clock.  Need delay
2038     * before issuing commands.
2039     */
2040    WRITECSR(mode,modebits | casbits);
2041    DRAMINIT_DELAY();
2042
2043    /*
2044     * Clear power-down without CS assertion to assert CKE.
2045     */
2046    WRITECSR(cmd,V_MC_COMMAND_CLRPWRDN);
2047    DRAMINIT_DELAY();
2048
2049    WRITECSR(cmd,csmask | V_MC_COMMAND_CLRPWRDN);
2050    DRAMINIT_DELAY();
2051
2052    WRITECSR(cmd,csmask | V_MC_COMMAND_PRE);
2053    DRAMINIT_DELAY();
2054
2055    WRITECSR(cmd,csmask | V_MC_COMMAND_EMRS);
2056    DRAMINIT_DELAY();
2057
2058    WRITECSR(cmd,csmask | V_MC_COMMAND_MRS);
2059    DRAMINIT_DELAY();
2060
2061    WRITECSR(cmd,csmask | V_MC_COMMAND_PRE);
2062    DRAMINIT_DELAY();
2063
2064    WRITECSR(cmd,csmask | V_MC_COMMAND_AR);
2065    DRAMINIT_DELAY();
2066
2067    WRITECSR(cmd,csmask | V_MC_COMMAND_AR);
2068    DRAMINIT_DELAY();
2069
2070    /*
2071     * This time, clear bit 8 to start the DLL
2072     */
2073
2074    modebits = V_MC_EMODE(JEDEC_SDRAM_EMRVAL) |
2075	V_MC_DRAM_TYPE(JEDEC);
2076
2077    /*
2078     * Check to see if we need to set reduced drive strength.
2079     */
2080    if (mc->flags & MCFLG_DS_REDUCED)
2081	modebits |= V_MC_EMODE(JEDEC_SDRAM_EMRVAL_DS_REDUCED);
2082
2083    WRITECSR(mode,modebits | casbits);
2084    WRITECSR(cmd,csmask | V_MC_COMMAND_MRS);
2085    DRAMINIT_DELAY();
2086
2087}
2088
2089/*  *********************************************************************
2090    *  SB1250_SGRAM_INITCMDS
2091    *
2092    *  Issue the sequence of DRAM init commands. (SGRAMs)
2093    *  Note: this routine does not support "big memory" (external decode)
2094    *
2095    *  Input parameters:
2096    *      mcnum - memory controller index (0/1)
2097    *  	   mc - pointer to data for this memory controller
2098    *	   csel - which chip select to send init commands for
2099    *      tdata - chip select to use as a template for timing data
2100    *
2101    *  Return value:
2102    *  	   nothing
2103    ********************************************************************* */
2104
2105static void sb1250_sgram_initcmds(int mcnum,mcdata_t *mc,int csel,csdata_t *tdata)
2106{
2107    uint64_t csmask;
2108    sbport_t cmd;
2109    sbport_t mode;
2110
2111    csmask = M_MC_CS0 << csel;		/* convert chip select # to mask */
2112    cmd  = (sbport_t) PHYS_TO_K1(A_MC_REGISTER(mcnum,R_MC_DRAMCMD));
2113    mode = (sbport_t) PHYS_TO_K1(A_MC_REGISTER(mcnum,R_MC_DRAMMODE));
2114
2115
2116    WRITECSR(cmd,csmask | V_MC_COMMAND_CLRPWRDN);
2117    DRAMINIT_DELAY();
2118
2119    WRITECSR(cmd,csmask | V_MC_COMMAND_PRE);
2120    DRAMINIT_DELAY();
2121
2122    /*
2123     * Set up for doing mode register writes to the SDRAMs
2124     *
2125     * mode 0x62 is "sequential 4-byte bursts, CAS Latency 2.5"
2126     * mode 0x22 is "sequential 4-byte bursts, CAS Latency 2"
2127     *
2128     * First time, we set bit 8 to reset the DLL
2129     */
2130
2131    WRITECSR(mode,V_MC_EMODE(SGRAM_EMRVAL) |
2132	     V_MC_MODE(SGRAM_MRVAL) |
2133	     V_MC_MODE(SGRAM_MRVAL_RESETDLL) |
2134	     V_MC_DRAM_TYPE(SGRAM));
2135
2136    WRITECSR(cmd,csmask | V_MC_COMMAND_EMRS);
2137    DRAMINIT_DELAY();
2138
2139    WRITECSR(cmd,csmask | V_MC_COMMAND_MRS);
2140    DRAMINIT_DELAY();
2141
2142    WRITECSR(cmd,csmask | V_MC_COMMAND_PRE);
2143    DRAMINIT_DELAY();
2144
2145    WRITECSR(cmd,csmask | V_MC_COMMAND_AR);
2146    DRAMINIT_DELAY();
2147
2148    WRITECSR(cmd,csmask | V_MC_COMMAND_AR);
2149    DRAMINIT_DELAY();
2150
2151    /*
2152     * This time, clear bit 8 to start the DLL
2153     */
2154
2155    WRITECSR(mode,V_MC_EMODE(SGRAM_EMRVAL) |
2156	     V_MC_MODE(SGRAM_MRVAL) |
2157	     V_MC_DRAM_TYPE(SGRAM));
2158    WRITECSR(cmd,csmask | V_MC_COMMAND_MRS);
2159    DRAMINIT_DELAY();
2160
2161}
2162
2163/*  *********************************************************************
2164    *  SB1250_FCRAM_INITCMDS
2165    *
2166    *  Issue the sequence of DRAM init commands.  (FCRAMs)
2167    *  Note: this routine does not support "big memory" (external decode)
2168    *
2169    *  Input parameters:
2170    *      mcnum - memory controller index (0/1)
2171    *  	   mc - pointer to data for this memory controller
2172    *	   csel - which chip select to send init commands for
2173    *      tdata - chip select to use as a template for timing data
2174    *
2175    *  Return value:
2176    *  	   nothing
2177    ********************************************************************* */
2178
2179static void sb1250_fcram_initcmds(int mcnum,mcdata_t *mc,int csel,csdata_t *tdata)
2180{
2181    uint64_t csmask;
2182    sbport_t cmd;
2183    sbport_t mode;
2184
2185    csmask = M_MC_CS0 << csel;		/* convert chip select # to mask */
2186    cmd  = (sbport_t) PHYS_TO_K1(A_MC_REGISTER(mcnum,R_MC_DRAMCMD));
2187    mode = (sbport_t) PHYS_TO_K1(A_MC_REGISTER(mcnum,R_MC_DRAMMODE));
2188
2189
2190    /*
2191     * For FCRAMs the type must be set first, since much of the
2192     * init state machine is done in hardware.
2193     */
2194
2195    WRITECSR(mode, V_MC_DRAM_TYPE(FCRAM));
2196    DRAMINIT_DELAY();	/* Required delay for FCRAM */
2197
2198    WRITECSR(cmd,csmask | V_MC_COMMAND_CLRPWRDN);
2199    DRAMINIT_DELAY();
2200
2201    WRITECSR(cmd,csmask | K_MC_COMMAND_PRE);
2202    DRAMINIT_DELAY();
2203
2204    /*
2205     * Set up for doing mode register writes to the FCRAMs
2206     *
2207     * mode 0x32 is "sequential 4-byte bursts, CAS Latency 3.0"
2208     */
2209
2210    WRITECSR(mode,V_MC_EMODE(FCRAM_EMRVAL) |
2211	     V_MC_MODE(FCRAM_MRVAL) |
2212	     V_MC_DRAM_TYPE(FCRAM));
2213
2214    WRITECSR(cmd,csmask | V_MC_COMMAND_EMRS);
2215    DRAMINIT_DELAY();
2216
2217    WRITECSR(cmd,csmask | V_MC_COMMAND_MRS);
2218    DRAMINIT_DELAY();
2219
2220    WRITECSR(cmd,csmask | V_MC_COMMAND_AR);
2221    DRAMINIT_DELAY();
2222
2223    WRITECSR(cmd,csmask | V_MC_COMMAND_AR);
2224    DRAMINIT_DELAY();
2225
2226    /*
2227     * Do 4 dummy writes, one to each bank, to get the
2228     * memory started.
2229     */
2230
2231#ifndef _MCSTANDALONE_		/* only on real hardware */
2232    do {
2233	volatile uint64_t *ptr;
2234
2235	ptr = (volatile uint64_t *) PHYS_TO_K1(0);
2236	*(ptr+(0x00>>3)) = 0;
2237	*(ptr+(0x20>>3)) = 0;
2238	*(ptr+(0x40>>3)) = 0;
2239	*(ptr+(0x60>>3)) = 0;
2240	} while (0);
2241#endif
2242
2243}
2244
2245
2246
2247
2248/*  *********************************************************************
2249    *  SB1250_DRAM_INTLV
2250    *
2251    *  Do row/column/bank initialization for 128-byte interleaved
2252    *  mode, and also interleave across ports.  You need to have
2253    *  the same geometry DIMMs installed on both memory
2254    *  channels for this to work.
2255    *
2256    *  Interleaved modes will assign address bits in the following
2257    *  order:
2258    *
2259    *  RRRRRRRR...R CCCCCCCC...C NN BB P CC xx000
2260    *
2261    *  Where 'R' are row address bits,
2262    *        'C' are column address bits
2263    *        'N' are chip-select bits
2264    *        'B' are bank select bits
2265    *        'P' is the channel (port) select
2266    *        'x' is ignored by the MC, but will be set to '1'.
2267    *
2268    *  Input parameters:
2269    *  	   lots of stuff
2270    *
2271    *  Return value:
2272    *  	   lots of stuff
2273    ********************************************************************* */
2274static void sb1250_dram_intlv(initdata_t *d)
2275{
2276    int ttlbits;			/* bit counter */
2277    int rows,columns,banks;
2278    int csidx;
2279    sbport_t mc0base;
2280    sbport_t mc1base;
2281    sbport_t csx0base;
2282    sbport_t csx1base;
2283    uint64_t mask;
2284    uint64_t colmask;
2285    int t;
2286    uint64_t dimmsize;
2287    int num_csint = 1 << d->mc[0].csint;
2288    uint64_t tmp;
2289
2290    d->ttlbytes = 0;			/* start with zero memory */
2291
2292    /*
2293     * Loop through each chip select
2294     */
2295
2296    mc0base = PHYS_TO_K1(A_MC_BASE(0));
2297    mc1base = PHYS_TO_K1(A_MC_BASE(1));
2298
2299    for (csidx = 0; csidx < MC_CHIPSELS; csidx++) {
2300
2301	/*
2302	 * Address of CS-specific registers
2303	 */
2304
2305	csx0base = mc0base + R_MC_CSX_BASE + csidx*MC_CSX_SPACING;
2306	csx1base = mc1base + R_MC_CSX_BASE + csidx*MC_CSX_SPACING;
2307
2308	/*
2309	 * Ignore this chipsel if we're not using it
2310	 */
2311
2312	if (!(d->mc[0].csdata[csidx].flags & CS_PRESENT)) continue;
2313	if (!(d->mc[1].csdata[csidx].flags & CS_PRESENT)) continue;
2314
2315	/*
2316	 * Remember we did something to this MC.  We won't bother
2317	 * activating controllers we don't use.
2318	 */
2319
2320	d->inuse |= 3;		/* we're using both controllers 0 and 1 */
2321
2322	/*
2323	 * Dig the geometry out of the structure
2324	 */
2325
2326	columns = d->mc[0].csdata[csidx].cols;
2327	rows    = d->mc[0].csdata[csidx].rows;
2328	banks   = d->mc[0].csdata[csidx].banks;
2329
2330	/*
2331	 * The lowest 3 bits are never set in any mask.
2332	 * They represent the byte width of the DIMM.
2333	 */
2334
2335	ttlbits = 3;
2336
2337	/*
2338	 * The first two bits are always set and are part of the
2339	 * column bits.  Actually, the MC ignores these bits
2340	 * but we set them here for clarity.
2341	 *
2342	 * Depending on the block size, 0, 1, or 2 additional
2343	 * bits are used for column interleave.
2344	 *
2345	 * When interleaving ports, we always have a block
2346	 * size of 128.  It will work to use other block sizes,
2347	 * but performance will suffer.
2348	 */
2349
2350	switch (d->mc[0].blksize) {
2351	    case 32:	t = 2; break;	/* 32-byte interleave */
2352	    case 64:	t = 3; break;	/* 64-byte interleave */
2353	    default:	t = 4; break;	/* 128-byte interleave */
2354	    }
2355
2356	columns -= t;
2357	colmask = MAKEDRAMMASK(t,ttlbits);
2358	ttlbits += t;
2359
2360	/*
2361	 * add 1 to 'ttlbits' to account for the bit we're
2362	 * using for port intleave.  The current value
2363	 * of 'ttlbits' also happens to be the
2364	 * bit number for port interleaving.
2365	 */
2366
2367	d->pintbit = ttlbits;		/* interleave bit is right here */
2368
2369	ttlbits++;
2370
2371	/*
2372	 * Now do the bank mask
2373	 */
2374
2375	mask = MAKEDRAMMASK(banks,ttlbits);
2376	ttlbits += banks;
2377	WRITECSR(csx0base+R_MC_CSX_BA,mask);
2378	WRITECSR(csx1base+R_MC_CSX_BA,mask);
2379
2380	/*
2381	 * Now do the chip select mask
2382	 */
2383
2384	if ((d->mc[0].csint > 0) &&
2385	    (csidx < num_csint)) {
2386	    mask = MAKEDRAMMASK(d->mc[0].csint,ttlbits);
2387	    ttlbits += d->mc[0].csint;
2388	    WRITECSR(mc0base+R_MC_CS_INTERLEAVE,mask);
2389	    WRITECSR(mc1base+R_MC_CS_INTERLEAVE,mask);
2390	    }
2391
2392	/*
2393	 * Do the rest of the column bits
2394	 */
2395
2396	mask = MAKEDRAMMASK(columns,ttlbits);
2397	colmask |= mask;
2398	WRITECSR(csx0base+R_MC_CSX_COL,colmask);
2399	WRITECSR(csx1base+R_MC_CSX_COL,colmask);
2400	ttlbits += columns;
2401
2402	/*
2403	 * Finally, do the rows.  If we're in "big" memory
2404	 * mode, two additional row bits are used for the
2405	 * chip select bits.
2406	 */
2407
2408	if (d->mc[0].flags & MCFLG_BIGMEM) {
2409	    rows += 2;		/* add two bits for chip select */
2410	    /*
2411	     * The "bigmem" bit will be set in the MC_CONFIG
2412	     * register back in the main routine.
2413	     */
2414	    }
2415
2416	mask = MAKEDRAMMASK(rows,ttlbits);
2417	ttlbits += rows;
2418	WRITECSR(csx0base+R_MC_CSX_ROW,mask);
2419	WRITECSR(csx1base+R_MC_CSX_ROW,mask);
2420
2421	/*
2422	 * The total size of this DIMM is 1 << ttlbits, which is inflated by a
2423	 * factor of num_csint to cover all interleaved chip selects.
2424	 */
2425
2426	dimmsize = ((uint64_t) 1) << ttlbits;
2427
2428	/*
2429	 * Program the start and end registers.  The start address is 0
2430	 * our if csidx is cs-interleaved; otherwise, the start address
2431	 * is the current "ttlbytes".
2432	 */
2433
2434	if (csidx < num_csint) {
2435	    d->ttlbytes += dimmsize >> d->mc[0].csint;
2436	    }
2437	else {
2438	    mask = READCSR(mc0base+R_MC_CS_START);
2439	    tmp = d->ttlbytes >> 24;
2440	    if (tmp >= 0x40) tmp = (0x100 + (tmp - 0x40)); 	/* Adj for exp space */
2441	    mask |= (tmp << (16*csidx));
2442	    WRITECSR(mc0base+R_MC_CS_START,mask);
2443
2444	    mask = READCSR(mc1base+R_MC_CS_START);
2445	    tmp = d->ttlbytes >> 24;
2446	    if (tmp >= 0x40) tmp = (0x100 + (tmp - 0x40)); 	/* Adj for exp space */
2447	    mask |= (tmp << (16*csidx));
2448	    WRITECSR(mc1base+R_MC_CS_START,mask);
2449
2450	    d->ttlbytes += dimmsize;
2451	    dimmsize = d->ttlbytes;  /* setup dimmsize for cs_end below */
2452	    }
2453
2454	mask = READCSR(mc0base+R_MC_CS_END);
2455	tmp = dimmsize >> 24;
2456	if (tmp > 0x40) tmp = (0x100 + (tmp - 0x40)); 	/* Adj for exp space */
2457	mask |= (tmp << (16*csidx));
2458	WRITECSR(mc0base+R_MC_CS_END,mask);
2459
2460	mask = READCSR(mc1base+R_MC_CS_END);
2461	tmp = dimmsize >> 24;
2462	if (tmp > 0x40) tmp = (0x100 + (tmp - 0x40)); 	/* Adj for exp space */
2463	mask |= (tmp << (16*csidx));
2464	WRITECSR(mc1base+R_MC_CS_END,mask);
2465	}
2466}
2467
2468
2469
2470
2471/*  *********************************************************************
2472    *  SB1250_DRAM_MSBCS
2473    *
2474    *  Do row/column/bank initialization for MSB-CS (noninterleaved)
2475    *  mode.  This is only separated out of the main loop to make things
2476    *  read easier, it's not a generally useful subroutine by itself.
2477    *
2478    *  Input parameters:
2479    *  	   initdata_t structure
2480    *
2481    *  Return value:
2482    *  	   memory controller initialized
2483    ********************************************************************* */
2484
2485static void sb1250_dram_msbcs(initdata_t *d)
2486{
2487    int ttlbits;			/* bit counter */
2488    int rows,columns,banks;
2489    int mcidx,csidx;
2490    sbport_t mcbase;
2491    sbport_t csxbase;
2492    uint64_t mask;
2493    uint64_t colmask;
2494    int t;
2495    uint64_t dimmsize;
2496
2497    d->ttlbytes = 0;			/* start with zero memory */
2498
2499    /*
2500     * Loop through each memory controller and each chip select
2501     * within each memory controller.
2502     */
2503
2504    for (mcidx = d->firstchan; mcidx < MC_CHANNELS; mcidx++) {
2505        int num_csint = 1 << d->mc[mcidx].csint;
2506	uint64_t channel_start = d->ttlbytes;
2507	uint64_t end_addr;
2508	uint64_t tmp;
2509
2510	mcbase = PHYS_TO_K1(A_MC_BASE(mcidx));
2511
2512	for (csidx = 0; csidx < MC_CHIPSELS; csidx++) {
2513
2514	    /*
2515	     * Address of CS-specific registers
2516	     */
2517
2518	    csxbase = mcbase + R_MC_CSX_BASE + csidx * MC_CSX_SPACING;
2519
2520	    /*
2521	     * Ignore this chipsel if we're not using it
2522	     */
2523
2524	    if (!(d->mc[mcidx].csdata[csidx].flags & CS_PRESENT)) continue;
2525
2526	    /*
2527	     * Remember we did something to this MC.  We won't bother
2528	     * activating controllers we don't use.
2529	     */
2530
2531	    d->inuse |= (1 << mcidx);
2532
2533	    /*
2534	     * Dig the geometry out of the structure
2535	     */
2536
2537	    columns = d->mc[mcidx].csdata[csidx].cols;
2538	    rows    = d->mc[mcidx].csdata[csidx].rows;
2539	    banks   = d->mc[mcidx].csdata[csidx].banks;
2540
2541	    /*
2542	     * The lowest 3 bits are never set in any mask.
2543	     * They represent the byte width of the DIMM.
2544	     */
2545
2546	    ttlbits = 3;
2547
2548	    /*
2549	     * The first two bits are always set and are part of the
2550	     * column bits.  Actually, the MC ignores these bits
2551	     * but we set them here for clarity.
2552	     *
2553	     * Depending on the block size, 0, 1, or 2 additional
2554	     * bits are used for column interleave.
2555	     */
2556
2557	    switch (d->mc[mcidx].blksize) {
2558		case 32:   t = 2; break;	/* 32-byte interleave */
2559		case 64:   t = 3; break;	/* 64-byte interleave */
2560		default:   t = 4; break;	/* 128-byte interleave */
2561		}
2562
2563	    columns -= t;
2564	    colmask = MAKEDRAMMASK(t,ttlbits);
2565	    ttlbits += t;
2566
2567	    /*
2568	     * Now do the bank mask
2569	     */
2570
2571	    mask = MAKEDRAMMASK(banks,ttlbits);
2572	    ttlbits += banks;
2573	    WRITECSR(csxbase+R_MC_CSX_BA,mask);
2574
2575	    /*
2576	     * Now do the chip select mask
2577	     */
2578
2579	    if ((d->mc[mcidx].csint > 0) &&
2580		(csidx < num_csint)) {
2581	        mask = MAKEDRAMMASK(d->mc[mcidx].csint,ttlbits);
2582		ttlbits += d->mc[mcidx].csint;
2583		WRITECSR(mcbase+R_MC_CS_INTERLEAVE,mask);
2584		}
2585
2586	    /*
2587	     * Do the rest of the column bits
2588	     */
2589
2590	    mask = MAKEDRAMMASK(columns,ttlbits);
2591	    colmask |= mask;
2592	    WRITECSR(csxbase+R_MC_CSX_COL,colmask);
2593	    ttlbits += columns;
2594
2595	    /*
2596	     * Finally, do the rows.  If we're in "big" memory
2597	     * mode, two additional row bits are used for the
2598	     * chip select bits.
2599	     */
2600
2601	    if (d->mc[mcidx].flags & MCFLG_BIGMEM) {
2602		rows += 2;		/* add two bits for chip select */
2603		/*
2604		 * The "bigmem" bit will be set in the MC_CONFIG
2605		 * register back in the main routine.
2606 	 	 */
2607		}
2608
2609	    mask = MAKEDRAMMASK(rows,ttlbits);
2610	    ttlbits += rows;
2611	    WRITECSR(csxbase+R_MC_CSX_ROW,mask);
2612
2613	    /*
2614	     * The total size of this DIMM is 1 << ttlbits, which is inflated
2615	     * by a factor of num_csint to cover all interleaved chip selects.
2616	     */
2617
2618	    dimmsize = ((uint64_t) 1) << ttlbits;
2619
2620	    /*
2621	     * Program the start and end registers.  The start address is
2622	     * channel_start if csidx is cs-interleaved; otherwise, the
2623	     * start address is the current "ttlbytes".
2624	     */
2625
2626	    if (csidx < num_csint) {
2627	        mask = READCSR(mcbase+R_MC_CS_START);
2628		tmp  = (channel_start >> 24);
2629		if (tmp >= 0x40) tmp = (0x100 + (tmp - 0x40)); 	/* Adj for exp space */
2630		mask |= (tmp << (16*csidx));
2631		WRITECSR(mcbase+R_MC_CS_START,mask);
2632
2633	        d->ttlbytes += dimmsize >> d->mc[mcidx].csint;
2634		end_addr = channel_start + dimmsize;
2635		}
2636	    else {
2637	        mask = READCSR(mcbase+R_MC_CS_START);
2638		tmp  = d->ttlbytes >> 24;
2639		if (tmp >= 0x40) tmp = (0x100 + (tmp - 0x40)); 	/* Adj for exp space */
2640		mask |= (tmp << (16*csidx));
2641		WRITECSR(mcbase+R_MC_CS_START,mask);
2642
2643		d->ttlbytes += dimmsize;
2644		end_addr = d->ttlbytes;
2645		}
2646
2647	    mask = READCSR(mcbase+R_MC_CS_END);
2648	    tmp  = end_addr >> 24;
2649	    if (tmp > 0x40) tmp = (0x100 + (tmp - 0x40)); 	/* Adj for exp space */
2650	    mask |= (tmp << (16*csidx));
2651	    WRITECSR(mcbase+R_MC_CS_END,mask);
2652	    }
2653    }
2654}
2655
2656
2657/*  *********************************************************************
2658    *  SB1250_DRAM_ANALYZE(d)
2659    *
2660    *  Analyze the DRAM parameters, determine if we can do
2661    *  port interleaving mode
2662    *
2663    *  Input parameters:
2664    *  	   d - init data
2665    *
2666    *  Return value:
2667    *  	   nothing (fields in initdata are updated)
2668    ********************************************************************* */
2669
2670static void sb1250_dram_analyze(initdata_t *d)
2671{
2672    int csidx;
2673    int mcidx;
2674    int csint;
2675
2676    /*
2677     * Determine if we can do port interleaving.  This is possible if
2678     * the DIMMs on each channel are the same.
2679     */
2680
2681    for (csidx = 0; csidx < MC_CHIPSELS; csidx++) {
2682	if (d->mc[0].csdata[csidx].rows != d->mc[1].csdata[csidx].rows) break;
2683	if (d->mc[0].csdata[csidx].cols != d->mc[1].csdata[csidx].cols) break;
2684	if (d->mc[0].csdata[csidx].banks != d->mc[1].csdata[csidx].banks) break;
2685	if (d->mc[0].csdata[csidx].flags != d->mc[1].csdata[csidx].flags) break;
2686	}
2687
2688    /*
2689     * If the per-controller flags don't match, no port interleaving.
2690     * I.e., you can't mix and match ECC, big memory, etc.
2691     */
2692
2693    if (d->mc[0].flags != d->mc[1].flags) csidx = 0;
2694
2695    /*
2696     * Done with checks, see if we can do it.
2697     */
2698
2699    if (csidx == MC_CHIPSELS) {
2700	/*
2701	 * All channels are the same, we can interleave. If we were asked
2702	 * to try it, then enable it.
2703	 */
2704	if (d->flags & M_MCINIT_TRYPINTLV) d->flags |= M_MCINIT_PINTLV;
2705	}
2706
2707    /*
2708     * Determine how many CS interleave bits (0, 1, or 2) will work.
2709     * Memory channels are checked separately.  If port (i.e., channel)
2710     * interleaving is allowed, each channel will end up with the same number
2711     * of CS interleave bits.
2712     * Note: No support for interleaving only chip selects 2 & 3.
2713     */
2714
2715    for (mcidx = d->firstchan; mcidx < MC_CHANNELS; mcidx++) {
2716        /* Forbid CS interleaving if any of:
2717	   - not requested
2718	   - in large mem mode
2719	   - CS 0 is absent */
2720	if ((d->mc[mcidx].cfgcsint > 0) &&
2721	    !(d->mc[mcidx].flags & MCFLG_BIGMEM) &&
2722	    (d->mc[mcidx].csdata[0].flags & CS_PRESENT)) {
2723
2724	    for (csidx = 1; csidx < MC_CHIPSELS; csidx++) {
2725		/* CS csidx must be present */
2726		if (!(d->mc[mcidx].csdata[csidx].flags & CS_PRESENT)) break;
2727		/* CS csidx must match geometry of CS 0 */
2728		if (d->mc[mcidx].csdata[0].rows != d->mc[mcidx].csdata[csidx].rows)
2729		    break;
2730		if (d->mc[mcidx].csdata[0].cols != d->mc[mcidx].csdata[csidx].cols)
2731		    break;
2732		if (d->mc[mcidx].csdata[0].banks !=
2733		    d->mc[mcidx].csdata[csidx].banks)
2734		    break;
2735		}
2736	    /* csidx = 1st CS index that can't be interleaved;
2737	       Convert csidx to a number of CS interleave address bits */
2738	    csint = csidx >> 1;
2739	    /* Cap csint by the csinterleave attribute on the channel */
2740	    if (csint > d->mc[mcidx].cfgcsint) {
2741		csint = d->mc[mcidx].cfgcsint;
2742		}
2743	    /* Forbid CS interleaving into the hole in the memory address
2744	       space; i,e., cap the port-and-CS-interleaved CS size at 1 GB.
2745	       Remove this code when sb1250_dram_intlv() can deal with CS
2746	       sizes that span the hole. */
2747	    {
2748	      int addr_bits = 30;  /* 1 GB */
2749	      addr_bits -= 3;      /* 8 byte data bus width */
2750	      addr_bits -= d->mc[mcidx].csdata[0].rows;
2751	      addr_bits -= d->mc[mcidx].csdata[0].cols;
2752	      addr_bits -= d->mc[mcidx].csdata[0].banks;
2753	      if (d->flags & M_MCINIT_PINTLV) addr_bits -= 1;
2754	      if (addr_bits < 0) addr_bits = 0;
2755	      if (addr_bits < csint) csint = addr_bits;
2756	    }
2757	    /* Return csint to caller */
2758	    d->mc[mcidx].csint = csint;
2759	    }
2760	}
2761}
2762
2763
2764
2765
2766#if !defined(_MCSTANDALONE_)		/* When not linked into firmware, no RAM zeroing */
2767
/*  *********************************************************************
    *  SB1250_DRAM_ZERO1MB(d,addr)
    *
    *  Zero one megabyte of memory starting at the specified address,
    *  using CPU stores.  This version is built when _DMZERO_ is not
    *  defined.  'addr' is in megabytes.
    *
    *  Input parameters:
    *  	   d - initdata structure (not referenced by this version)
    *  	   addr - starting address, expressed as a megabyte index
    *
    *  Return value:
    *  	   nothing
    ********************************************************************* */

#ifndef _DMZERO_
static void sb1250_dram_zero1mb(initdata_t *d,uint64_t addr)
{
    /*
     * We **MUST** align the stores to a cache-line boundary.  If they
     * get split over multiple cache lines, the first miss (to get the
     * *rest* of the stores into the icache) will cause a long enough
     * delay for the UAC write (not yet to the complete line) to be
     * flushed... which means it will hit the MC as a partial line,
     * causing a RMW, defeating the notion of writing all of memory to
     * clear ECC.
     */
    __asm(" .set push ; .set noreorder ; .set mips64 ; "
	  /* Save CP0 register 12 in $9 and write it back with bit 0x80
	     set (presumably Status.KX, to allow the 64-bit address
	     built below -- confirm against the MIPS64 spec). */
	  "  mfc0 $9,$12 ; "
	  "  ori  $8,$9,0x80 ; "
	  "  mtc0 $8,$12 ; "
	  /* Never-taken branch-likely plus ssnop; presumably padding
	     so the mtc0 takes effect before the stores -- confirm. */
	  "  bnel $0,$0,.+4 ; "
	  "  ssnop ; "
	  /* $10 = 0xB800 in the top 16 bits of a 64-bit address
	     (presumably the uncached-accelerated window the comment
	     above refers to), OR'ed with addr << 20, i.e. the
	     megabyte index turned into a byte offset. */
	  "  lui  $10,0xB800 ; "
	  "  dsll32 $10,$10,0 ; "
	  "  dsll   $12,%0,20 ;"
	  "  or     $10,$10,$12 ; "
	  /* $11 = 0x100000, the number of bytes left to clear */
	  "  lui    $11,0x10 ; "
	  /* Put the loop on a 32-byte boundary (see comment above) */
	  "  .align 5 ; "
	  "1: "
	  /* Eight 8-byte stores of zero = 64 bytes per pass */
	  "  sd     $0,0($10) ; "
	  "  sd     $0,8($10) ; "
	  "  sd     $0,16($10) ; "
	  "  sd     $0,24($10) ; "
	  "  sd     $0,32($10) ; "
	  "  sd     $0,40($10) ; "
	  "  sd     $0,48($10) ; "
	  "  sd     $0,56($10) ; "
	  /* Count down by 64 and loop; the pointer advance sits in the
	     branch delay slot (noreorder), so it runs every pass. */
	  "  sub    $11,$11,64 ; "
	  "  bne    $11,$0,1b ;  "
	  "  dadd   $10,64 ; "
	  /* Put back the CP0 register 12 value saved at the top */
          "  mtc0   $9,$12 ; "
          "  bnel   $0,$0,.+4 ;"
	  "  ssnop ; "
	  " .set pop"
	  /* no outputs; addr is the only input; $8-$12 are scratch */
	  : : "r"(addr) : "$8","$9","$10","$11","$12");
}
#endif
2825
/*  *********************************************************************
    *  SB1250_DRAM_ZERO1MB(d,addr)
    *
    *  Zero one megabyte of memory starting at the specified address,
    *  using the data mover.  This version is built when _DMZERO_ is
    *  defined.  'addr' is in megabytes.
    *
    *  A single descriptor is built in d->dscr, flushed from the
    *  cache, handed to data mover channel 0, and then the routine
    *  polls until the descriptor count reads back as zero.
    *
    *  Input parameters:
    *  	   d - initdata structure (d->dscr[0..1] are overwritten)
    *  	   addr - starting address, expressed as a megabyte index
    *
    *  Return value:
    *  	   nothing
    ********************************************************************* */

#ifdef _DMZERO_
static void sb1250_dram_zero1mb(initdata_t *d,uint64_t addr)
{
    sbport_t dmreg;
    uint64_t baseaddr;
    volatile int idx;

    /*
     * Build the descriptor.  The destination is the megabyte index
     * converted to a byte address.
     * NOTE(review): a source length of 0 presumably selects the
     * largest transfer (1MB) -- confirm against the data mover docs.
     */

    d->dscr[0] = (addr << 20) |
	M_DM_DSCRA_ZERO_MEM |
	M_DM_DSCRA_UN_DEST | M_DM_DSCRA_UN_SRC |
	V_DM_DSCRA_DIR_SRC_CONST |
	V_DM_DSCRA_DIR_DEST_INCR;
    d->dscr[1] = V_DM_DSCRB_SRC_LENGTH(0);

    /* Flush the descriptor out.  We need to do this in Pass1
       because we're in cacheable noncoherent mode right now and
       the core will not respond to the DM's request for the descriptor.

       The "memory" clobber makes the compiler finish the two
       descriptor stores above before the cache op is issued; without
       it they are ordinary stores that could be moved after the asm.

       NOTE(review): the cache op is applied to 'd', not 'd->dscr',
       so this relies on dscr living in the first cache line of *d --
       confirm against the initdata_t definition. */

    __asm __volatile ("cache 0x15,0(%0) ; " :: "r"(d) : "memory");

    /*
     * Give the descriptor to the data mover
     */

    dmreg = PHYS_TO_K1(A_DM_REGISTER(0,R_DM_DSCR_BASE));
    baseaddr = (uint64_t) K0_TO_PHYS((long)d->dscr) |
	V_DM_DSCR_BASE_PRIORITY(0) |
	V_DM_DSCR_BASE_RINGSZ(4) |
	M_DM_DSCR_BASE_ENABL |
	M_DM_DSCR_BASE_RESET;
    WRITECSR(dmreg,baseaddr);

    /* Writing 1 to the count register queues the one descriptor */
    dmreg = PHYS_TO_K1(A_DM_REGISTER(0,R_DM_DSCR_COUNT));
    WRITECSR(dmreg,1);

    /*
     * Wait for the request to complete: poll the low 16 bits of
     * the count register until they read zero.
     */

    while ((READCSR(dmreg) & 0xFFFF) > 0) {
	/* Do something that doesn't involve the ZBBus to give
	   the DM some extra time ('idx' is volatile so the loop
	   is not optimized away) */
	for (idx = 0; idx < 10000; idx++) ; /* NULL LOOP */
	}

}
#endif
2891
2892
2893/*  *********************************************************************
2894    *  SB1250_DRAM_ZERO(d)
2895    *
2896    *  Zero memory, using the data mover.
2897    *
2898    *  Input parameters:
2899    *  	   d - initdata structure
2900    *
2901    *  Return value:
2902    *  	   nothing
2903    ********************************************************************* */
2904static void sb1250_dram_zero(initdata_t *d)
2905{
2906#if 0
2907    /* This is just for debugging */
2908    sb1250_dram_zero1mb(d,1);
2909#else
2910    int idx;
2911    int maxmem;
2912    uint64_t curmb;			/* current address in megabytes */
2913
2914    maxmem = (int) (d->ttlbytes >> 20);
2915    curmb = 0;
2916
2917    for (idx = 0; idx < (int) maxmem; idx++) {
2918	sb1250_dram_zero1mb(d,curmb);
2919	curmb++;
2920	if (curmb == (REGION0_LOC+REGION0_SIZE))      curmb = REGION1_LOC;
2921	else if (curmb == (REGION1_LOC+REGION1_SIZE)) curmb = REGION2_LOC;
2922	else if (curmb == (REGION2_LOC+REGION2_SIZE)) curmb = REGION3_LOC;
2923	}
2924#endif
2925}
2926
2927#endif /* !defined(_MCSTANDALONE_) */
2928
2929
2930/*  *********************************************************************
2931    *  SB1250_DRAM_INIT()
2932    *
2933    *  Initialize DRAM connected to the specified DRAM controller
2934    *  The DRAM will be configured without interleaving, as sequential
2935    *  blocks of memory.
2936    *
2937    *  Input parameters:
2938    *  	   a0 - zero to use default memconfig table
2939    *           or KSEG1 address of mem config table
2940    *
2941    *  Return value:
2942    *  	   v0 - total amount of installed DRAM
2943    *
2944    *  Registers used:
2945    *  	   all
2946    ********************************************************************* */
2947
2948uint64_t sb1250_dram_init_real(const draminittab_t *init,initdata_t *d);
2949uint64_t sb1250_dram_init_real(const draminittab_t *init,initdata_t *d)
2950{
2951    uint64_t reg;
2952    sbport_t mcbase;
2953    uint64_t cfgbits;
2954    int mcidx;
2955    int csidx;
2956    int dramtype;
2957    csdata_t *tdata;
2958    mcdata_t *mc;
2959#if !defined(_MCSTANDALONE_)
2960    const draminittab_t *init_11xx;
2961    const draminittab_t *init_12xx;
2962#endif
2963
2964#if defined(_MCSTANDALONE_)
2965    /*
2966     * If compiled standalone (e.g., as the memconfig utility), we have
2967     * to zero the initdata structure.  On hardware, this is handled
2968     * by the initdata + stack init code.
2969     */
2970    memset (d, 0, sizeof (initdata_t));
2971#endif
2972
2973#if !defined(_MCSTANDALONE_)
2974    /*
2975     * The results of these assignments won't actually be used, but if
2976     * they are not done GCC complains that draminittab_* are declared
2977     * static and never used.
2978     */
2979    init_11xx = (draminittab_t *) draminittab_11xx;
2980    init_12xx = (draminittab_t *) draminittab_12xx;
2981#endif
2982
2983    /*
2984     * Determine system SOC type so we will know what channels
2985     * to initialize.  The hybrid parts have 1250s inside,
2986     * so even though there are only pins for 1 channel the
2987     * registers are there for both, so we need to initialize them.
2988     * The "real" 1125s only have one channel.
2989     */
2990
2991    cfgbits = READCSR(PHYS_TO_K1(A_SCD_SYSTEM_REVISION));
2992    d->soctype = SYS_SOC_TYPE(cfgbits);
2993
2994    switch (d->soctype) {
2995	case  K_SYS_SOC_TYPE_BCM1120:
2996	case  K_SYS_SOC_TYPE_BCM1125:
2997	    d->firstchan = 1;
2998#if !defined(_MCSTANDALONE_)
2999	    if (!init) init = init_11xx;
3000#endif
3001	    break;
3002
3003	case K_SYS_SOC_TYPE_BCM1250:
3004	case K_SYS_SOC_TYPE_BCM1125H:
3005	default:
3006	    d->firstchan = 0;
3007#if !defined(_MCSTANDALONE_)
3008	    if (!init) init = init_12xx;
3009#endif
3010	}
3011
3012    /*
3013     * Begin by initializing the memory channels to some known state.
3014     * Set the "BERR_DISABLE" bit for now while we initialize the channels,
3015     * this will be cleared again before the routine exits.
3016     */
3017
3018#ifdef _MCSTANDALONE_NOISY_
3019    printf("DRAM: Initializing memory controller.\n");
3020#endif
3021
3022    for (mcidx = d->firstchan; mcidx < MC_CHANNELS; mcidx++) {
3023	mcbase = PHYS_TO_K1(A_MC_BASE(mcidx));
3024
3025	WRITECSR(mcbase+R_MC_CONFIG,V_MC_CONFIG_DEFAULT | M_MC_ECC_DISABLE |
3026		 V_MC_CS_MODE_MSB_CS | M_MC_BERR_DISABLE);
3027	WRITECSR(mcbase+R_MC_CS_START,0);
3028	WRITECSR(mcbase+R_MC_CS_END,0);
3029	WRITECSR(mcbase+R_MC_CS_INTERLEAVE,0);
3030	WRITECSR(mcbase+R_MC_CS_ATTR,0);
3031	WRITECSR(mcbase+R_MC_TEST_DATA,0);
3032	WRITECSR(mcbase+R_MC_TEST_ECC,0);
3033	}
3034
3035    /*
3036     * Read the parameters
3037     */
3038
3039    sb1250_dram_readparams(d,init);
3040
3041    /*
3042     * Analyze parameters
3043     */
3044
3045    sb1250_dram_analyze(d);
3046
3047    /*
3048     * Configure chip selects
3049     */
3050
3051    if (d->flags & M_MCINIT_PINTLV) {
3052	sb1250_dram_intlv(d);
3053	cfgbits = V_MC_CHANNEL_SEL(d->pintbit);
3054	}
3055    else {
3056	sb1250_dram_msbcs(d);
3057	cfgbits = 0;
3058	}
3059
3060    /*
3061     * Okay, initialize the DRAM controller(s)
3062     */
3063
3064    for (mcidx = d->firstchan; mcidx < MC_CHANNELS; mcidx++) {
3065
3066        uint64_t mc_cfgbits = cfgbits;
3067
3068	/*
3069	 * Skip this controller if we did nothing
3070	 */
3071	if (!(d->inuse & (1 << mcidx))) continue;
3072
3073	/*
3074	 * Get the base address of the controller
3075	 */
3076	mcbase = PHYS_TO_K1(A_MC_BASE(mcidx));
3077
3078	/*
3079	 * Get our MC data
3080	 */
3081
3082	mc = &(d->mc[mcidx]);
3083
3084	/*
3085	 * Program the clock config register.  This starts the clock to the
3086	 * SDRAMs.  Need to wait 200us after doing this. (6.4.6.1)
3087	 *
3088	 * Find the slowest chip/dimm among the chip selects on this
3089	 * controller and use that for computing the timing values.
3090	 */
3091
3092	csidx = sb1250_find_timingcs(mc);
3093	if (csidx < 0) continue;		/* should not happen */
3094
3095	tdata = &(d->mc[mcidx].csdata[csidx]);	/* remember for use below */
3096
3097	if (mc->mantiming) {
3098	    sb1250_manual_timing(mcidx,mc);
3099	    }
3100	else {
3101	    sb1250_auto_timing(mcidx,mc,tdata);
3102	    }
3103
3104	DRAMINIT_DELAY();
3105
3106	/*
3107	 * Set up the memory controller config and timing registers.
3108	 */
3109
3110	switch(mc->csint) {
3111	    case 0: mc_cfgbits |= V_MC_CS_MODE_MSB_CS;      break;
3112	    case 1: mc_cfgbits |= V_MC_CS_MODE_MIXED_CS_32; break;
3113	    case 2: mc_cfgbits |= V_MC_CS_MODE_INTLV_CS;    break;
3114	    }
3115
3116	mc_cfgbits |= V_MC_WR_LIMIT_DEFAULT | V_MC_AGE_LIMIT_DEFAULT |
3117	    V_MC_BANK0_MAP_DEFAULT | V_MC_BANK1_MAP_DEFAULT |
3118	    V_MC_BANK2_MAP_DEFAULT | V_MC_BANK3_MAP_DEFAULT |
3119	    V_MC_QUEUE_SIZE_DEFAULT;
3120
3121	/* Give IOB1 priority (config bit is only on channel 1) */
3122
3123	if (mcidx == 1) mc_cfgbits |= M_MC_IOB1HIGHPRIORITY;
3124
3125	WRITECSR(mcbase+R_MC_CONFIG,mc_cfgbits | M_MC_ECC_DISABLE | M_MC_BERR_DISABLE);
3126
3127
3128	dramtype = d->mc[mcidx].dramtype;
3129
3130	/*
3131	 * Set the page policy
3132	 */
3133
3134	WRITECSR(mcbase+R_MC_CS_ATTR,
3135		 V_MC_CS0_PAGE(mc->pagepolicy) |
3136		 V_MC_CS1_PAGE(mc->pagepolicy) |
3137		 V_MC_CS2_PAGE(mc->pagepolicy) |
3138		 V_MC_CS3_PAGE(mc->pagepolicy));
3139
3140	/*
3141	 * Okay, now do the following sequence:
3142	 * PRE-EMRS-MRS-PRE-AR-AR-MRS.  Do this for each chip select,
3143	 * one at a time for each enabled chip select.
3144	 */
3145
3146	for (csidx = 0; csidx < MC_CHIPSELS; csidx++) {
3147	    if (mc->csdata[csidx].flags & CS_PRESENT) {
3148
3149		switch (dramtype) {
3150		    case JEDEC:
3151			if (mc->flags & MCFLG_BIGMEM) {
3152			    sb1250_jedec_initcmds(mcidx,mc,csidx,0,tdata);
3153			    sb1250_jedec_initcmds(mcidx,mc,csidx,1,tdata);
3154			    sb1250_jedec_initcmds(mcidx,mc,csidx,2,tdata);
3155			    sb1250_jedec_initcmds(mcidx,mc,csidx,3,tdata);
3156			    /*
3157			     * If in "big memory mode" turn on the "external decode"
3158			     * switch here.  We never turn it off.
3159			     */
3160
3161			    if (mc->flags & MCFLG_BIGMEM) {
3162				sbport_t port;
3163				port = PHYS_TO_K1(A_MC_REGISTER(mcidx,R_MC_DRAMMODE));
3164				WRITECSR(port,M_MC_EXTERNALDECODE);
3165				}
3166			    }
3167			else {
3168			    sb1250_jedec_initcmds(mcidx,mc,csidx,0,tdata);
3169			    }
3170			break;
3171		    case SGRAM:
3172			sb1250_sgram_initcmds(mcidx,mc,csidx,tdata);
3173			break;
3174		    case FCRAM:
3175			sb1250_fcram_initcmds(mcidx,mc,csidx,tdata);
3176			break;
3177		    default:
3178#ifdef _MCSTANDALONE_NOISY_
3179			printf("DRAM: Channel DRAM type declared as DRAM_TYPE_SPD, but no SPD DRAM type found.\n");
3180#endif
3181			break;
3182		    }
3183
3184		}
3185	    }
3186
3187	/*
3188	 * Kill the BERR_DISABLE bit for this controller
3189	 */
3190
3191	reg = READCSR(mcbase+R_MC_CONFIG);
3192	reg &= ~M_MC_BERR_DISABLE;
3193	WRITECSR(mcbase+R_MC_CONFIG,reg);
3194	}
3195
3196#if !defined(_MCSTANDALONE_)
3197    /*
3198     * Zero the contents of memory to set the ECC bits correctly.
3199     * Do it for all memory if either channel is enabled for ECC.
3200     */
3201
3202    for (mcidx = d->firstchan; mcidx < MC_CHANNELS; mcidx++) {
3203	if (!(d->inuse & (1 << mcidx))) continue;
3204	if (d->mc[mcidx].flags & MCFLG_ECC_ENABLE) {
3205	    sb1250_dram_zero(d);
3206	    break;
3207	    }
3208	}
3209#endif
3210
3211    /*
3212     * Turn on the ECC in the memory controller for those channels
3213     * that we've specified.
3214     */
3215
3216    for (mcidx = d->firstchan; mcidx < MC_CHANNELS; mcidx++) {
3217	if (!(d->inuse & (1 << mcidx))) continue;
3218	if (!(d->mc[mcidx].flags & MCFLG_ECC_ENABLE)) continue;		/* ecc not enabled */
3219	mcbase = PHYS_TO_K1(A_MC_BASE(mcidx));
3220	reg = READCSR(mcbase+R_MC_CONFIG);
3221	reg &= ~M_MC_ECC_DISABLE;
3222	WRITECSR(mcbase+R_MC_CONFIG,reg);
3223	}
3224
3225    /*
3226     * Return the total amount of memory initialized, in megabytes
3227     */
3228
3229#ifdef _MCSTANDALONE_NOISY_
3230    printf("DRAM: Total memory: %dMB.\n",(unsigned int)(d->ttlbytes >> 20));
3231#endif
3232
3233    return (d->ttlbytes >> 20);
3234}
3235
3236
3237/*  *********************************************************************
3238    *  XXSB1250_DRAMINIT()
3239    *
3240    *  This is a hideous hack.  To help keep things all in one module,
3241    *  and to aid in relocation (remember, it's tough to do a
3242    *  PC-relative branch to an external symbol), here is an
3243    *  assembly stub to get things ready to call the above C routine.
3244    *  We turn off the bus errors on both memory controllers, set up
3245    *  a small stack, and branch to the C routine to handle the rest.
3246    *
3247    *  Input parameters:
3248    *  	   register a0 - user initialization table
3249    *
3250    *  Return value:
    *  	   register v0 - size of memory, in megabytes
3252    ********************************************************************* */
3253
3254#if !defined(_MCSTANDALONE_)
3255
void xxsb1250_draminit(const draminittab_t *init);
void xxsb1250_draminit(const draminittab_t *init)
{
    /*
     * This C function exists only as a container for the assembly
     * below, which defines the global label sb1250_dram_init.  That
     * label is the real entry point: it disables bus errors and ECC on
     * both memory controllers, carves out and zeroes a work area,
     * points $29 (sp) at it and branches to sb1250_dram_init_real().
     *
     * The asm never writes $4, and it leaves the base of the work area
     * in $5.  NOTE(review): presumably these arrive in
     * sb1250_dram_init_real() as its 'init' and 'd' arguments under the
     * MIPS calling convention -- confirm against the ABI in use.
     */

    /*
     * Work area must fit into one cache way.  The deliberately invalid
     * asm text is intended as a build-time check: it is meant to reach
     * the assembler (and fail the build) only if the condition is true.
     */
    if (WORK_AREA_SIZE > 8192)
	__asm __volatile ("ERROR WORK AREA TOO LARGE");
    __asm __volatile (" .globl sb1250_dram_init ; "
	  "sb1250_dram_init: ; "
	  " dli $10,0x30158A00C9800000 ; "	/* Set the BERR_DISABLE bits */
	  " lui	$8,0xb005 ; "			/* and ECC_DISABLE bits */
	  " sd	$10,0x1100($8) ; "		/* do MC 0 */
	  " sd	$10,0x2100($8) ; "		/* do MC 1 */

          /*
           * Make sure the writes have actually hit the memory
           * controller registers before we proceed: read one of them
           * back (result discarded into $0) and then sync.
           */
          " ld  $0, 0x2100($8) ; "
          " sync ; "

          /*
           * Calculate an area to use for the work area (initdata and
           * stack).  For Verilog/BOOTRAM code, this is in the ROM
           * area (i.e. just below the KSEG1 boundary).  For normal
           * code, this goes at the bottom of DRAM.
           *
           * Either way the result is $5 = base of the work area and
           * $29 = base + WORK_AREA_SIZE.  %0 is WORK_AREA_SIZE, passed
           * with the "I" constraint, so it must be a constant usable
           * as an immediate.
           */
#if defined( _VERILOG_)
	  " li $29,0xa0000000 ; "
	  " daddiu $5, $29, -(%0) ; "
#else
	  " li $5,0x80000000 ; "
	  " daddiu $29, $5, %0 ; "
#endif

          /*
           * Zero all of the memory in the work area *right now*,
           * so that it's all in the L1 cache.  Otherwise, stack
           * usage may cause it to be requested during a time when
           * the MC can't respond without causing an error.
           *
           * From above:
           * $5   Base of work area.
           * $29  End of work area (SP).
           * $8   Scratch register.
           *
           * The loop clears 32 bytes per pass and stops only when $8
           * is exactly equal to $29, so WORK_AREA_SIZE has to be a
           * multiple of 32 for the loop to terminate at the end of
           * the work area.
           */
          " move   $8, $5 ; "
          "1: "
          " sd     $0,  0($8) ; "
          " sd     $0,  8($8) ; "
          " sd     $0, 16($8) ; "
          " sd     $0, 24($8) ; "
          " daddiu $8, $8, 0x20 ; "
          " bne    $8, $29, 1b ; "

          /*
           * Leave space on the stack for argument register stack slots
           * required by the o32 and o64 ABIs for calls.
           */
	  " daddiu $29, $29, -32 ; "		/* reg save (o32/o64 calls) */

          /*
           * Hand off with a plain branch rather than a call: $31 is
           * not modified here, so control does not come back to this
           * stub.
           */
#if CFG_RELOC
	  " la $25,sb1250_dram_init_real ; "	/* SVR4 PIC linkage */
#endif
	  " b sb1250_dram_init_real ; "		/* Branch to real init routine */
	  : : "I"(WORK_AREA_SIZE));
}
3322
3323#else	/* _MCSTANDALONE_ */
3324
3325/*
3326 * SOCVIEW and non-CFE, non-MIPS things don't need any magic since they
3327 * are not running on the 1250.  Just call the main routine.
3328 */
3329uint64_t sb1250_dram_init(const draminittab_t *init,initdata_t *d);
3330uint64_t sb1250_dram_init(const draminittab_t *init,initdata_t *d)
3331{
3332    initdata_t initdata;
3333    return sb1250_dram_init_real(init,&initdata);
3334}
3335#endif
3336
3337
3338/*  *********************************************************************
3339    *  End  (yes, 3200 lines of memory controller init code.  Sheesh!)
3340    ********************************************************************* */
3341
3342