1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * sun9i dram controller initialisation
4 *
5 * (C) Copyright 2007-2015
6 * Allwinner Technology Co., Ltd. <www.allwinnertech.com>
7 * Jerry Wang <wangflord@allwinnertech.com>
8 *
9 * (C) Copyright 2016 Theobroma Systems Design und Consulting GmbH
10 *                    Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
11 */
12
13#include <dm.h>
14#include <errno.h>
15#include <init.h>
16#include <log.h>
17#include <ram.h>
18#include <asm/io.h>
19#include <asm/arch/clock.h>
20#include <asm/arch/dram.h>
21#include <asm/arch/sys_proto.h>
22
/* DRAM clock in Hz (CONFIG_DRAM_CLK is given in MHz) */
#define DRAM_CLK (CONFIG_DRAM_CLK * 1000000)
24
25/*
26 * The following amounts to an extensive rewrite of the code received from
27 * Allwinner as part of the open-source bootloader release (refer to
28 * https://github.com/allwinner-zh/bootloader.git) and augments the upstream
29 * sources (which act as the primary reference point for the inner workings
30 * of the 'underdocumented' DRAM controller in the A80) using the following
31 * documentation for other memory controllers based on the (Synopsys)
32 * Designware IP (DDR memory protocol controller and DDR PHY)
33 *   * TI Keystone II Architecture: DDR3 Memory Controller, User's Guide
34 *     Document 'SPRUHN7C', Oct 2013 (revised March 2015)
35 *   * Xilinx Zynq UltraScale+ MPSoC Register Reference
36 *     document ug1087 (v1.0)
37 * Note that the Zynq-documentation provides a very close match for the DDR
38 * memory protocol controller (and provides a very good guide to the rounding
39 * rules for various timings), whereas the TI Keystone II document should be
40 * referred to for DDR PHY specifics only.
41 *
42 * The DRAM controller in the A80 runs at half the frequency of the DDR PHY
43 * (i.e. the rules for MEMC_FREQ_RATIO=2 from the Zynq-documentation apply).
44 *
45 * Known limitations
46 * =================
47 * In the current state, the following features are not fully supported and
48 * a number of simplifying assumptions have been made:
49 *   1) Only DDR3 support is implemented, as our test platform (the A80-Q7
 *      module) is designed to accommodate DDR3/DDR3L.
51 *   2) Only 2T-mode has been implemented and tested.
52 *   3) The controller supports two different clocking strategies (PLL6 can
53 *      either be 2*CK or CK/2)... we only support the 2*CK clock at this
54 *      time and haven't verified whether the alternative clocking strategy
55 *      works.  If you are interested in porting this over/testing this,
56 *      please refer to cases where bit 0 of 'dram_tpr8' is tested in the
57 *      original code from Allwinner.
58 *   4) Support for 2 ranks per controller is not implemented (as we don't
 *      have the hardware to test it).
60 *
61 * Future directions
62 * =================
63 * The driver should be driven from a device-tree based configuration that
64 * can dynamically provide the necessary timing parameters (i.e. target
65 * frequency and speed-bin information)---the data structures used in the
66 * calculation of the timing parameters are already designed to capture
67 * similar information as the device tree would provide.
68 *
69 * To enable a device-tree based configuration of the sun9i platform, we
70 * will need to enable CONFIG_TPL and bootstrap in 3 stages: initially
71 * into SRAM A1 (40KB) and next into SRAM A2 (160KB)---which would be the
72 * stage to initialise the platform via the device-tree---before having
73 * the full U-Boot run from DDR.
74 */
75
76/*
77 * A number of DDR3 timings are given as "the greater of a fixed number of
78 * clock cycles (CK) or nanoseconds.  We express these using a structure
79 * that holds a cycle count and a duration in picoseconds (so we can model
80 * sub-ns timings, such as 7.5ns without losing precision or resorting to
81 * rounding up early.
82 */
83struct dram_sun9i_timing {
84	u32 ck;
85	u32 ps;
86};
87
88/* */
89struct dram_sun9i_cl_cwl_timing {
90	u32 CL;
91	u32 CWL;
92	u32 tCKmin;  /* in ps */
93	u32 tCKmax;  /* in ps */
94};
95
/*
 * DRAM geometry and timing parameters for one configuration; filled in
 * by sunxi_dram_init() and consumed by the channel/COM setup functions.
 */
struct dram_sun9i_para {
	u32 dram_type;     /* DRAM_TYPE_DDR3, ... */

	u8 bus_width;      /* per-channel data bus width, in bits */
	u8 chan;           /* number of channels (1 or 2) */
	u8 rank;           /* ranks per channel */
	u8 rows;           /* row address bits */
	u16 page_size;     /* page size, in bytes */

	/* Timing information for each speed-bin */
	struct dram_sun9i_cl_cwl_timing *cl_cwl_table;
	u32 cl_cwl_numentries;

	/*
	 * For the timings, we try to keep the order and grouping used in
	 * JEDEC Standard No. 79-3F
	 */

	/* timings */
	u32 tREFI; /* in ns */
	u32 tRFC;  /* in ns */

	u32 tRAS;  /* in ps */

	/* command and address timing */
	u32 tDLLK; /* in nCK */
	struct dram_sun9i_timing tRTP;
	struct dram_sun9i_timing tWTR;
	u32 tWR;   /* in ns (converted via NS2CYCLES in mctl_channel_init) */
	u32 tMRD;  /* in nCK */
	struct dram_sun9i_timing tMOD;
	u32 tRCD;  /* in ps */
	u32 tRP;   /* in ps */
	u32 tRC;   /* in ps */
	u32 tCCD;  /* in nCK */
	struct dram_sun9i_timing tRRD;
	u32 tFAW;  /* in ps */

	/* calibration timing */
	/* struct dram_sun9i_timing tZQinit; */
	struct dram_sun9i_timing tZQoper;
	struct dram_sun9i_timing tZQCS;

	/* reset timing */
	/* struct dram_sun9i_timing tXPR; */

	/* self-refresh timings */
	struct dram_sun9i_timing tXS;
	u32 tXSDLL; /* in nCK */
	/* struct dram_sun9i_timing tCKESR; */
	struct dram_sun9i_timing tCKSRE;
	struct dram_sun9i_timing tCKSRX;

	/* power-down timings */
	struct dram_sun9i_timing tXP;
	struct dram_sun9i_timing tXPDLL;
	struct dram_sun9i_timing tCKE;

	/* write leveling timings */
	u32 tWLMRD;    /* min, in nCK */
	/* u32 tWLDQSEN;  min, in nCK */
	u32 tWLO;      /* max, in ps (converted via PS2CYCLES) */
	/* u32 tWLOE;     max, in ns */

	/* u32 tCKDPX;    in nCK */
	/* u32 tCKCSX;    in nCK */
};
163
static void mctl_sys_init(void);

/*
 * Bit fields of the per-controller SCHED register (transaction
 * scheduler configuration).  Arguments are fully parenthesized so the
 * macros are safe for compound expressions.
 */
#define SCHED_RDWR_IDLE_GAP(n)            (((n) & 0xff) << 24)
#define SCHED_GO2CRITICAL_HYSTERESIS(n)   (((n) & 0xff) << 16)
#define SCHED_LPR_NUM_ENTRIES(n)          (((n) & 0xff) <<  8)
#define SCHED_PAGECLOSE                   (1 << 2)
#define SCHED_PREFER_WRITE                (1 << 1)
#define SCHED_FORCE_LOW_PRI_N             (1 << 0)

#define SCHED_CONFIG		(SCHED_RDWR_IDLE_GAP(0xf) | \
				 SCHED_GO2CRITICAL_HYSTERESIS(0x80) | \
				 SCHED_LPR_NUM_ENTRIES(0x20) | \
				 SCHED_FORCE_LOW_PRI_N)
/* QoS/starvation thresholds for the HPR, LPR and write queues */
#define PERFHPR0_CONFIG                   0x0000001f
#define PERFHPR1_CONFIG                   0x1f00001f
#define PERFLPR0_CONFIG                   0x000000ff
#define PERFLPR1_CONFIG                   0x0f0000ff
#define PERFWR0_CONFIG                    0x000000ff
#define PERFWR1_CONFIG                    0x0f0001ff
183
184static void mctl_ctl_sched_init(unsigned long  base)
185{
186	struct sunxi_mctl_ctl_reg *mctl_ctl =
187		(struct sunxi_mctl_ctl_reg *)base;
188
189	/* Needs to be done before the global clk enable... */
190	writel(SCHED_CONFIG, &mctl_ctl->sched);
191	writel(PERFHPR0_CONFIG, &mctl_ctl->perfhpr0);
192	writel(PERFHPR1_CONFIG, &mctl_ctl->perfhpr1);
193	writel(PERFLPR0_CONFIG, &mctl_ctl->perflpr0);
194	writel(PERFLPR1_CONFIG, &mctl_ctl->perflpr1);
195	writel(PERFWR0_CONFIG, &mctl_ctl->perfwr0);
196	writel(PERFWR1_CONFIG, &mctl_ctl->perfwr1);
197}
198
/*
 * One-time clock/reset bring-up shared by both channels: program PLL6
 * to 2*CK, cycle the MCTL AHB reset and clock gate, configure the
 * per-controller schedulers, switch the DRAM clock to PLL_DDR and
 * finally enable the per-channel clocks in the COM block.
 */
static void mctl_sys_init(void)
{
	struct sunxi_ccm_reg * const ccm =
		(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
	struct sunxi_mctl_com_reg * const mctl_com =
		(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;

	/* PLL6 runs at twice the DRAM clock (2*CK strategy, see top note) */
	debug("Setting PLL6 to %d\n", DRAM_CLK * 2);
	clock_set_pll6(DRAM_CLK * 2);

	/* Original dram init code which may come in handy later
	********************************************************
	clock_set_pll6(use_2channelPLL ? (DRAM_CLK * 2) :
					 (DRAM_CLK / 2), false);

	if ((para->dram_clk <= 400)|((para->dram_tpr8 & 0x1)==0)) {
		 * PLL6 should be 2*CK *
		 * ccm_setup_pll6_ddr_clk(PLL6_DDR_CLK); *
		ccm_setup_pll6_ddr_clk((1000000 * (para->dram_clk) * 2), 0);
	} else {
		 * PLL6 should be CK/2 *
		ccm_setup_pll6_ddr_clk((1000000 * (para->dram_clk) / 2), 1);
	}

	if (para->dram_tpr13 & (0xf<<18)) {
		 *
		 * bit21:bit18=0001:pll swing 0.4
		 * bit21:bit18=0010:pll swing 0.3
		 * bit21:bit18=0100:pll swing 0.2
		 * bit21:bit18=1000:pll swing 0.1
		 *
		dram_dbg("DRAM fre extend open !\n");
		reg_val=mctl_read_w(CCM_PLL6_DDR_REG);
		reg_val&=(0x1<<16);
		reg_val=reg_val>>16;

		if(para->dram_tpr13 & (0x1<<18))
		{
			mctl_write_w(CCM_PLL_BASE + 0x114,
				(0x3333U|(0x3<<17)|(reg_val<<19)|(0x120U<<20)|
				(0x2U<<29)|(0x1U<<31)));
		}
		else if(para->dram_tpr13 & (0x1<<19))
		{
			mctl_write_w(CCM_PLL_BASE + 0x114,
				(0x6666U|(0x3U<<17)|(reg_val<<19)|(0xD8U<<20)|
				(0x2U<<29)|(0x1U<<31)));
		}
		else if(para->dram_tpr13 & (0x1<<20))
		{
			mctl_write_w(CCM_PLL_BASE + 0x114,
				(0x9999U|(0x3U<<17)|(reg_val<<19)|(0x90U<<20)|
				(0x2U<<29)|(0x1U<<31)));
		}
		else if(para->dram_tpr13 & (0x1<<21))
		{
			mctl_write_w(CCM_PLL_BASE + 0x114,
				(0xccccU|(0x3U<<17)|(reg_val<<19)|(0x48U<<20)|
				(0x2U<<29)|(0x1U<<31)));
		}

		//frequency extend open
		reg_val = mctl_read_w(CCM_PLL6_DDR_REG);
		reg_val |= ((0x1<<24)|(0x1<<30));
		mctl_write_w(CCM_PLL6_DDR_REG, reg_val);


		while(mctl_read_w(CCM_PLL6_DDR_REG) & (0x1<<30));
	}

	aw_delay(0x20000);	//make some delay
	********************************************************
	*/

	/* assert mctl reset */
	clrbits_le32(&ccm->ahb_reset0_cfg, 1 << AHB_RESET_OFFSET_MCTL);
	/* stop mctl clock */
	clrbits_le32(&ccm->ahb_gate0, 1 << AHB_GATE_OFFSET_MCTL);

	sdelay(2000);

	/* deassert mctl reset */
	setbits_le32(&ccm->ahb_reset0_cfg, 1 << AHB_RESET_OFFSET_MCTL);
	/* enable mctl clock */
	setbits_le32(&ccm->ahb_gate0, 1 << AHB_GATE_OFFSET_MCTL);

	/* set up the transactions scheduling before enabling the global clk */
	mctl_ctl_sched_init(SUNXI_DRAM_CTL0_BASE);
	mctl_ctl_sched_init(SUNXI_DRAM_CTL1_BASE);
	sdelay(1000);

	debug("2\n");

	/* (3 << 12): PLL_DDR */
	/* (1 << 16): request a clock update; hardware clears it when done */
	writel((3 << 12) | (1 << 16), &ccm->dram_clk_cfg);
	do {
		debug("Waiting for DRAM_CLK_CFG\n");
		sdelay(10000);
	} while (readl(&ccm->dram_clk_cfg) & (1 << 16));
	/* NOTE(review): bit 31 presumably gates the DRAM clock on -- verify */
	setbits_le32(&ccm->dram_clk_cfg, (1 << 31));

	/* TODO: we only support the common case ... i.e. 2*CK */
	setbits_le32(&mctl_com->ccr, (1 << 14) | (1 << 30));
	writel(2, &mctl_com->rmcr); /* controller clock is PLL6/4 */

	sdelay(2000);

	/* Original dram init code which may come in handy later
	********************************************************
	if ((para->dram_clk <= 400) | ((para->dram_tpr8 & 0x1) == 0)) {
		 * PLL6 should be 2*CK *
		 * gating 2 channel pll *
		reg_val = mctl_read_w(MC_CCR);
		reg_val |= ((0x1 << 14) | (0x1U << 30));
		mctl_write_w(MC_CCR, reg_val);
		mctl_write_w(MC_RMCR, 0x2); * controller clock use pll6/4 *
	} else {
		 * enable 2 channel pll *
		reg_val = mctl_read_w(MC_CCR);
		reg_val &= ~((0x1 << 14) | (0x1U << 30));
		mctl_write_w(MC_CCR, reg_val);
		mctl_write_w(MC_RMCR, 0x0); * controller clock use pll6 *
	}

	reg_val = mctl_read_w(MC_CCR);
	reg_val &= ~((0x1<<15)|(0x1U<<31));
	mctl_write_w(MC_CCR, reg_val);
	aw_delay(20);
	//aw_delay(0x10);
	********************************************************
	*/

	/* cycle both channel clocks: gate them off, then re-enable */
	clrbits_le32(&mctl_com->ccr, MCTL_CCR_CH0_CLK_EN | MCTL_CCR_CH1_CLK_EN);
	sdelay(1000);

	setbits_le32(&mctl_com->ccr, MCTL_CCR_CH0_CLK_EN);
	/* TODO if (para->chan == 2) */
	setbits_le32(&mctl_com->ccr, MCTL_CCR_CH1_CLK_EN);
}
338
339static void mctl_com_init(struct dram_sun9i_para *para)
340{
341	struct sunxi_mctl_com_reg * const mctl_com =
342		(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
343
344	/* TODO: hard-wired for DDR3 now */
345	writel(((para->chan == 2) ? MCTL_CR_CHANNEL_DUAL :
346				    MCTL_CR_CHANNEL_SINGLE)
347	       | MCTL_CR_DRAMTYPE_DDR3 | MCTL_CR_BANK(1)
348	       | MCTL_CR_ROW(para->rows)
349	       | ((para->bus_width == 32) ? MCTL_CR_BUSW32 : MCTL_CR_BUSW16)
350	       | MCTL_CR_PAGE_SIZE(para->page_size) | MCTL_CR_RANK(para->rank),
351	       &mctl_com->cr);
352
353	debug("CR: %d\n", readl(&mctl_com->cr));
354}
355
356static u32 mctl_channel_init(u32 ch_index, struct dram_sun9i_para *para)
357{
358	struct sunxi_mctl_ctl_reg *mctl_ctl;
359	struct sunxi_mctl_phy_reg *mctl_phy;
360
361	u32 CL = 0;
362	u32 CWL = 0;
363	u16 mr[4] = { 0, };
364
365#define PS2CYCLES_FLOOR(n)    ((n * CONFIG_DRAM_CLK) / 1000000)
366#define PS2CYCLES_ROUNDUP(n)  ((n * CONFIG_DRAM_CLK + 999999) / 1000000)
367#define NS2CYCLES_FLOOR(n)    ((n * CONFIG_DRAM_CLK) / 1000)
368#define NS2CYCLES_ROUNDUP(n)  ((n * CONFIG_DRAM_CLK + 999) / 1000)
369#define MAX(a, b)             ((a) > (b) ? (a) : (b))
370
371	/*
372	 * Convert the values to cycle counts (nCK) from what is provided
373	 * by the definition of each speed bin.
374	 */
375	/* const u32 tREFI = NS2CYCLES_FLOOR(para->tREFI); */
376	const u32 tREFI = NS2CYCLES_FLOOR(para->tREFI);
377	const u32 tRFC  = NS2CYCLES_ROUNDUP(para->tRFC);
378	const u32 tRCD  = PS2CYCLES_ROUNDUP(para->tRCD);
379	const u32 tRP   = PS2CYCLES_ROUNDUP(para->tRP);
380	const u32 tRC   = PS2CYCLES_ROUNDUP(para->tRC);
381	const u32 tRAS  = PS2CYCLES_ROUNDUP(para->tRAS);
382
383	/* command and address timing */
384	const u32 tDLLK = para->tDLLK;
385	const u32 tRTP  = MAX(para->tRTP.ck, PS2CYCLES_ROUNDUP(para->tRTP.ps));
386	const u32 tWTR  = MAX(para->tWTR.ck, PS2CYCLES_ROUNDUP(para->tWTR.ps));
387	const u32 tWR   = NS2CYCLES_FLOOR(para->tWR);
388	const u32 tMRD  = para->tMRD;
389	const u32 tMOD  = MAX(para->tMOD.ck, PS2CYCLES_ROUNDUP(para->tMOD.ps));
390	const u32 tCCD  = para->tCCD;
391	const u32 tRRD  = MAX(para->tRRD.ck, PS2CYCLES_ROUNDUP(para->tRRD.ps));
392	const u32 tFAW  = PS2CYCLES_ROUNDUP(para->tFAW);
393
394	/* calibration timings */
395	/* const u32 tZQinit = MAX(para->tZQinit.ck,
396				PS2CYCLES_ROUNDUP(para->tZQinit.ps)); */
397	const u32 tZQoper = MAX(para->tZQoper.ck,
398				PS2CYCLES_ROUNDUP(para->tZQoper.ps));
399	const u32 tZQCS   = MAX(para->tZQCS.ck,
400				PS2CYCLES_ROUNDUP(para->tZQCS.ps));
401
402	/* reset timing */
403	/* const u32 tXPR  = MAX(para->tXPR.ck,
404				PS2CYCLES_ROUNDUP(para->tXPR.ps)); */
405
406	/* power-down timings */
407	const u32 tXP    = MAX(para->tXP.ck, PS2CYCLES_ROUNDUP(para->tXP.ps));
408	const u32 tXPDLL = MAX(para->tXPDLL.ck,
409			       PS2CYCLES_ROUNDUP(para->tXPDLL.ps));
410	const u32 tCKE   = MAX(para->tCKE.ck, PS2CYCLES_ROUNDUP(para->tCKE.ps));
411
412	/*
413	 * self-refresh timings (keep below power-down timings, as tCKESR
414	 * needs to be calculated based on the nCK value of tCKE)
415	 */
416	const u32 tXS    = MAX(para->tXS.ck, PS2CYCLES_ROUNDUP(para->tXS.ps));
417	const u32 tXSDLL = para->tXSDLL;
418	const u32 tCKSRE = MAX(para->tCKSRE.ck,
419			       PS2CYCLES_ROUNDUP(para->tCKSRE.ps));
420	const u32 tCKESR = tCKE + 1;
421	const u32 tCKSRX = MAX(para->tCKSRX.ck,
422			       PS2CYCLES_ROUNDUP(para->tCKSRX.ps));
423
424	/* write leveling timings */
425	const u32 tWLMRD = para->tWLMRD;
426	/* const u32 tWLDQSEN = para->tWLDQSEN; */
427	const u32 tWLO = PS2CYCLES_FLOOR(para->tWLO);
428	/* const u32 tWLOE = PS2CYCLES_FLOOR(para->tWLOE); */
429
430	const u32 tRASmax = tREFI * 9;
431	int i;
432
433	for (i = 0; i < para->cl_cwl_numentries; ++i) {
434		const u32 tCK = 1000000 / CONFIG_DRAM_CLK;
435
436		if ((para->cl_cwl_table[i].tCKmin <= tCK) &&
437		    (tCK < para->cl_cwl_table[i].tCKmax)) {
438			CL = para->cl_cwl_table[i].CL;
439			CWL = para->cl_cwl_table[i].CWL;
440
441			debug("found CL/CWL: CL = %d, CWL = %d\n", CL, CWL);
442			break;
443		}
444	}
445
446	if ((CL == 0) && (CWL == 0)) {
447		printf("failed to find valid CL/CWL for operating point %d MHz\n",
448		       CONFIG_DRAM_CLK);
449		return 0;
450	}
451
452	if (ch_index == 0) {
453		mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
454		mctl_phy = (struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE;
455	} else {
456		mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL1_BASE;
457		mctl_phy = (struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY1_BASE;
458	}
459
460	if (para->dram_type == DRAM_TYPE_DDR3) {
461		mr[0] = DDR3_MR0_PPD_FAST_EXIT | DDR3_MR0_WR(tWR) |
462			DDR3_MR0_CL(CL);
463		mr[1] = DDR3_MR1_RTT120OHM;
464		mr[2] = DDR3_MR2_TWL(CWL);
465		mr[3] = 0;
466
467		/*
468		 * DRAM3 initialisation requires holding CKE LOW for
469		 * at least 500us prior to starting the initialisation
470		 * sequence and at least 10ns after driving CKE HIGH
471		 * before the initialisation sequence may be started).
472		 *
473		 * Refer to Micron document "TN-41-07: DDR3 Power-Up,
474		 * Initialization, and Reset DDR3 Initialization
475		 * Routine" for details).
476		 */
477		writel(MCTL_INIT0_POST_CKE_x1024(1) |
478		       MCTL_INIT0_PRE_CKE_x1024(
479			    (500 * CONFIG_DRAM_CLK + 1023) / 1024), /* 500us */
480		       &mctl_ctl->init[0]);
481		writel(MCTL_INIT1_DRAM_RSTN_x1024(1),
482		       &mctl_ctl->init[1]);
483		/* INIT2 is not used for DDR3 */
484		writel(MCTL_INIT3_MR(mr[0]) | MCTL_INIT3_EMR(mr[1]),
485		       &mctl_ctl->init[3]);
486		writel(MCTL_INIT4_EMR2(mr[2]) | MCTL_INIT4_EMR3(mr[3]),
487		       &mctl_ctl->init[4]);
488		writel(MCTL_INIT5_DEV_ZQINIT_x32(512 / 32), /* 512 cycles */
489		       &mctl_ctl->init[5]);
490	} else {
491		/* !!! UNTESTED !!! */
492		/*
493		 * LPDDR2 and/or LPDDR3 require a 200us minimum delay
494		 * after driving CKE HIGH in the initialisation sequence.
495		 */
496		writel(MCTL_INIT0_POST_CKE_x1024(
497				(200 * CONFIG_DRAM_CLK + 1023) / 1024),
498		       &mctl_ctl->init[0]);
499		writel(MCTL_INIT1_DRAM_RSTN_x1024(1),
500		       &mctl_ctl->init[1]);
501		writel(MCTL_INIT2_IDLE_AFTER_RESET_x32(
502				(CONFIG_DRAM_CLK + 31) / 32) /* 1us */
503		       | MCTL_INIT2_MIN_STABLE_CLOCK_x1(5),  /* 5 cycles */
504		       &mctl_ctl->init[2]);
505		writel(MCTL_INIT3_MR(mr[1]) | MCTL_INIT3_EMR(mr[2]),
506		       &mctl_ctl->init[3]);
507		writel(MCTL_INIT4_EMR2(mr[3]),
508		       &mctl_ctl->init[4]);
509		writel(MCTL_INIT5_DEV_ZQINIT_x32(
510				(CONFIG_DRAM_CLK + 31) / 32) /* 1us */
511		       | MCTL_INIT5_MAX_AUTO_INIT_x1024(
512				(10 * CONFIG_DRAM_CLK + 1023) / 1024),
513		       &mctl_ctl->init[5]);
514	}
515
516	/* (DDR3) We always use a burst-length of 8. */
517#define MCTL_BL               8
518	/* wr2pre: WL + BL/2 + tWR */
519#define WR2PRE           (MCTL_BL/2 + CWL + tWTR)
520	/* wr2rd = CWL + BL/2 + tWTR */
521#define WR2RD            (MCTL_BL/2 + CWL + tWTR)
522	/*
523	 * rd2wr = RL + BL/2 + 2 - WL (for DDR3)
524	 * rd2wr = RL + BL/2 + RU(tDQSCKmax/tCK) + 1 - WL (for LPDDR2/LPDDR3)
525	 */
526#define RD2WR            (CL + MCTL_BL/2 + 2 - CWL)
527#define MCTL_PHY_TRTW        0
528#define MCTL_PHY_TRTODT      0
529
530#define MCTL_DIV2(n)         ((n + 1)/2)
531#define MCTL_DIV32(n)        (n/32)
532#define MCTL_DIV1024(n)      (n/1024)
533
534	writel((MCTL_DIV2(WR2PRE) << 24) | (MCTL_DIV2(tFAW) << 16) |
535	       (MCTL_DIV1024(tRASmax) << 8) | (MCTL_DIV2(tRAS) << 0),
536	       &mctl_ctl->dramtmg[0]);
537	writel((MCTL_DIV2(tXP) << 16) | (MCTL_DIV2(tRTP) << 8) |
538	       (MCTL_DIV2(tRC) << 0),
539	       &mctl_ctl->dramtmg[1]);
540	writel((MCTL_DIV2(CWL) << 24) | (MCTL_DIV2(CL) << 16) |
541	       (MCTL_DIV2(RD2WR) << 8) | (MCTL_DIV2(WR2RD) << 0),
542	       &mctl_ctl->dramtmg[2]);
543	/*
544	 * Note: tMRW is located at bit 16 (and up) in DRAMTMG3...
545	 * this is only relevant for LPDDR2/LPDDR3
546	 */
547	writel((MCTL_DIV2(tMRD) << 12) | (MCTL_DIV2(tMOD) << 0),
548	       &mctl_ctl->dramtmg[3]);
549	writel((MCTL_DIV2(tRCD) << 24) | (MCTL_DIV2(tCCD) << 16) |
550	       (MCTL_DIV2(tRRD) << 8) | (MCTL_DIV2(tRP) << 0),
551	       &mctl_ctl->dramtmg[4]);
552	writel((MCTL_DIV2(tCKSRX) << 24) | (MCTL_DIV2(tCKSRE) << 16) |
553	       (MCTL_DIV2(tCKESR) << 8) | (MCTL_DIV2(tCKE) << 0),
554	       &mctl_ctl->dramtmg[5]);
555
556	/* These timings are relevant for LPDDR2/LPDDR3 only */
557	/* writel((MCTL_TCKDPDE << 24) | (MCTL_TCKDPX << 16) |
558	       (MCTL_TCKCSX << 0), &mctl_ctl->dramtmg[6]); */
559
560	/* printf("DRAMTMG7 reset value: 0x%x\n",
561		readl(&mctl_ctl->dramtmg[7])); */
562	/* DRAMTMG7 reset value: 0x202 */
563	/* DRAMTMG7 should contain t_ckpde and t_ckpdx: check reset values!!! */
564	/* printf("DRAMTMG8 reset value: 0x%x\n",
565		readl(&mctl_ctl->dramtmg[8])); */
566	/* DRAMTMG8 reset value: 0x44 */
567
568	writel((MCTL_DIV32(tXSDLL) << 0), &mctl_ctl->dramtmg[8]);
569
570	writel((MCTL_DIV32(tREFI) << 16) | (MCTL_DIV2(tRFC) << 0),
571	       &mctl_ctl->rfshtmg);
572
573	if (para->dram_type == DRAM_TYPE_DDR3) {
574		writel((2 << 24) | ((MCTL_DIV2(CL) - 2) << 16) |
575		       (1 << 8) | ((MCTL_DIV2(CWL) - 2) << 0),
576			&mctl_ctl->dfitmg[0]);
577	} else {
578		/* TODO */
579	}
580
581	/* TODO: handle the case of the write latency domain going to 0 ... */
582
583	/*
584	 * Disable dfi_init_complete_en (the triggering of the SDRAM
585	 * initialisation when the PHY initialisation completes).
586	 */
587	clrbits_le32(&mctl_ctl->dfimisc, MCTL_DFIMISC_DFI_INIT_COMPLETE_EN);
588	/* Disable the automatic generation of DLL calibration requests */
589	setbits_le32(&mctl_ctl->dfiupd[0], MCTL_DFIUPD0_DIS_AUTO_CTRLUPD);
590
591	/* A80-Q7: 2T, 1 rank, DDR3, full-32bit-DQ */
592	/* TODO: make 2T and BUSWIDTH configurable  */
593	writel(MCTL_MSTR_DEVICETYPE(para->dram_type) |
594	       MCTL_MSTR_BURSTLENGTH(para->dram_type) |
595	       MCTL_MSTR_ACTIVERANKS(para->rank) |
596	       MCTL_MSTR_2TMODE | MCTL_MSTR_BUSWIDTH32,
597	       &mctl_ctl->mstr);
598
599	if (para->dram_type == DRAM_TYPE_DDR3) {
600		writel(MCTL_ZQCTRL0_TZQCL(MCTL_DIV2(tZQoper)) |
601		       (MCTL_DIV2(tZQCS)), &mctl_ctl->zqctrl[0]);
602		/*
603		 * TODO: is the following really necessary as the bottom
604		 * half should already be 0x100 and the upper half should
605		 * be ignored for a DDR3 device???
606		 */
607		writel(MCTL_ZQCTRL1_TZQSI_x1024(0x100),
608		       &mctl_ctl->zqctrl[1]);
609	} else {
610		writel(MCTL_ZQCTRL0_TZQCL(0x200) | MCTL_ZQCTRL0_TZQCS(0x40),
611		       &mctl_ctl->zqctrl[0]);
612		writel(MCTL_ZQCTRL1_TZQRESET(0x28) |
613		       MCTL_ZQCTRL1_TZQSI_x1024(0x100),
614		       &mctl_ctl->zqctrl[1]);
615	}
616
617	/* Assert dfi_init_complete signal */
618	setbits_le32(&mctl_ctl->dfimisc, MCTL_DFIMISC_DFI_INIT_COMPLETE_EN);
619	/* Disable auto-refresh */
620	setbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
621
622	/* PHY initialisation */
623
624	/* TODO: make 2T and 8-bank mode configurable  */
625	writel(MCTL_PHY_DCR_BYTEMASK | MCTL_PHY_DCR_2TMODE |
626	       MCTL_PHY_DCR_DDR8BNK | MCTL_PHY_DRAMMODE_DDR3,
627	       &mctl_phy->dcr);
628
629	/* For LPDDR2 or LPDDR3, set DQSGX to 0 before training. */
630	if (para->dram_type != DRAM_TYPE_DDR3)
631		clrbits_le32(&mctl_phy->dsgcr, (3 << 6));
632
633	writel(mr[0], &mctl_phy->mr0);
634	writel(mr[1], &mctl_phy->mr1);
635	writel(mr[2], &mctl_phy->mr2);
636	writel(mr[3], &mctl_phy->mr3);
637
638	/*
639	 * The DFI PHY is running at full rate. We thus use the actual
640	 * timings in clock cycles here.
641	 */
642	writel((tRC << 26) | (tRRD << 22) | (tRAS << 16) |
643	       (tRCD << 12) | (tRP << 8) | (tWTR << 4) | (tRTP << 0),
644		&mctl_phy->dtpr[0]);
645	writel((tMRD << 0) | ((tMOD - 12) << 2) | (tFAW << 5) |
646	       (tRFC << 11) | (tWLMRD << 20) | (tWLO << 26),
647	       &mctl_phy->dtpr[1]);
648	writel((tXS << 0) | (MAX(tXP, tXPDLL) << 10) |
649	       (tCKE << 15) | (tDLLK << 19) |
650	       (MCTL_PHY_TRTODT << 29) | (MCTL_PHY_TRTW << 30) |
651	       (((tCCD - 4) & 0x1) << 31),
652	       &mctl_phy->dtpr[2]);
653
654	/* tDQSCK and tDQSCKmax are used LPDDR2/LPDDR3 */
655	/* writel((tDQSCK << 0) | (tDQSCKMAX << 3), &mctl_phy->dtpr[3]); */
656
657	/*
658	 * We use the same values used by Allwinner's Boot0 for the PTR
659	 * (PHY timing register) configuration that is tied to the PHY
660	 * implementation.
661	 */
662	writel(0x42C21590, &mctl_phy->ptr[0]);
663	writel(0xD05612C0, &mctl_phy->ptr[1]);
664	if (para->dram_type == DRAM_TYPE_DDR3) {
665		const unsigned int tdinit0 = 500 * CONFIG_DRAM_CLK; /* 500us */
666		const unsigned int tdinit1 = (360 * CONFIG_DRAM_CLK + 999) /
667			1000; /* 360ns */
668		const unsigned int tdinit2 = 200 * CONFIG_DRAM_CLK; /* 200us */
669		const unsigned int tdinit3 = CONFIG_DRAM_CLK; /* 1us */
670
671		writel((tdinit1 << 20) | tdinit0, &mctl_phy->ptr[3]);
672		writel((tdinit3 << 18) | tdinit2, &mctl_phy->ptr[4]);
673	} else {
674		/* LPDDR2 or LPDDR3 */
675		const unsigned int tdinit0 = (100 * CONFIG_DRAM_CLK + 999) /
676			1000; /* 100ns */
677		const unsigned int tdinit1 = 200 * CONFIG_DRAM_CLK; /* 200us */
678		const unsigned int tdinit2 = 22 * CONFIG_DRAM_CLK; /* 11us */
679		const unsigned int tdinit3 = 2 * CONFIG_DRAM_CLK; /* 2us */
680
681		writel((tdinit1 << 20) | tdinit0, &mctl_phy->ptr[3]);
682		writel((tdinit3 << 18) | tdinit2, &mctl_phy->ptr[4]);
683	}
684
685	/* TEST ME */
686	writel(0x00203131, &mctl_phy->acmdlr);
687
688	/* TODO: can we enable this for 2 ranks, even when we don't know yet */
689	writel(MCTL_DTCR_DEFAULT | MCTL_DTCR_RANKEN(para->rank),
690	       &mctl_phy->dtcr);
691
692	/* TODO: half width */
693	debug("DX2GCR0 reset: 0x%x\n", readl(&mctl_phy->dx[2].gcr[0]));
694	writel(0x7C000285, &mctl_phy->dx[2].gcr[0]);
695	writel(0x7C000285, &mctl_phy->dx[3].gcr[0]);
696
697	clrsetbits_le32(&mctl_phy->zq[0].pr, 0xff,
698			(CONFIG_DRAM_ZQ >>  0) & 0xff);  /* CK/CA */
699	clrsetbits_le32(&mctl_phy->zq[1].pr, 0xff,
700			(CONFIG_DRAM_ZQ >>  8) & 0xff);  /* DX0/DX1 */
701	clrsetbits_le32(&mctl_phy->zq[2].pr, 0xff,
702			(CONFIG_DRAM_ZQ >> 16) & 0xff);  /* DX2/DX3 */
703
704	/* TODO: make configurable & implement non-ODT path */
705	if (1) {
706		int lane;
707		for (lane = 0; lane < 4; ++lane) {
708			clrbits_le32(&mctl_phy->dx[lane].gcr[2], 0xffff);
709			clrbits_le32(&mctl_phy->dx[lane].gcr[3],
710				     (0x3<<12) | (0x3<<4));
711		}
712	} else {
713		/* TODO: check */
714		int lane;
715		for (lane = 0; lane < 4; ++lane) {
716			clrsetbits_le32(&mctl_phy->dx[lane].gcr[2], 0xffff,
717					0xaaaa);
718			if (para->dram_type == DRAM_TYPE_DDR3)
719				setbits_le32(&mctl_phy->dx[lane].gcr[3],
720					     (0x3<<12) | (0x3<<4));
721			else
722				setbits_le32(&mctl_phy->dx[lane].gcr[3],
723					     0x00000012);
724		}
725	}
726
727	writel(0x04058D02, &mctl_phy->zq[0].cr); /* CK/CA */
728	writel(0x04058D02, &mctl_phy->zq[1].cr); /* DX0/DX1 */
729	writel(0x04058D02, &mctl_phy->zq[2].cr); /* DX2/DX3 */
730
731	/* Disable auto-refresh prior to data training */
732	setbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
733
734	setbits_le32(&mctl_phy->dsgcr, 0xf << 24); /* unclear what this is... */
735	/* TODO: IODDRM (IO DDR-MODE) for DDR3L */
736	clrsetbits_le32(&mctl_phy->pgcr[1],
737			MCTL_PGCR1_ZCKSEL_MASK,
738			MCTL_PGCR1_IODDRM_DDR3 | MCTL_PGCR1_INHVT_EN);
739
740	setbits_le32(&mctl_phy->pllcr, 0x3 << 19); /* PLL frequency select */
741	/* TODO: single-channel PLL mode??? missing */
742	setbits_le32(&mctl_phy->pllcr,
743		     MCTL_PLLGCR_PLL_BYPASS | MCTL_PLLGCR_PLL_POWERDOWN);
744	/* setbits_le32(&mctl_phy->pir, MCTL_PIR_PLL_BYPASS); included below */
745
746	/* Disable VT compensation */
747	clrbits_le32(&mctl_phy->pgcr[0], 0x3f);
748
749	/* TODO: "other" PLL mode ... 0x20000 seems to be the PLL Bypass */
750	if (para->dram_type == DRAM_TYPE_DDR3)
751		clrsetbits_le32(&mctl_phy->pir, MCTL_PIR_MASK, 0x20df3);
752	else
753		clrsetbits_le32(&mctl_phy->pir, MCTL_PIR_MASK, 0x2c573);
754
755	sdelay(10000); /* XXX necessary? */
756
757	/* Wait for the INIT bit to clear itself... */
758	while ((readl(&mctl_phy->pir) & MCTL_PIR_INIT) != MCTL_PIR_INIT) {
759		/* not done yet -- keep spinning */
760		debug("MCTL_PIR_INIT not set\n");
761		sdelay(1000);
762		/* TODO: implement timeout */
763	}
764
765	/* TODO: not used --- there's a "2rank debug" section here */
766
767	/* Original dram init code which may come in handy later
768	********************************************************
769	 * LPDDR2 and LPDDR3 *
770	if ((para->dram_type) == 6 || (para->dram_type) == 7) {
771		reg_val = mctl_read_w(P0_DSGCR + ch_offset);
772		reg_val &= (~(0x3<<6));		* set DQSGX to 1 *
773		reg_val |= (0x1<<6);		* dqs gate extend *
774		mctl_write_w(P0_DSGCR + ch_offset, reg_val);
775		dram_dbg("DQS Gate Extend Enable!\n", ch_index);
776	}
777
778	 * Disable ZCAL after initial--for nand dma debug--20140330 by YSZ *
779	if (para->dram_tpr13 & (0x1<<31)) {
780		reg_val = mctl_read_w(P0_ZQ0CR + ch_offset);
781		reg_val |= (0x7<<11);
782		mctl_write_w(P0_ZQ0CR + ch_offset, reg_val);
783	}
784	********************************************************
785	*/
786
787	/*
788	 * TODO: more 2-rank support
789	 * (setting the "dqs gate delay to average between 2 rank")
790	 */
791
792	/* check if any errors are set */
793	if (readl(&mctl_phy->pgsr[0]) & MCTL_PGSR0_ERRORS) {
794		debug("Channel %d unavailable!\n", ch_index);
795		return 0;
796	} else{
797		/* initial OK */
798		debug("Channel %d OK!\n", ch_index);
799		/* return 1; */
800	}
801
802	while ((readl(&mctl_ctl->stat) & 0x1) != 0x1) {
803		debug("Waiting for INIT to be done (controller to come up into 'normal operating' mode\n");
804		sdelay(100000);
805		/* init not done */
806		/* TODO: implement time-out */
807	}
808	debug("done\n");
809
810	/* "DDR is controller by contoller" */
811	clrbits_le32(&mctl_phy->pgcr[3], (1 << 25));
812
813	/* TODO: is the following necessary? */
814	debug("DFIMISC before writing 0: 0x%x\n", readl(&mctl_ctl->dfimisc));
815	writel(0, &mctl_ctl->dfimisc);
816
817	/* Enable auto-refresh */
818	clrbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
819
820	debug("channel_init complete\n");
821	return 1;
822}
823
824signed int DRAMC_get_dram_size(void)
825{
826	struct sunxi_mctl_com_reg * const mctl_com =
827		(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
828
829	unsigned int reg_val;
830	unsigned int dram_size;
831	unsigned int temp;
832
833	reg_val = readl(&mctl_com->cr);
834
835	temp = (reg_val >> 8) & 0xf;	/* page size code */
836	dram_size = (temp - 6);		/* (1 << dram_size) * 512Bytes */
837
838	temp = (reg_val >> 4) & 0xf;	/* row width code */
839	dram_size += (temp + 1);	/* (1 << dram_size) * 512Bytes */
840
841	temp = (reg_val >> 2) & 0x3;	/* bank number code */
842	dram_size += (temp + 2);	/* (1 << dram_size) * 512Bytes */
843
844	temp = reg_val & 0x3;		/* rank number code */
845	dram_size += temp;		/* (1 << dram_size) * 512Bytes */
846
847	temp = (reg_val >> 19) & 0x1;	/* channel number code */
848	dram_size += temp;		/* (1 << dram_size) * 512Bytes */
849
850	dram_size = dram_size - 11;	/* (1 << dram_size) MBytes */
851
852	return 1 << dram_size;
853}
854
855unsigned long sunxi_dram_init(void)
856{
857	struct sunxi_mctl_com_reg * const mctl_com =
858		(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
859
860	struct dram_sun9i_cl_cwl_timing cl_cwl[] = {
861		{ .CL =  5, .CWL = 5, .tCKmin = 3000, .tCKmax = 3300 },
862		{ .CL =  6, .CWL = 5, .tCKmin = 2500, .tCKmax = 3300 },
863		{ .CL =  8, .CWL = 6, .tCKmin = 1875, .tCKmax = 2500 },
864		{ .CL = 10, .CWL = 7, .tCKmin = 1500, .tCKmax = 1875 },
865		{ .CL = 11, .CWL = 8, .tCKmin = 1250, .tCKmax = 1500 }
866	};
867
868	/* Set initial parameters, these get modified by the autodetect code */
869	struct dram_sun9i_para para = {
870		.dram_type = DRAM_TYPE_DDR3,
871		.bus_width = 32,
872		.chan = 2,
873		.rank = 1,
874		/* .rank = 2, */
875		.page_size = 4096,
876		/* .rows = 16, */
877		.rows = 15,
878
879		/* CL/CWL table for the speed bin */
880		.cl_cwl_table = cl_cwl,
881		.cl_cwl_numentries = sizeof(cl_cwl) /
882			sizeof(struct dram_sun9i_cl_cwl_timing),
883
884		/* timings */
885		.tREFI = 7800,	/* 7.8us (up to 85 degC) */
886		.tRFC  = 260,	/* 260ns for 4GBit devices */
887				/* 350ns @ 8GBit */
888
889		.tRCD  = 13750,
890		.tRP   = 13750,
891		.tRC   = 48750,
892		.tRAS  = 35000,
893
894		.tDLLK = 512,
895		.tRTP  = { .ck = 4, .ps = 7500 },
896		.tWTR  = { .ck = 4, .ps = 7500 },
897		.tWR   = 15,
898		.tMRD  = 4,
899		.tMOD  = { .ck = 12, .ps = 15000 },
900		.tCCD  = 4,
901		.tRRD  = { .ck = 4, .ps = 7500 },
902		.tFAW  = 40,
903
904		/* calibration timing */
905		/* .tZQinit = { .ck = 512, .ps = 640000 }, */
906		.tZQoper = { .ck = 256, .ps = 320000 },
907		.tZQCS   = { .ck = 64,  .ps = 80000 },
908
909		/* reset timing */
910		/* .tXPR  = { .ck = 5, .ps = 10000 }, */
911
912		/* self-refresh timings */
913		.tXS  = { .ck = 5, .ps = 10000 },
914		.tXSDLL = 512,
915		.tCKSRE = { .ck = 5, .ps = 10000 },
916		.tCKSRX = { .ck = 5, .ps = 10000 },
917
918		/* power-down timings */
919		.tXP = { .ck = 3, .ps = 6000 },
920		.tXPDLL = { .ck = 10, .ps = 24000 },
921		.tCKE = { .ck = 3, .ps = 5000 },
922
923		/* write leveling timings */
924		.tWLMRD = 40,
925		/* .tWLDQSEN = 25, */
926		.tWLO = 7500,
927		/* .tWLOE = 2000, */
928	};
929
930	/*
931	 * Disable A80 internal 240 ohm resistor.
932	 *
933	 * This code sequence is adapated from Allwinner's Boot0 (see
934	 * https://github.com/allwinner-zh/bootloader.git), as there
935	 * is no documentation for these two registers in the R_PRCM
936	 * block.
937	 */
938	setbits_le32(SUNXI_PRCM_BASE + 0x1e0, (0x3 << 8));
939	writel(0, SUNXI_PRCM_BASE + 0x1e8);
940
941	mctl_sys_init();
942
943	if (!mctl_channel_init(0, &para))
944		return 0;
945
946	/* dual-channel */
947	if (!mctl_channel_init(1, &para)) {
948		/* disable channel 1 */
949		clrsetbits_le32(&mctl_com->cr, MCTL_CR_CHANNEL_MASK,
950				MCTL_CR_CHANNEL_SINGLE);
951		/* disable channel 1 global clock */
952		clrbits_le32(&mctl_com->cr, MCTL_CCR_CH1_CLK_EN);
953	}
954
955	mctl_com_init(&para);
956
957	/* return the proper RAM size */
958	return DRAMC_get_dram_size() << 20;
959}
960