1// SPDX-License-Identifier: Intel
2/*
3 * Copyright (C) 2013, Intel Corporation
4 * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com>
5 *
6 * Ported from Intel released Quark UEFI BIOS
7 * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei
8 */
9
10#include <common.h>
11#include <hang.h>
12#include <asm/arch/device.h>
13#include <asm/arch/mrc.h>
14#include <asm/arch/msg_port.h>
15#include <asm/arch/quark.h>
16#include "mrc_util.h"
17#include "hte.h"
18#include "smc.h"
19
/*
 * Vref register codes indexed by setting, lowest setting first.
 *
 * Settings 0-31 hold the codes 0x3f down to 0x20, settings 32-63 hold
 * the codes 0x00 up to 0x1f.
 */
static const uint8_t vref_codes[64] = {
	0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38,	/*  0 -  7 */
	0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30,	/*  8 - 15 */
	0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,	/* 16 - 23 */
	0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20,	/* 24 - 31 */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,	/* 32 - 39 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	/* 40 - 47 */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,	/* 48 - 55 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,	/* 56 - 63 */
};
31
32void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
33{
34	msg_port_write(unit, addr,
35		       (msg_port_read(unit, addr) & ~(mask)) |
36		       ((data) & (mask)));
37}
38
39void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
40{
41	msg_port_alt_write(unit, addr,
42			   (msg_port_alt_read(unit, addr) & ~(mask)) |
43			   ((data) & (mask)));
44}
45
46void mrc_post_code(uint8_t major, uint8_t minor)
47{
48	/* send message to UART */
49	DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor);
50
51	/* error check */
52	if (major == 0xee)
53		hang();
54}
55
/*
 * Busy-wait for a number of nanoseconds using the time stamp counter.
 *
 * The wait length in TSC ticks is get_tbclk_mhz() * ns / 1000.
 * NOTE(review): the product is not widened before the division, so
 * depending on the return type of get_tbclk_mhz() a very large ns may
 * overflow - confirm that callers only pass small values.
 */
void delay_n(uint32_t ns)
{
	uint64_t deadline = rdtsc();

	deadline += (get_tbclk_mhz() * ns) / 1000;

	do {
		/* spin until the TSC reaches the deadline */
	} while (rdtsc() < deadline);
}
67
/*
 * Busy-wait for a number of microseconds.
 *
 * Despite its name, the ms argument counts microseconds: delay_n(1000)
 * is called once per unit.
 */
void delay_u(uint32_t ms)
{
	/* 64-bit math is not an option, just use loops */
	for (; ms != 0; ms--)
		delay_n(1000);
}
75
76/* Select Memory Manager as the source for PRI interface */
77void select_mem_mgr(void)
78{
79	u32 dco;
80
81	ENTERFN();
82
83	dco = msg_port_read(MEM_CTLR, DCO);
84	dco &= ~DCO_PMICTL;
85	msg_port_write(MEM_CTLR, DCO, dco);
86
87	LEAVEFN();
88}
89
90/* Select HTE as the source for PRI interface */
91void select_hte(void)
92{
93	u32 dco;
94
95	ENTERFN();
96
97	dco = msg_port_read(MEM_CTLR, DCO);
98	dco |= DCO_PMICTL;
99	msg_port_write(MEM_CTLR, DCO, dco);
100
101	LEAVEFN();
102}
103
/*
 * Send DRAM command
 *
 * data should be formatted using DCMD_Xxxx macro or emrsXCommand structure.
 *
 * The value is written to MSG_DATA_REG of the host bridge, MSG_CTRL_EXT_REG
 * is written with 0, and msg_port_setup() is then called with
 * MSG_OP_DRAM_INIT for MEM_CTLR.
 */
void dram_init_command(uint32_t data)
{
	qrk_pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data);
	qrk_pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0);
	msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0);

	/* log the command at D_REGWR level */
	DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data);
}
116
/*
 * Send DRAM wake command using special MCU side-band WAKE opcode
 *
 * This only calls msg_port_setup() with MSG_OP_DRAM_WAKE for MEM_CTLR;
 * no data register is written beforehand.
 */
void dram_wake_command(void)
{
	ENTERFN();

	msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0);

	LEAVEFN();
}
126
/*
 * Print the channel, rank and byte lane a training message refers to.
 * Each value is printed as a hexadecimal number at D_INFO level.
 */
void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane)
{
	/* send message to UART */
	DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
}
132
133/*
134 * This function will program the RCVEN delays
135 *
136 * (currently doesn't comprehend rank)
137 */
138void set_rcvn(uint8_t channel, uint8_t rank,
139	      uint8_t byte_lane, uint32_t pi_count)
140{
141	uint32_t reg;
142	uint32_t msk;
143	uint32_t temp;
144
145	ENTERFN();
146
147	DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n",
148	    channel, rank, byte_lane, pi_count);
149
150	/*
151	 * RDPTR (1/2 MCLK, 64 PIs)
152	 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
153	 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
154	 */
155	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
156		channel * DDRIODQ_CH_OFFSET;
157	msk = (byte_lane & 1) ? 0xf00000 : 0xf00;
158	temp = (byte_lane & 1) ? (pi_count / HALF_CLK) << 20 :
159		(pi_count / HALF_CLK) << 8;
160	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
161
162	/* Adjust PI_COUNT */
163	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
164
165	/*
166	 * PI (1/64 MCLK, 1 PIs)
167	 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
168	 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
169	 */
170	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
171	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
172		channel * DDRIODQ_CH_OFFSET);
173	msk = 0x3f000000;
174	temp = pi_count << 24;
175	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
176
177	/*
178	 * DEADBAND
179	 * BL0/1 -> B01DBCTL1[08/11] (+1 select)
180	 * BL0/1 -> B01DBCTL1[02/05] (enable)
181	 */
182	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
183		channel * DDRIODQ_CH_OFFSET;
184	msk = 0x00;
185	temp = 0x00;
186
187	/* enable */
188	msk |= (byte_lane & 1) ? (1 << 5) : (1 << 2);
189	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
190		temp |= msk;
191
192	/* select */
193	msk |= (byte_lane & 1) ? (1 << 11) : (1 << 8);
194	if (pi_count < EARLY_DB)
195		temp |= msk;
196
197	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
198
199	/* error check */
200	if (pi_count > 0x3f) {
201		training_message(channel, rank, byte_lane);
202		mrc_post_code(0xee, 0xe0);
203	}
204
205	LEAVEFN();
206}
207
208/*
209 * This function will return the current RCVEN delay on the given
210 * channel, rank, byte_lane as an absolute PI count.
211 *
212 * (currently doesn't comprehend rank)
213 */
214uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane)
215{
216	uint32_t reg;
217	uint32_t temp;
218	uint32_t pi_count;
219
220	ENTERFN();
221
222	/*
223	 * RDPTR (1/2 MCLK, 64 PIs)
224	 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
225	 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
226	 */
227	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
228		channel * DDRIODQ_CH_OFFSET;
229	temp = msg_port_alt_read(DDRPHY, reg);
230	temp >>= (byte_lane & 1) ? 20 : 8;
231	temp &= 0xf;
232
233	/* Adjust PI_COUNT */
234	pi_count = temp * HALF_CLK;
235
236	/*
237	 * PI (1/64 MCLK, 1 PIs)
238	 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
239	 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
240	 */
241	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
242	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
243		channel * DDRIODQ_CH_OFFSET);
244	temp = msg_port_alt_read(DDRPHY, reg);
245	temp >>= 24;
246	temp &= 0x3f;
247
248	/* Adjust PI_COUNT */
249	pi_count += temp;
250
251	LEAVEFN();
252
253	return pi_count;
254}
255
256/*
257 * This function will program the RDQS delays based on an absolute
258 * amount of PIs.
259 *
260 * (currently doesn't comprehend rank)
261 */
262void set_rdqs(uint8_t channel, uint8_t rank,
263	      uint8_t byte_lane, uint32_t pi_count)
264{
265	uint32_t reg;
266	uint32_t msk;
267	uint32_t temp;
268
269	ENTERFN();
270	DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n",
271	    channel, rank, byte_lane, pi_count);
272
273	/*
274	 * PI (1/128 MCLK)
275	 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
276	 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
277	 */
278	reg = (byte_lane & 1) ? B1RXDQSPICODE : B0RXDQSPICODE;
279	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
280		channel * DDRIODQ_CH_OFFSET);
281	msk = 0x7f;
282	temp = pi_count << 0;
283	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
284
285	/* error check (shouldn't go above 0x3F) */
286	if (pi_count > 0x47) {
287		training_message(channel, rank, byte_lane);
288		mrc_post_code(0xee, 0xe1);
289	}
290
291	LEAVEFN();
292}
293
294/*
295 * This function will return the current RDQS delay on the given
296 * channel, rank, byte_lane as an absolute PI count.
297 *
298 * (currently doesn't comprehend rank)
299 */
300uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
301{
302	uint32_t reg;
303	uint32_t temp;
304	uint32_t pi_count;
305
306	ENTERFN();
307
308	/*
309	 * PI (1/128 MCLK)
310	 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
311	 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
312	 */
313	reg = (byte_lane & 1) ? B1RXDQSPICODE : B0RXDQSPICODE;
314	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
315		channel * DDRIODQ_CH_OFFSET);
316	temp = msg_port_alt_read(DDRPHY, reg);
317
318	/* Adjust PI_COUNT */
319	pi_count = temp & 0x7f;
320
321	LEAVEFN();
322
323	return pi_count;
324}
325
326/*
327 * This function will program the WDQS delays based on an absolute
328 * amount of PIs.
329 *
330 * (currently doesn't comprehend rank)
331 */
332void set_wdqs(uint8_t channel, uint8_t rank,
333	      uint8_t byte_lane, uint32_t pi_count)
334{
335	uint32_t reg;
336	uint32_t msk;
337	uint32_t temp;
338
339	ENTERFN();
340
341	DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n",
342	    channel, rank, byte_lane, pi_count);
343
344	/*
345	 * RDPTR (1/2 MCLK, 64 PIs)
346	 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
347	 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
348	 */
349	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
350		channel * DDRIODQ_CH_OFFSET;
351	msk = (byte_lane & 1) ? 0xf0000 : 0xf0;
352	temp = pi_count / HALF_CLK;
353	temp <<= (byte_lane & 1) ? 16 : 4;
354	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
355
356	/* Adjust PI_COUNT */
357	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
358
359	/*
360	 * PI (1/64 MCLK, 1 PIs)
361	 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
362	 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
363	 */
364	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
365	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
366		channel * DDRIODQ_CH_OFFSET);
367	msk = 0x3f0000;
368	temp = pi_count << 16;
369	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
370
371	/*
372	 * DEADBAND
373	 * BL0/1 -> B01DBCTL1[07/10] (+1 select)
374	 * BL0/1 -> B01DBCTL1[01/04] (enable)
375	 */
376	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
377		channel * DDRIODQ_CH_OFFSET;
378	msk = 0x00;
379	temp = 0x00;
380
381	/* enable */
382	msk |= (byte_lane & 1) ? (1 << 4) : (1 << 1);
383	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
384		temp |= msk;
385
386	/* select */
387	msk |= (byte_lane & 1) ? (1 << 10) : (1 << 7);
388	if (pi_count < EARLY_DB)
389		temp |= msk;
390
391	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
392
393	/* error check */
394	if (pi_count > 0x3f) {
395		training_message(channel, rank, byte_lane);
396		mrc_post_code(0xee, 0xe2);
397	}
398
399	LEAVEFN();
400}
401
402/*
403 * This function will return the amount of WDQS delay on the given
404 * channel, rank, byte_lane as an absolute PI count.
405 *
406 * (currently doesn't comprehend rank)
407 */
408uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
409{
410	uint32_t reg;
411	uint32_t temp;
412	uint32_t pi_count;
413
414	ENTERFN();
415
416	/*
417	 * RDPTR (1/2 MCLK, 64 PIs)
418	 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
419	 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
420	 */
421	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
422		channel * DDRIODQ_CH_OFFSET;
423	temp = msg_port_alt_read(DDRPHY, reg);
424	temp >>= (byte_lane & 1) ? 16 : 4;
425	temp &= 0xf;
426
427	/* Adjust PI_COUNT */
428	pi_count = (temp * HALF_CLK);
429
430	/*
431	 * PI (1/64 MCLK, 1 PIs)
432	 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
433	 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
434	 */
435	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
436	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
437		channel * DDRIODQ_CH_OFFSET);
438	temp = msg_port_alt_read(DDRPHY, reg);
439	temp >>= 16;
440	temp &= 0x3f;
441
442	/* Adjust PI_COUNT */
443	pi_count += temp;
444
445	LEAVEFN();
446
447	return pi_count;
448}
449
450/*
451 * This function will program the WDQ delays based on an absolute
452 * number of PIs.
453 *
454 * (currently doesn't comprehend rank)
455 */
456void set_wdq(uint8_t channel, uint8_t rank,
457	     uint8_t byte_lane, uint32_t pi_count)
458{
459	uint32_t reg;
460	uint32_t msk;
461	uint32_t temp;
462
463	ENTERFN();
464
465	DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n",
466	    channel, rank, byte_lane, pi_count);
467
468	/*
469	 * RDPTR (1/2 MCLK, 64 PIs)
470	 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
471	 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
472	 */
473	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
474		channel * DDRIODQ_CH_OFFSET;
475	msk = (byte_lane & 1) ? 0xf000 : 0xf;
476	temp = pi_count / HALF_CLK;
477	temp <<= (byte_lane & 1) ? 12 : 0;
478	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
479
480	/* Adjust PI_COUNT */
481	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
482
483	/*
484	 * PI (1/64 MCLK, 1 PIs)
485	 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
486	 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
487	 */
488	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
489	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
490		channel * DDRIODQ_CH_OFFSET);
491	msk = 0x3f00;
492	temp = pi_count << 8;
493	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
494
495	/*
496	 * DEADBAND
497	 * BL0/1 -> B01DBCTL1[06/09] (+1 select)
498	 * BL0/1 -> B01DBCTL1[00/03] (enable)
499	 */
500	reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
501		channel * DDRIODQ_CH_OFFSET;
502	msk = 0x00;
503	temp = 0x00;
504
505	/* enable */
506	msk |= (byte_lane & 1) ? (1 << 3) : (1 << 0);
507	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
508		temp |= msk;
509
510	/* select */
511	msk |= (byte_lane & 1) ? (1 << 9) : (1 << 6);
512	if (pi_count < EARLY_DB)
513		temp |= msk;
514
515	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
516
517	/* error check */
518	if (pi_count > 0x3f) {
519		training_message(channel, rank, byte_lane);
520		mrc_post_code(0xee, 0xe3);
521	}
522
523	LEAVEFN();
524}
525
526/*
527 * This function will return the amount of WDQ delay on the given
528 * channel, rank, byte_lane as an absolute PI count.
529 *
530 * (currently doesn't comprehend rank)
531 */
532uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane)
533{
534	uint32_t reg;
535	uint32_t temp;
536	uint32_t pi_count;
537
538	ENTERFN();
539
540	/*
541	 * RDPTR (1/2 MCLK, 64 PIs)
542	 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
543	 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
544	 */
545	reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
546		channel * DDRIODQ_CH_OFFSET;
547	temp = msg_port_alt_read(DDRPHY, reg);
548	temp >>= (byte_lane & 1) ? 12 : 0;
549	temp &= 0xf;
550
551	/* Adjust PI_COUNT */
552	pi_count = temp * HALF_CLK;
553
554	/*
555	 * PI (1/64 MCLK, 1 PIs)
556	 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
557	 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
558	 */
559	reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
560	reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
561		channel * DDRIODQ_CH_OFFSET);
562	temp = msg_port_alt_read(DDRPHY, reg);
563	temp >>= 8;
564	temp &= 0x3f;
565
566	/* Adjust PI_COUNT */
567	pi_count += temp;
568
569	LEAVEFN();
570
571	return pi_count;
572}
573
574/*
575 * This function will program the WCMD delays based on an absolute
576 * number of PIs.
577 */
578void set_wcmd(uint8_t channel, uint32_t pi_count)
579{
580	uint32_t reg;
581	uint32_t msk;
582	uint32_t temp;
583
584	ENTERFN();
585
586	/*
587	 * RDPTR (1/2 MCLK, 64 PIs)
588	 * CMDPTRREG[11:08] (0x0-0xF)
589	 */
590	reg = CMDPTRREG + channel * DDRIOCCC_CH_OFFSET;
591	msk = 0xf00;
592	temp = pi_count / HALF_CLK;
593	temp <<= 8;
594	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
595
596	/* Adjust PI_COUNT */
597	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
598
599	/*
600	 * PI (1/64 MCLK, 1 PIs)
601	 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
602	 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
603	 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
604	 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
605	 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
606	 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
607	 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
608	 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
609	 */
610	reg = CMDDLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
611	msk = 0x3f3f3f3f;
612	temp = (pi_count << 24) | (pi_count << 16) |
613		(pi_count << 8) | (pi_count << 0);
614
615	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
616	reg = CMDDLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;	/* PO */
617	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
618
619	/*
620	 * DEADBAND
621	 * CMDCFGREG0[17] (+1 select)
622	 * CMDCFGREG0[16] (enable)
623	 */
624	reg = CMDCFGREG0 + channel * DDRIOCCC_CH_OFFSET;
625	msk = 0x00;
626	temp = 0x00;
627
628	/* enable */
629	msk |= (1 << 16);
630	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
631		temp |= msk;
632
633	/* select */
634	msk |= (1 << 17);
635	if (pi_count < EARLY_DB)
636		temp |= msk;
637
638	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
639
640	/* error check */
641	if (pi_count > 0x3f)
642		mrc_post_code(0xee, 0xe4);
643
644	LEAVEFN();
645}
646
647/*
648 * This function will return the amount of WCMD delay on the given
649 * channel as an absolute PI count.
650 */
651uint32_t get_wcmd(uint8_t channel)
652{
653	uint32_t reg;
654	uint32_t temp;
655	uint32_t pi_count;
656
657	ENTERFN();
658
659	/*
660	 * RDPTR (1/2 MCLK, 64 PIs)
661	 * CMDPTRREG[11:08] (0x0-0xF)
662	 */
663	reg = CMDPTRREG + channel * DDRIOCCC_CH_OFFSET;
664	temp = msg_port_alt_read(DDRPHY, reg);
665	temp >>= 8;
666	temp &= 0xf;
667
668	/* Adjust PI_COUNT */
669	pi_count = temp * HALF_CLK;
670
671	/*
672	 * PI (1/64 MCLK, 1 PIs)
673	 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
674	 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
675	 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
676	 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
677	 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
678	 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
679	 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
680	 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
681	 */
682	reg = CMDDLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
683	temp = msg_port_alt_read(DDRPHY, reg);
684	temp >>= 16;
685	temp &= 0x3f;
686
687	/* Adjust PI_COUNT */
688	pi_count += temp;
689
690	LEAVEFN();
691
692	return pi_count;
693}
694
695/*
696 * This function will program the WCLK delays based on an absolute
697 * number of PIs.
698 */
699void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count)
700{
701	uint32_t reg;
702	uint32_t msk;
703	uint32_t temp;
704
705	ENTERFN();
706
707	/*
708	 * RDPTR (1/2 MCLK, 64 PIs)
709	 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
710	 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
711	 */
712	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
713	msk = 0xff00;
714	temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
715	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
716
717	/* Adjust PI_COUNT */
718	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
719
720	/*
721	 * PI (1/64 MCLK, 1 PIs)
722	 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
723	 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
724	 */
725	reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
726	reg += (channel * DDRIOCCC_CH_OFFSET);
727	msk = 0x3f3f00;
728	temp = (pi_count << 16) | (pi_count << 8);
729	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
730
731	reg = rank ? ECCB1DLLPICODER1 : ECCB1DLLPICODER1;
732	reg += (channel * DDRIOCCC_CH_OFFSET);
733	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
734
735	reg = rank ? ECCB1DLLPICODER2 : ECCB1DLLPICODER2;
736	reg += (channel * DDRIOCCC_CH_OFFSET);
737	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
738
739	reg = rank ? ECCB1DLLPICODER3 : ECCB1DLLPICODER3;
740	reg += (channel * DDRIOCCC_CH_OFFSET);
741	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
742
743	/*
744	 * DEADBAND
745	 * CCCFGREG1[11:08] (+1 select)
746	 * CCCFGREG1[03:00] (enable)
747	 */
748	reg = CCCFGREG1 + channel * DDRIOCCC_CH_OFFSET;
749	msk = 0x00;
750	temp = 0x00;
751
752	/* enable */
753	msk |= 0xf;
754	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
755		temp |= msk;
756
757	/* select */
758	msk |= 0xf00;
759	if (pi_count < EARLY_DB)
760		temp |= msk;
761
762	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
763
764	/* error check */
765	if (pi_count > 0x3f)
766		mrc_post_code(0xee, 0xe5);
767
768	LEAVEFN();
769}
770
771/*
772 * This function will return the amout of WCLK delay on the given
773 * channel, rank as an absolute PI count.
774 */
775uint32_t get_wclk(uint8_t channel, uint8_t rank)
776{
777	uint32_t reg;
778	uint32_t temp;
779	uint32_t pi_count;
780
781	ENTERFN();
782
783	/*
784	 * RDPTR (1/2 MCLK, 64 PIs)
785	 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
786	 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
787	 */
788	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
789	temp = msg_port_alt_read(DDRPHY, reg);
790	temp >>= rank ? 12 : 8;
791	temp &= 0xf;
792
793	/* Adjust PI_COUNT */
794	pi_count = temp * HALF_CLK;
795
796	/*
797	 * PI (1/64 MCLK, 1 PIs)
798	 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
799	 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
800	 */
801	reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
802	reg += (channel * DDRIOCCC_CH_OFFSET);
803	temp = msg_port_alt_read(DDRPHY, reg);
804	temp >>= rank ? 16 : 8;
805	temp &= 0x3f;
806
807	pi_count += temp;
808
809	LEAVEFN();
810
811	return pi_count;
812}
813
814/*
815 * This function will program the WCTL delays based on an absolute
816 * number of PIs.
817 *
818 * (currently doesn't comprehend rank)
819 */
820void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count)
821{
822	uint32_t reg;
823	uint32_t msk;
824	uint32_t temp;
825
826	ENTERFN();
827
828	/*
829	 * RDPTR (1/2 MCLK, 64 PIs)
830	 * CCPTRREG[31:28] (0x0-0xF)
831	 * CCPTRREG[27:24] (0x0-0xF)
832	 */
833	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
834	msk = 0xff000000;
835	temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
836	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
837
838	/* Adjust PI_COUNT */
839	pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
840
841	/*
842	 * PI (1/64 MCLK, 1 PIs)
843	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
844	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
845	 */
846	reg = ECCB1DLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;
847	msk = 0x3f000000;
848	temp = (pi_count << 24);
849	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
850
851	reg = ECCB1DLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
852	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
853
854	reg = ECCB1DLLPICODER2 + channel * DDRIOCCC_CH_OFFSET;
855	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
856
857	reg = ECCB1DLLPICODER3 + channel * DDRIOCCC_CH_OFFSET;
858	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
859
860	/*
861	 * DEADBAND
862	 * CCCFGREG1[13:12] (+1 select)
863	 * CCCFGREG1[05:04] (enable)
864	 */
865	reg = CCCFGREG1 + channel * DDRIOCCC_CH_OFFSET;
866	msk = 0x00;
867	temp = 0x00;
868
869	/* enable */
870	msk |= 0x30;
871	if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
872		temp |= msk;
873
874	/* select */
875	msk |= 0x3000;
876	if (pi_count < EARLY_DB)
877		temp |= msk;
878
879	mrc_alt_write_mask(DDRPHY, reg, temp, msk);
880
881	/* error check */
882	if (pi_count > 0x3f)
883		mrc_post_code(0xee, 0xe6);
884
885	LEAVEFN();
886}
887
888/*
889 * This function will return the amount of WCTL delay on the given
890 * channel, rank as an absolute PI count.
891 *
892 * (currently doesn't comprehend rank)
893 */
894uint32_t get_wctl(uint8_t channel, uint8_t rank)
895{
896	uint32_t reg;
897	uint32_t temp;
898	uint32_t pi_count;
899
900	ENTERFN();
901
902	/*
903	 * RDPTR (1/2 MCLK, 64 PIs)
904	 * CCPTRREG[31:28] (0x0-0xF)
905	 * CCPTRREG[27:24] (0x0-0xF)
906	 */
907	reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
908	temp = msg_port_alt_read(DDRPHY, reg);
909	temp >>= 24;
910	temp &= 0xf;
911
912	/* Adjust PI_COUNT */
913	pi_count = temp * HALF_CLK;
914
915	/*
916	 * PI (1/64 MCLK, 1 PIs)
917	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
918	 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
919	 */
920	reg = ECCB1DLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;
921	temp = msg_port_alt_read(DDRPHY, reg);
922	temp >>= 24;
923	temp &= 0x3f;
924
925	/* Adjust PI_COUNT */
926	pi_count += temp;
927
928	LEAVEFN();
929
930	return pi_count;
931}
932
933/*
934 * This function will program the internal Vref setting in a given
935 * byte lane in a given channel.
936 */
937void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting)
938{
939	uint32_t reg = (byte_lane & 0x1) ? B1VREFCTL : B0VREFCTL;
940
941	ENTERFN();
942
943	DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n",
944	    channel, byte_lane, setting);
945
946	mrc_alt_write_mask(DDRPHY, reg + channel * DDRIODQ_CH_OFFSET +
947		(byte_lane >> 1) * DDRIODQ_BL_OFFSET,
948		vref_codes[setting] << 2, 0xfc);
949
950	/*
951	 * need to wait ~300ns for Vref to settle
952	 * (check that this is necessary)
953	 */
954	delay_n(300);
955
956	/* ??? may need to clear pointers ??? */
957
958	LEAVEFN();
959}
960
961/*
962 * This function will return the internal Vref setting for the given
963 * channel, byte_lane.
964 */
965uint32_t get_vref(uint8_t channel, uint8_t byte_lane)
966{
967	uint8_t j;
968	uint32_t ret_val = sizeof(vref_codes) / 2;
969	uint32_t reg = (byte_lane & 0x1) ? B1VREFCTL : B0VREFCTL;
970	uint32_t temp;
971
972	ENTERFN();
973
974	temp = msg_port_alt_read(DDRPHY, reg + channel * DDRIODQ_CH_OFFSET +
975		(byte_lane >> 1) * DDRIODQ_BL_OFFSET);
976	temp >>= 2;
977	temp &= 0x3f;
978
979	for (j = 0; j < sizeof(vref_codes); j++) {
980		if (vref_codes[j] == temp) {
981			ret_val = j;
982			break;
983		}
984	}
985
986	LEAVEFN();
987
988	return ret_val;
989}
990
991/*
992 * This function will return a 32-bit address in the desired
993 * channel and rank.
994 */
995uint32_t get_addr(uint8_t channel, uint8_t rank)
996{
997	uint32_t offset = 32 * 1024 * 1024;	/* 32MB */
998
999	/* Begin product specific code */
1000	if (channel > 0) {
1001		DPF(D_ERROR, "ILLEGAL CHANNEL\n");
1002		DEAD_LOOP();
1003	}
1004
1005	if (rank > 1) {
1006		DPF(D_ERROR, "ILLEGAL RANK\n");
1007		DEAD_LOOP();
1008	}
1009
1010	/* use 256MB lowest density as per DRP == 0x0003 */
1011	offset += rank * (256 * 1024 * 1024);
1012
1013	return offset;
1014}
1015
1016/*
1017 * This function will sample the DQTRAINSTS registers in the given
1018 * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
1019 *
1020 * It will return an encoded 32-bit date in which each bit corresponds to
1021 * the sampled value on the byte lane.
1022 */
1023uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel,
1024		    uint8_t rank, bool rcvn)
1025{
1026	uint8_t j;	/* just a counter */
1027	uint8_t bl;	/* which BL in the module (always 2 per module) */
1028	uint8_t bl_grp;	/* which BL module */
1029	/* byte lane divisor */
1030	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1031	uint32_t msk[2];	/* BLx in module */
1032	/* DQTRAINSTS register contents for each sample */
1033	uint32_t sampled_val[SAMPLE_SIZE];
1034	uint32_t num_0s;	/* tracks the number of '0' samples */
1035	uint32_t num_1s;	/* tracks the number of '1' samples */
1036	uint32_t ret_val = 0x00;	/* assume all '0' samples */
1037	uint32_t address = get_addr(channel, rank);
1038
1039	/* initialise msk[] */
1040	msk[0] = rcvn ? (1 << 1) : (1 << 9);	/* BL0 */
1041	msk[1] = rcvn ? (1 << 0) : (1 << 8);	/* BL1 */
1042
1043	/* cycle through each byte lane group */
1044	for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) {
1045		/* take SAMPLE_SIZE samples */
1046		for (j = 0; j < SAMPLE_SIZE; j++) {
1047			hte_mem_op(address, mrc_params->first_run,
1048				   rcvn ? 0 : 1);
1049			mrc_params->first_run = 0;
1050
1051			/*
1052			 * record the contents of the proper
1053			 * DQTRAINSTS register
1054			 */
1055			sampled_val[j] = msg_port_alt_read(DDRPHY,
1056				DQTRAINSTS +
1057				bl_grp * DDRIODQ_BL_OFFSET +
1058				channel * DDRIODQ_CH_OFFSET);
1059		}
1060
1061		/*
1062		 * look for a majority value (SAMPLE_SIZE / 2) + 1
1063		 * on the byte lane and set that value in the corresponding
1064		 * ret_val bit
1065		 */
1066		for (bl = 0; bl < 2; bl++) {
1067			num_0s = 0x00;	/* reset '0' tracker for byte lane */
1068			num_1s = 0x00;	/* reset '1' tracker for byte lane */
1069			for (j = 0; j < SAMPLE_SIZE; j++) {
1070				if (sampled_val[j] & msk[bl])
1071					num_1s++;
1072				else
1073					num_0s++;
1074			}
1075		if (num_1s > num_0s)
1076			ret_val |= (1 << (bl + bl_grp * 2));
1077		}
1078	}
1079
1080	/*
1081	 * "ret_val.0" contains the status of BL0
1082	 * "ret_val.1" contains the status of BL1
1083	 * "ret_val.2" contains the status of BL2
1084	 * etc.
1085	 */
1086	return ret_val;
1087}
1088
1089/* This function will find the rising edge transition on RCVN or WDQS */
1090void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[],
1091		      uint8_t channel, uint8_t rank, bool rcvn)
1092{
1093	bool all_edges_found;	/* determines stop condition */
1094	bool direction[NUM_BYTE_LANES];	/* direction indicator */
1095	uint8_t sample;	/* sample counter */
1096	uint8_t bl;	/* byte lane counter */
1097	/* byte lane divisor */
1098	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1099	uint32_t sample_result[SAMPLE_CNT];	/* results of sample_dqs() */
1100	uint32_t temp;
1101	uint32_t transition_pattern;
1102
1103	ENTERFN();
1104
1105	/* select hte and request initial configuration */
1106	select_hte();
1107	mrc_params->first_run = 1;
1108
1109	/* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */
1110	for (sample = 0; sample < SAMPLE_CNT; sample++) {
1111		/* program the desired delays for sample */
1112		for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) {
1113			/* increase sample delay by 26 PI (0.2 CLK) */
1114			if (rcvn) {
1115				set_rcvn(channel, rank, bl,
1116					 delay[bl] + sample * SAMPLE_DLY);
1117			} else {
1118				set_wdqs(channel, rank, bl,
1119					 delay[bl] + sample * SAMPLE_DLY);
1120			}
1121		}
1122
1123		/* take samples (Tsample_i) */
1124		sample_result[sample] = sample_dqs(mrc_params,
1125			channel, rank, rcvn);
1126
1127		DPF(D_TRN,
1128		    "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
1129		    rcvn ? "RCVN" : "WDQS", channel, rank, sample,
1130		    sample * SAMPLE_DLY, sample_result[sample]);
1131	}
1132
1133	/*
1134	 * This pattern will help determine where we landed and ultimately
1135	 * how to place RCVEN/WDQS.
1136	 */
1137	for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1138		/* build transition_pattern (MSB is 1st sample) */
1139		transition_pattern = 0;
1140		for (sample = 0; sample < SAMPLE_CNT; sample++) {
1141			transition_pattern |=
1142				((sample_result[sample] & (1 << bl)) >> bl) <<
1143				(SAMPLE_CNT - 1 - sample);
1144		}
1145
1146		DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
1147
1148		/*
1149		 * set up to look for rising edge based on
1150		 * transition_pattern
1151		 */
1152		switch (transition_pattern) {
1153		case 0:	/* sampled 0->0->0 */
1154			/* move forward from T3 looking for 0->1 */
1155			delay[bl] += 2 * SAMPLE_DLY;
1156			direction[bl] = FORWARD;
1157			break;
1158		case 1:	/* sampled 0->0->1 */
1159		case 5:	/* sampled 1->0->1 (bad duty cycle) *HSD#237503* */
1160			/* move forward from T2 looking for 0->1 */
1161			delay[bl] += 1 * SAMPLE_DLY;
1162			direction[bl] = FORWARD;
1163			break;
1164		case 2:	/* sampled 0->1->0 (bad duty cycle) *HSD#237503* */
1165		case 3:	/* sampled 0->1->1 */
1166			/* move forward from T1 looking for 0->1 */
1167			delay[bl] += 0 * SAMPLE_DLY;
1168			direction[bl] = FORWARD;
1169			break;
1170		case 4:	/* sampled 1->0->0 (assumes BL8, HSD#234975) */
1171			/* move forward from T3 looking for 0->1 */
1172			delay[bl] += 2 * SAMPLE_DLY;
1173			direction[bl] = FORWARD;
1174			break;
1175		case 6:	/* sampled 1->1->0 */
1176		case 7:	/* sampled 1->1->1 */
1177			/* move backward from T1 looking for 1->0 */
1178			delay[bl] += 0 * SAMPLE_DLY;
1179			direction[bl] = BACKWARD;
1180			break;
1181		default:
1182			mrc_post_code(0xee, 0xee);
1183			break;
1184		}
1185
1186		/* program delays */
1187		if (rcvn)
1188			set_rcvn(channel, rank, bl, delay[bl]);
1189		else
1190			set_wdqs(channel, rank, bl, delay[bl]);
1191	}
1192
1193	/*
1194	 * Based on the observed transition pattern on the byte lane,
1195	 * begin looking for a rising edge with single PI granularity.
1196	 */
1197	do {
1198		all_edges_found = true;	/* assume all byte lanes passed */
1199		/* take a sample */
1200		temp = sample_dqs(mrc_params, channel, rank, rcvn);
1201		/* check all each byte lane for proper edge */
1202		for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1203			if (temp & (1 << bl)) {
1204				/* sampled "1" */
1205				if (direction[bl] == BACKWARD) {
1206					/*
1207					 * keep looking for edge
1208					 * on this byte lane
1209					 */
1210					all_edges_found = false;
1211					delay[bl] -= 1;
1212					if (rcvn) {
1213						set_rcvn(channel, rank,
1214							 bl, delay[bl]);
1215					} else {
1216						set_wdqs(channel, rank,
1217							 bl, delay[bl]);
1218					}
1219				}
1220			} else {
1221				/* sampled "0" */
1222				if (direction[bl] == FORWARD) {
1223					/*
1224					 * keep looking for edge
1225					 * on this byte lane
1226					 */
1227					all_edges_found = false;
1228					delay[bl] += 1;
1229					if (rcvn) {
1230						set_rcvn(channel, rank,
1231							 bl, delay[bl]);
1232					} else {
1233						set_wdqs(channel, rank,
1234							 bl, delay[bl]);
1235					}
1236				}
1237			}
1238		}
1239	} while (!all_edges_found);
1240
1241	/* restore DDR idle state */
1242	dram_init_command(DCMD_PREA(rank));
1243
1244	DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
1245	    delay[0], delay[1], delay[2], delay[3]);
1246
1247	LEAVEFN();
1248}
1249
1250/*
1251 * This function will return a 32 bit mask that will be used to
1252 * check for byte lane failures.
1253 */
1254uint32_t byte_lane_mask(struct mrc_params *mrc_params)
1255{
1256	uint32_t j;
1257	uint32_t ret_val = 0x00;
1258
1259	/*
1260	 * set ret_val based on NUM_BYTE_LANES such that you will check
1261	 * only BL0 in result
1262	 *
1263	 * (each bit in result represents a byte lane)
1264	 */
1265	for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
1266		ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
1267
1268	/*
1269	 * HSD#235037
1270	 * need to adjust the mask for 16-bit mode
1271	 */
1272	if (mrc_params->channel_width == X16)
1273		ret_val |= (ret_val << 2);
1274
1275	return ret_val;
1276}
1277
1278/*
1279 * Check memory executing simple write/read/verify at the specified address.
1280 *
1281 * Bits in the result indicate failure on specific byte lane.
1282 */
1283uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address)
1284{
1285	uint32_t result = 0;
1286	uint8_t first_run = 0;
1287
1288	if (mrc_params->hte_setup) {
1289		mrc_params->hte_setup = 0;
1290		first_run = 1;
1291		select_hte();
1292	}
1293
1294	result = hte_basic_write_read(mrc_params, address, first_run,
1295				      WRITE_TRAIN);
1296
1297	DPF(D_TRN, "check_rw_coarse result is %x\n", result);
1298
1299	return result;
1300}
1301
1302/*
1303 * Check memory executing write/read/verify of many data patterns
1304 * at the specified address. Bits in the result indicate failure
1305 * on specific byte lane.
1306 */
1307uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address)
1308{
1309	uint32_t result;
1310	uint8_t first_run = 0;
1311
1312	if (mrc_params->hte_setup) {
1313		mrc_params->hte_setup = 0;
1314		first_run = 1;
1315		select_hte();
1316	}
1317
1318	result = hte_write_stress_bit_lanes(mrc_params, address, first_run);
1319
1320	DPF(D_TRN, "check_bls_ex result is %x\n", result);
1321
1322	return result;
1323}
1324
/*
 * 32-bit LFSR with characteristic polynomial: X^32 + X^22 + X^2 + X^1
 *
 * Advances the value behind lfsr_ptr by 32 shifts, so every bit of the
 * previous value is replaced. On each shift the register moves right by
 * one and the new bit 31 is the inverted XOR of bits 0, 1, 2 and 22.
 * Because the feedback is inverted, zero is a valid seed while
 * 0xffffffff maps onto itself.
 *
 * @lfsr_ptr:	in: previous 32-bit value, out: next 32-bit value
 */
void lfsr32(uint32_t *lfsr_ptr)
{
	uint32_t state = *lfsr_ptr;
	unsigned int shifts_left;

	for (shifts_left = 32; shifts_left != 0; shifts_left--) {
		/* fold all taps down to bit 0, then invert it */
		uint32_t feedback = (1 ^ state ^ (state >> 1) ^
				     (state >> 2) ^ (state >> 22)) & 1;

		state = (state >> 1) | (feedback << 31);
	}

	*lfsr_ptr = state;
}
1350
1351/* Clear the pointers in a given byte lane in a given channel */
1352void clear_pointers(void)
1353{
1354	uint8_t channel;
1355	uint8_t bl;
1356
1357	ENTERFN();
1358
1359	for (channel = 0; channel < NUM_CHANNELS; channel++) {
1360		for (bl = 0; bl < NUM_BYTE_LANES; bl++) {
1361			mrc_alt_write_mask(DDRPHY,
1362					   B01PTRCTL1 +
1363					   channel * DDRIODQ_CH_OFFSET +
1364					   (bl >> 1) * DDRIODQ_BL_OFFSET,
1365					   ~(1 << 8), (1 << 8));
1366
1367			mrc_alt_write_mask(DDRPHY,
1368					   B01PTRCTL1 +
1369					   channel * DDRIODQ_CH_OFFSET +
1370					   (bl >> 1) * DDRIODQ_BL_OFFSET,
1371					   (1 << 8), (1 << 8));
1372		}
1373	}
1374
1375	LEAVEFN();
1376}
1377
1378static void print_timings_internal(uint8_t algo, uint8_t channel, uint8_t rank,
1379				   uint8_t bl_divisor)
1380{
1381	uint8_t bl;
1382
1383	switch (algo) {
1384	case RCVN:
1385		DPF(D_INFO, "\nRCVN[%02d:%02d]", channel, rank);
1386		break;
1387	case WDQS:
1388		DPF(D_INFO, "\nWDQS[%02d:%02d]", channel, rank);
1389		break;
1390	case WDQX:
1391		DPF(D_INFO, "\nWDQx[%02d:%02d]", channel, rank);
1392		break;
1393	case RDQS:
1394		DPF(D_INFO, "\nRDQS[%02d:%02d]", channel, rank);
1395		break;
1396	case VREF:
1397		DPF(D_INFO, "\nVREF[%02d:%02d]", channel, rank);
1398		break;
1399	case WCMD:
1400		DPF(D_INFO, "\nWCMD[%02d:%02d]", channel, rank);
1401		break;
1402	case WCTL:
1403		DPF(D_INFO, "\nWCTL[%02d:%02d]", channel, rank);
1404		break;
1405	case WCLK:
1406		DPF(D_INFO, "\nWCLK[%02d:%02d]", channel, rank);
1407		break;
1408	default:
1409		break;
1410	}
1411
1412	for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1413		switch (algo) {
1414		case RCVN:
1415			DPF(D_INFO, " %03d", get_rcvn(channel, rank, bl));
1416			break;
1417		case WDQS:
1418			DPF(D_INFO, " %03d", get_wdqs(channel, rank, bl));
1419			break;
1420		case WDQX:
1421			DPF(D_INFO, " %03d", get_wdq(channel, rank, bl));
1422			break;
1423		case RDQS:
1424			DPF(D_INFO, " %03d", get_rdqs(channel, rank, bl));
1425			break;
1426		case VREF:
1427			DPF(D_INFO, " %03d", get_vref(channel, bl));
1428			break;
1429		case WCMD:
1430			DPF(D_INFO, " %03d", get_wcmd(channel));
1431			break;
1432		case WCTL:
1433			DPF(D_INFO, " %03d", get_wctl(channel, rank));
1434			break;
1435		case WCLK:
1436			DPF(D_INFO, " %03d", get_wclk(channel, rank));
1437			break;
1438		default:
1439			break;
1440		}
1441	}
1442}
1443
1444void print_timings(struct mrc_params *mrc_params)
1445{
1446	uint8_t algo;
1447	uint8_t channel;
1448	uint8_t rank;
1449	uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1450
1451	DPF(D_INFO, "\n---------------------------");
1452	DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
1453	DPF(D_INFO, "\n===========================");
1454
1455	for (algo = 0; algo < MAX_ALGOS; algo++) {
1456		for (channel = 0; channel < NUM_CHANNELS; channel++) {
1457			if (mrc_params->channel_enables & (1 << channel)) {
1458				for (rank = 0; rank < NUM_RANKS; rank++) {
1459					if (mrc_params->rank_enables &
1460						(1 << rank)) {
1461						print_timings_internal(algo,
1462							channel, rank,
1463							bl_divisor);
1464					}
1465				}
1466			}
1467		}
1468	}
1469
1470	DPF(D_INFO, "\n---------------------------");
1471	DPF(D_INFO, "\n");
1472}
1473