1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) Marvell International Ltd. and its affiliates
4 */
5
6#include <common.h>
7#include <i2c.h>
8#include <spl.h>
9#include <asm/io.h>
10#include <asm/arch/cpu.h>
11#include <asm/arch/soc.h>
12#include <linux/delay.h>
13
14#include "ddr3_hw_training.h"
15
/*
 * Debug
 *
 * DEBUG_PBS_S/D/C are compiled in when MV_DEBUG_PBS is defined,
 * DEBUG_PBS_FULL_S/D/C when MV_DEBUG_FULL_PBS is defined; otherwise they
 * expand to nothing.
 *
 *   _S(s)       - print the string s
 *   _D(d, l)    - print d in hex (the 'l' argument is accepted but unused)
 *   _C(s, d, l) - print s, then d, then a newline
 *
 * The _C variants are wrapped in do { } while (0) so that they behave as a
 * single statement, e.g. as the body of an unbraced if/else.
 */
#define DEBUG_PBS_FULL_C(s, d, l) \
	do { \
		DEBUG_PBS_FULL_S(s); \
		DEBUG_PBS_FULL_D(d, l); \
		DEBUG_PBS_FULL_S("\n"); \
	} while (0)
#define DEBUG_PBS_C(s, d, l) \
	do { \
		DEBUG_PBS_S(s); \
		DEBUG_PBS_D(d, l); \
		DEBUG_PBS_S("\n"); \
	} while (0)

#ifdef MV_DEBUG_PBS
#define DEBUG_PBS_S(s)			puts(s)
#define DEBUG_PBS_D(d, l)		printf("%x", d)
#else
#define DEBUG_PBS_S(s)
#define DEBUG_PBS_D(d, l)
#endif

#ifdef MV_DEBUG_FULL_PBS
#define DEBUG_PBS_FULL_S(s)		puts(s)
#define DEBUG_PBS_FULL_D(d, l)		printf("%x", d)
#else
#define DEBUG_PBS_FULL_S(s)
#define DEBUG_PBS_FULL_D(d, l)
#endif
39
40#if defined(MV88F78X60) || defined(MV88F672X)
41
42/* Temp array for skew data storage */
43static u32 skew_array[(MAX_PUP_NUM) * DQ_NUM] = { 0 };
44
45/* PBS locked dq (per pup) */
46extern u32 pbs_locked_dq[MAX_PUP_NUM][DQ_NUM];
47extern u32 pbs_locked_dm[MAX_PUP_NUM];
48extern u32 pbs_locked_value[MAX_PUP_NUM][DQ_NUM];
49
50#if defined(MV88F672X)
51extern u32 pbs_pattern[2][LEN_16BIT_PBS_PATTERN];
52extern u32 pbs_pattern_32b[2][LEN_PBS_PATTERN];
53#else
54extern u32 pbs_pattern_32b[2][LEN_PBS_PATTERN];
55extern u32 pbs_pattern_64b[2][LEN_PBS_PATTERN];
56#endif
57
58extern u32 pbs_dq_mapping[PUP_NUM_64BIT + 1][DQ_NUM];
59
60static int ddr3_tx_shift_dqs_adll_step_before_fail(MV_DRAM_INFO *dram_info,
61		u32 cur_pup, u32 pbs_pattern_idx, u32 ecc);
62static int ddr3_rx_shift_dqs_to_first_fail(MV_DRAM_INFO *dram_info, u32 cur_pup,
63		u32 pbs_pattern_idx, u32 ecc);
64static int ddr3_pbs_per_bit(MV_DRAM_INFO *dram_info, int *start_over, int is_tx,
65		u32 *pcur_pup, u32 pbs_pattern_idx, u32 ecc);
66static int ddr3_set_pbs_results(MV_DRAM_INFO *dram_info, int is_tx);
67static void ddr3_pbs_write_pup_dqs_reg(u32 cs, u32 pup, u32 dqs_delay);
68
69/*
70 * Name:     ddr3_pbs_tx
71 * Desc:     Execute the PBS TX phase.
72 * Args:     dram_info   ddr3 training information struct
73 * Notes:
74 * Returns:  MV_OK if success, other error code if fail.
75 */
76int ddr3_pbs_tx(MV_DRAM_INFO *dram_info)
77{
78	/* Array of Deskew results */
79
80	/*
81	 * Array to hold the total sum of skew from all iterations
82	 * (for average purpose)
83	 */
84	u32 skew_sum_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
85
86	/*
87	 * Array to hold the total average skew from both patterns
88	 * (for average purpose)
89	 */
90	u32 pattern_skew_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
91
92	u32 pbs_rep_time = 0;	/* counts number of loop in case of fail */
93	/* bit array for unlock pups - used to repeat on the RX operation */
94	u32 cur_pup;
95	u32 max_pup;
96	u32 pbs_retry;
97	u32 pup, dq, pups, cur_max_pup, valid_pup, reg;
98	u32 pattern_idx;
99	u32 ecc;
100	/* indicates whether we need to start the loop again */
101	int start_over;
102
103	DEBUG_PBS_S("DDR3 - PBS TX - Starting PBS TX procedure\n");
104
105	pups = dram_info->num_of_total_pups;
106	max_pup = dram_info->num_of_total_pups;
107
108	/* Enable SW override */
109	reg = reg_read(REG_DRAM_TRAINING_2_ADDR) |
110		(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
111	/* [0] = 1 - Enable SW override  */
112	/* 0x15B8 - Training SW 2 Register */
113	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
114	DEBUG_PBS_S("DDR3 - PBS RX - SW Override Enabled\n");
115
116	reg = 1 << REG_DRAM_TRAINING_AUTO_OFFS;
117	reg_write(REG_DRAM_TRAINING_ADDR, reg);	/* 0x15B0 - Training Register */
118
119	/* Running twice for 2 different patterns. each patterns - 3 times */
120	for (pattern_idx = 0; pattern_idx < COUNT_PBS_PATTERN; pattern_idx++) {
121		DEBUG_PBS_C("DDR3 - PBS TX - Working with pattern - ",
122			    pattern_idx, 1);
123
124		/* Reset sum array */
125		for (pup = 0; pup < pups; pup++) {
126			for (dq = 0; dq < DQ_NUM; dq++)
127				skew_sum_array[pup][dq] = 0;
128		}
129
130		/*
131		 * Perform PBS several of times (3 for each pattern).
132		 * At the end, we'll use the average
133		 */
134		/* If there is ECC, do each PBS again with mux change */
135		for (pbs_retry = 0; pbs_retry < COUNT_PBS_REPEAT; pbs_retry++) {
136			for (ecc = 0; ecc < (dram_info->ecc_ena + 1); ecc++) {
137
138				/*
139				 * This parameter stores the current PUP
140				 * num - ecc mode dependent - 4-8 / 1 pups
141				 */
142				cur_max_pup = (1 - ecc) *
143					dram_info->num_of_std_pups + ecc;
144
145				if (ecc) {
146					/* Only 1 pup in this case */
147					valid_pup = 0x1;
148				} else if (cur_max_pup > 4) {
149					/* 64 bit - 8 pups */
150					valid_pup = 0xFF;
151				} else if (cur_max_pup == 4) {
152					/* 32 bit - 4 pups */
153					valid_pup = 0xF;
154				} else {
155					/* 16 bit - 2 pups */
156					valid_pup = 0x3;
157				}
158
159				/* ECC Support - Switch ECC Mux on ecc=1 */
160				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
161					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
162				reg |= (dram_info->ecc_ena * ecc <<
163					REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
164				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
165
166				if (ecc)
167					DEBUG_PBS_S("DDR3 - PBS Tx - ECC Mux Enabled\n");
168				else
169					DEBUG_PBS_S("DDR3 - PBS Tx - ECC Mux Disabled\n");
170
171				/* Init iteration values */
172				/* Clear the locked DQs */
173				for (pup = 0; pup < cur_max_pup; pup++) {
174					for (dq = 0; dq < DQ_NUM; dq++) {
175						pbs_locked_dq[
176							pup + ecc *
177							(max_pup - 1)][dq] =
178							0;
179					}
180				}
181
182				pbs_rep_time = 0;
183				cur_pup = valid_pup;
184				start_over = 0;
185
186				/*
187				 * Run loop On current Pattern and current
188				 * pattern iteration (just to cover the false
189				 * fail problem)
190				 */
191				do {
192					DEBUG_PBS_S("DDR3 - PBS Tx - Pbs Rep Loop is ");
193					DEBUG_PBS_D(pbs_rep_time, 1);
194					DEBUG_PBS_S(", for Retry No.");
195					DEBUG_PBS_D(pbs_retry, 1);
196					DEBUG_PBS_S("\n");
197
198					/* Set all PBS values to MIN (0) */
199					DEBUG_PBS_S("DDR3 - PBS Tx - Set all PBS values to MIN\n");
200
201					for (dq = 0; dq < DQ_NUM; dq++) {
202						ddr3_write_pup_reg(
203							PUP_PBS_TX +
204							pbs_dq_mapping[pup *
205								(1 - ecc) +
206								ecc * ECC_PUP]
207							[dq], CS0, (1 - ecc) *
208							PUP_BC + ecc * ECC_PUP, 0,
209							0);
210					}
211
212					/*
213					 * Shift DQ ADLL right, One step before
214					 * fail
215					 */
216					DEBUG_PBS_S("DDR3 - PBS Tx - ADLL shift right one phase before fail\n");
217
218					if (MV_OK != ddr3_tx_shift_dqs_adll_step_before_fail
219					    (dram_info, cur_pup, pattern_idx,
220					     ecc))
221						return MV_DDR3_TRAINING_ERR_PBS_ADLL_SHR_1PHASE;
222
223					/* PBS For each bit */
224					DEBUG_PBS_S("DDR3 - PBS Tx - perform PBS for each bit\n");
225
226					/*
227					 * In this stage - start_over = 0
228					 */
229					if (MV_OK != ddr3_pbs_per_bit(
230						    dram_info, &start_over, 1,
231						    &cur_pup, pattern_idx, ecc))
232						return MV_DDR3_TRAINING_ERR_PBS_TX_PER_BIT;
233
234				} while ((start_over == 1) &&
235					 (++pbs_rep_time < COUNT_PBS_STARTOVER));
236
237				if (pbs_rep_time == COUNT_PBS_STARTOVER &&
238				    start_over == 1) {
239					DEBUG_PBS_S("DDR3 - PBS Tx - FAIL - Adll reach max value\n");
240					return MV_DDR3_TRAINING_ERR_PBS_TX_MAX_VAL;
241				}
242
243				DEBUG_PBS_FULL_C("DDR3 - PBS TX - values for iteration - ",
244						 pbs_retry, 1);
245				for (pup = 0; pup < cur_max_pup; pup++) {
246					/*
247					 * To minimize delay elements, inc
248					 * from pbs value the min pbs val
249					 */
250					DEBUG_PBS_S("DDR3 - PBS - PUP");
251					DEBUG_PBS_D((pup + (ecc * ECC_PUP)), 1);
252					DEBUG_PBS_S(": ");
253
254					for (dq = 0; dq < DQ_NUM; dq++) {
255						/* Set skew value for all dq */
256						/*
257						 * Bit# Deskew <- Bit# Deskew -
258						 * last / first  failing bit
259						 * Deskew For all bits (per PUP)
260						 * (minimize delay elements)
261						 */
262						DEBUG_PBS_S("DQ");
263						DEBUG_PBS_D(dq, 1);
264						DEBUG_PBS_S("-");
265						DEBUG_PBS_D(skew_array
266							    [((pup) * DQ_NUM) +
267							     dq], 2);
268						DEBUG_PBS_S(", ");
269					}
270					DEBUG_PBS_S("\n");
271				}
272
273				/*
274				 * Collect the results we got on this trial
275				 * of PBS
276				 */
277				for (pup = 0; pup < cur_max_pup; pup++) {
278					for (dq = 0; dq < DQ_NUM; dq++) {
279						skew_sum_array[pup + (ecc * (max_pup - 1))]
280							[dq] += skew_array
281							[((pup) * DQ_NUM) + dq];
282					}
283				}
284
285				/* ECC Support - Disable ECC MUX */
286				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
287					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
288				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
289			}
290		}
291
292		DEBUG_PBS_C("DDR3 - PBS TX - values for current pattern - ",
293			    pattern_idx, 1);
294		for (pup = 0; pup < max_pup; pup++) {
295			/*
296			 * To minimize delay elements, inc from pbs value the
297			 * min pbs val
298			 */
299			DEBUG_PBS_S("DDR3 - PBS - PUP");
300			DEBUG_PBS_D(pup, 1);
301			DEBUG_PBS_S(": ");
302
303			for (dq = 0; dq < DQ_NUM; dq++) {
304				/* set skew value for all dq */
305				/* Bit# Deskew <- Bit# Deskew - last / first  failing bit Deskew For all bits (per PUP) (minimize delay elements) */
306				DEBUG_PBS_S("DQ");
307				DEBUG_PBS_D(dq, 1);
308				DEBUG_PBS_S("-");
309				DEBUG_PBS_D(skew_sum_array[pup][dq] /
310					    COUNT_PBS_REPEAT, 2);
311				DEBUG_PBS_S(", ");
312			}
313			DEBUG_PBS_S("\n");
314		}
315
316		/*
317		 * Calculate the average skew for current pattern for each
318		 * pup and each bit
319		 */
320		DEBUG_PBS_C("DDR3 - PBS TX - Average for pattern - ",
321			    pattern_idx, 1);
322
323		for (pup = 0; pup < max_pup; pup++) {
324			/*
325			 * FOR ECC only :: found min and max value for current
326			 * pattern skew array
327			 */
328			/* Loop for all dqs */
329			for (dq = 0; dq < DQ_NUM; dq++) {
330				pattern_skew_array[pup][dq] +=
331					(skew_sum_array[pup][dq] /
332					 COUNT_PBS_REPEAT);
333			}
334		}
335	}
336
337	/* Calculate the average skew */
338	for (pup = 0; pup < max_pup; pup++) {
339		for (dq = 0; dq < DQ_NUM; dq++)
340			skew_array[((pup) * DQ_NUM) + dq] =
341				pattern_skew_array[pup][dq] / COUNT_PBS_PATTERN;
342	}
343
344	DEBUG_PBS_S("DDR3 - PBS TX - Average for all patterns:\n");
345	for (pup = 0; pup < max_pup; pup++) {
346		/*
347		 * To minimize delay elements, inc from pbs value the min
348		 * pbs val
349		 */
350		DEBUG_PBS_S("DDR3 - PBS - PUP");
351		DEBUG_PBS_D(pup, 1);
352		DEBUG_PBS_S(": ");
353
354		for (dq = 0; dq < DQ_NUM; dq++) {
355			/* Set skew value for all dq */
356			/*
357			 * Bit# Deskew <- Bit# Deskew - last / first
358			 * failing bit Deskew For all bits (per PUP)
359			 * (minimize delay elements)
360			 */
361			DEBUG_PBS_S("DQ");
362			DEBUG_PBS_D(dq, 1);
363			DEBUG_PBS_S("-");
364			DEBUG_PBS_D(skew_array[(pup * DQ_NUM) + dq], 2);
365			DEBUG_PBS_S(", ");
366		}
367		DEBUG_PBS_S("\n");
368	}
369
370	/* Return ADLL to default value */
371	for (pup = 0; pup < max_pup; pup++) {
372		if (pup == (max_pup - 1) && dram_info->ecc_ena)
373			pup = ECC_PUP;
374		ddr3_pbs_write_pup_dqs_reg(CS0, pup, INIT_WL_DELAY);
375	}
376
377	/* Set averaged PBS results */
378	ddr3_set_pbs_results(dram_info, 1);
379
380	/* Disable SW override - Must be in a different stage */
381	/* [0]=0 - Enable SW override  */
382	reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
383	reg &= ~(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
384	/* 0x15B8 - Training SW 2 Register */
385	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
386
387	reg = reg_read(REG_DRAM_TRAINING_1_ADDR) |
388		(1 << REG_DRAM_TRAINING_1_TRNBPOINT_OFFS);
389	reg_write(REG_DRAM_TRAINING_1_ADDR, reg);
390
391	DEBUG_PBS_S("DDR3 - PBS Tx - PBS TX ended successfuly\n");
392
393	return MV_OK;
394}
395
396/*
397 * Name:     ddr3_tx_shift_dqs_adll_step_before_fail
398 * Desc:     Execute the Tx shift DQ phase.
399 * Args:     dram_info            ddr3 training information struct
400 *           cur_pup              bit array of the function active pups.
401 *           pbs_pattern_idx      Index of PBS pattern
402 * Notes:
403 * Returns:  MV_OK if success, other error code if fail.
404 */
405static int ddr3_tx_shift_dqs_adll_step_before_fail(MV_DRAM_INFO *dram_info,
406						   u32 cur_pup,
407						   u32 pbs_pattern_idx, u32 ecc)
408{
409	u32 unlock_pup;		/* bit array of unlock pups  */
410	u32 new_lockup_pup;	/* bit array of compare failed pups */
411	u32 adll_val = 4;	/* INIT_WL_DELAY */
412	u32 cur_max_pup, pup;
413	u32 dqs_dly_set[MAX_PUP_NUM] = { 0 };
414	u32 *pattern_ptr;
415
416	/* Choose pattern */
417	switch (dram_info->ddr_width) {
418#if defined(MV88F672X)
419	case 16:
420		pattern_ptr = (u32 *)&pbs_pattern[pbs_pattern_idx];
421		break;
422#endif
423	case 32:
424		pattern_ptr = (u32 *)&pbs_pattern_32b[pbs_pattern_idx];
425		break;
426#if defined(MV88F78X60)
427	case 64:
428		pattern_ptr = (u32 *)&pbs_pattern_64b[pbs_pattern_idx];
429		break;
430#endif
431	default:
432		return MV_FAIL;
433	}
434
435	/* Set current pup number */
436	if (cur_pup == 0x1)	/* Ecc mode */
437		cur_max_pup = 1;
438	else
439		cur_max_pup = dram_info->num_of_std_pups;
440
441	unlock_pup = cur_pup;	/* '1' for each unlocked pup */
442
443	/* Loop on all ADLL Vaules */
444	do {
445		/* Loop until found first fail */
446		adll_val++;
447
448		/*
449		 * Increment (Move to right - ADLL) DQ TX delay
450		 * (broadcast to all Data PUPs)
451		 */
452		for (pup = 0; pup < cur_max_pup; pup++)
453			ddr3_pbs_write_pup_dqs_reg(CS0,
454						   pup * (1 - ecc) +
455						   ECC_PUP * ecc, adll_val);
456
457		/*
458		 * Write and Read, compare results (read was already verified)
459		 */
460		/* 0 - all locked */
461		new_lockup_pup = 0;
462
463		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup,
464						&new_lockup_pup,
465						pattern_ptr, LEN_PBS_PATTERN,
466						SDRAM_PBS_TX_OFFS, 1, 0,
467						NULL,
468						0))
469			return MV_FAIL;
470
471		unlock_pup &= ~new_lockup_pup;
472
473		DEBUG_PBS_FULL_S("Shift DQS by 2 steps for PUPs: ");
474		DEBUG_PBS_FULL_D(unlock_pup, 2);
475		DEBUG_PBS_FULL_C(", Set ADLL value = ", adll_val, 2);
476
477		/* If any PUP failed there is '1' to mark the PUP */
478		if (new_lockup_pup != 0) {
479			/*
480			 * Decrement (Move Back to Left two steps - ADLL)
481			 * DQ TX delay for current failed pups and save
482			 */
483			for (pup = 0; pup < cur_max_pup; pup++) {
484				if (((new_lockup_pup >> pup) & 0x1) &&
485				    dqs_dly_set[pup] == 0)
486					dqs_dly_set[pup] = adll_val - 1;
487			}
488		}
489	} while ((unlock_pup != 0) && (adll_val != ADLL_MAX));
490
491	if (unlock_pup != 0) {
492		DEBUG_PBS_FULL_S("DDR3 - PBS Tx - Shift DQ - Adll value reached maximum\n");
493
494		for (pup = 0; pup < cur_max_pup; pup++) {
495			if (((unlock_pup >> pup) & 0x1) &&
496			    dqs_dly_set[pup] == 0)
497				dqs_dly_set[pup] = adll_val - 1;
498		}
499	}
500
501	DEBUG_PBS_FULL_C("PBS TX one step before fail last pups locked Adll ",
502			 adll_val - 2, 2);
503
504	/* Set the PUP DQS DLY Values */
505	for (pup = 0; pup < cur_max_pup; pup++)
506		ddr3_pbs_write_pup_dqs_reg(CS0, pup * (1 - ecc) + ECC_PUP * ecc,
507					   dqs_dly_set[pup]);
508
509	/* Found one phase before fail */
510	return MV_OK;
511}
512
513/*
514 * Name:     ddr3_pbs_rx
515 * Desc:     Execute the PBS RX phase.
516 * Args:     dram_info   ddr3 training information struct
517 * Notes:
518 * Returns:  MV_OK if success, other error code if fail.
519 */
520int ddr3_pbs_rx(MV_DRAM_INFO *dram_info)
521{
522	/*
523	 * Array to hold the total sum of skew from all iterations
524	 * (for average purpose)
525	 */
526	u32 skew_sum_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
527
528	/*
529	 * Array to hold the total average skew from both patterns
530	 * (for average purpose)
531	 */
532	u32 pattern_skew_array[MAX_PUP_NUM][DQ_NUM] = { {0} };
533
534	u32 pbs_rep_time = 0;	/* counts number of loop in case of fail */
535	/* bit array for unlock pups - used to repeat on the RX operation */
536	u32 cur_pup;
537	u32 max_pup;
538	u32 pbs_retry;
539	u32 pup, dq, pups, cur_max_pup, valid_pup, reg;
540	u32 pattern_idx;
541	u32 ecc;
542	/* indicates whether we need to start the loop again */
543	int start_over;
544	int status;
545
546	DEBUG_PBS_S("DDR3 - PBS RX - Starting PBS RX procedure\n");
547
548	pups = dram_info->num_of_total_pups;
549	max_pup = dram_info->num_of_total_pups;
550
551	/* Enable SW override */
552	reg = reg_read(REG_DRAM_TRAINING_2_ADDR) |
553		(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
554	/* [0] = 1 - Enable SW override  */
555	/* 0x15B8 - Training SW 2 Register */
556	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
557	DEBUG_PBS_FULL_S("DDR3 - PBS RX - SW Override Enabled\n");
558
559	reg = 1 << REG_DRAM_TRAINING_AUTO_OFFS;
560	reg_write(REG_DRAM_TRAINING_ADDR, reg);	/* 0x15B0 - Training Register */
561
562	/* Running twice for 2 different patterns. each patterns - 3 times */
563	for (pattern_idx = 0; pattern_idx < COUNT_PBS_PATTERN; pattern_idx++) {
564		DEBUG_PBS_FULL_C("DDR3 - PBS RX - Working with pattern - ",
565				 pattern_idx, 1);
566
567		/* Reset sum array */
568		for (pup = 0; pup < pups; pup++) {
569			for (dq = 0; dq < DQ_NUM; dq++)
570				skew_sum_array[pup][dq] = 0;
571		}
572
573		/*
574		 * Perform PBS several of times (3 for each pattern).
575		 * At the end, we'll use the average
576		 */
577		/* If there is ECC, do each PBS again with mux change */
578		for (pbs_retry = 0; pbs_retry < COUNT_PBS_REPEAT; pbs_retry++) {
579			for (ecc = 0; ecc < (dram_info->ecc_ena + 1); ecc++) {
580				/*
581				 * This parameter stores the current PUP
582				 * num - ecc mode dependent - 4-8 / 1 pups
583				 */
584				cur_max_pup = (1 - ecc) *
585					dram_info->num_of_std_pups + ecc;
586
587				if (ecc) {
588					/* Only 1 pup in this case */
589					valid_pup = 0x1;
590				} else if (cur_max_pup > 4) {
591					/* 64 bit - 8 pups */
592					valid_pup = 0xFF;
593				} else if (cur_max_pup == 4) {
594					/* 32 bit - 4 pups */
595					valid_pup = 0xF;
596				} else {
597					/* 16 bit - 2 pups */
598					valid_pup = 0x3;
599				}
600
601				/* ECC Support - Switch ECC Mux on ecc=1 */
602				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
603					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
604				reg |= (dram_info->ecc_ena * ecc <<
605					REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
606				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
607
608				if (ecc)
609					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - ECC Mux Enabled\n");
610				else
611					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - ECC Mux Disabled\n");
612
613				/* Init iteration values */
614				/* Clear the locked DQs */
615				for (pup = 0; pup < cur_max_pup; pup++) {
616					for (dq = 0; dq < DQ_NUM; dq++) {
617						pbs_locked_dq[
618							pup + ecc * (max_pup - 1)][dq] =
619							0;
620					}
621				}
622
623				pbs_rep_time = 0;
624				cur_pup = valid_pup;
625				start_over = 0;
626
627				/*
628				 * Run loop On current Pattern and current
629				 * pattern iteration (just to cover the false
630				 * fail problem
631				 */
632				do {
633					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Pbs Rep Loop is ");
634					DEBUG_PBS_FULL_D(pbs_rep_time, 1);
635					DEBUG_PBS_FULL_S(", for Retry No.");
636					DEBUG_PBS_FULL_D(pbs_retry, 1);
637					DEBUG_PBS_FULL_S("\n");
638
639					/* Set all PBS values to MAX (31) */
640					for (pup = 0; pup < cur_max_pup; pup++) {
641						for (dq = 0; dq < DQ_NUM; dq++)
642							ddr3_write_pup_reg(
643								PUP_PBS_RX +
644								pbs_dq_mapping[
645								pup * (1 - ecc)
646								+ ecc * ECC_PUP]
647								[dq], CS0,
648								pup + ecc * ECC_PUP,
649								0, MAX_PBS);
650					}
651
652					/* Set all DQS PBS values to MIN (0) */
653					for (pup = 0; pup < cur_max_pup; pup++) {
654						ddr3_write_pup_reg(PUP_PBS_RX +
655								   DQ_NUM, CS0,
656								   pup +
657								   ecc *
658								   ECC_PUP, 0,
659								   0);
660					}
661
662					/* Shift DQS, To first Fail */
663					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Shift RX DQS to first fail\n");
664
665					status = ddr3_rx_shift_dqs_to_first_fail
666						(dram_info, cur_pup,
667						 pattern_idx, ecc);
668					if (MV_OK != status) {
669						DEBUG_PBS_S("DDR3 - PBS Rx - ddr3_rx_shift_dqs_to_first_fail failed.\n");
670						DEBUG_PBS_D(status, 8);
671						DEBUG_PBS_S("\nDDR3 - PBS Rx - SKIP.\n");
672
673						/* Reset read FIFO */
674						reg = reg_read(REG_DRAM_TRAINING_ADDR);
675						/* Start Auto Read Leveling procedure */
676						reg |= (1 << REG_DRAM_TRAINING_RL_OFFS);
677						/* 0x15B0 - Training Register */
678						reg_write(REG_DRAM_TRAINING_ADDR, reg);
679
680						reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
681						reg |= ((1 << REG_DRAM_TRAINING_2_FIFO_RST_OFFS)
682							+ (1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS));
683						/* [0] = 1 - Enable SW override, [4] = 1 - FIFO reset  */
684						/* 0x15B8 - Training SW 2 Register */
685						reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
686
687						do {
688							reg = (reg_read(REG_DRAM_TRAINING_2_ADDR))
689								& (1 <<	REG_DRAM_TRAINING_2_FIFO_RST_OFFS);
690						} while (reg);	/* Wait for '0' */
691
692						reg = reg_read(REG_DRAM_TRAINING_ADDR);
693						/* Clear Auto Read Leveling procedure */
694						reg &= ~(1 << REG_DRAM_TRAINING_RL_OFFS);
695						/* 0x15B0 - Training Register */
696						reg_write(REG_DRAM_TRAINING_ADDR, reg);
697
698						/* Set ADLL to 15 */
699						for (pup = 0; pup < max_pup;
700						     pup++) {
701							ddr3_write_pup_reg
702							    (PUP_DQS_RD, CS0,
703							     pup +
704							     (ecc * ECC_PUP), 0,
705							     15);
706						}
707
708						/* Set all PBS values to MIN (0) */
709						for (pup = 0; pup < cur_max_pup;
710						     pup++) {
711							for (dq = 0;
712							     dq < DQ_NUM; dq++)
713								ddr3_write_pup_reg
714								    (PUP_PBS_RX +
715								     pbs_dq_mapping
716								     [pup * (1 - ecc) +
717								      ecc * ECC_PUP]
718								     [dq], CS0,
719								     pup + ecc * ECC_PUP,
720								     0, MIN_PBS);
721						}
722
723						return MV_OK;
724					}
725
726					/* PBS For each bit */
727					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - perform PBS for each bit\n");
728					/* in this stage - start_over = 0; */
729					if (MV_OK != ddr3_pbs_per_bit(
730						    dram_info, &start_over,
731						    0, &cur_pup,
732						    pattern_idx, ecc)) {
733						DEBUG_PBS_S("DDR3 - PBS Rx - ddr3_pbs_per_bit failed.");
734						return MV_DDR3_TRAINING_ERR_PBS_RX_PER_BIT;
735					}
736
737				} while ((start_over == 1) &&
738					 (++pbs_rep_time < COUNT_PBS_STARTOVER));
739
740				if (pbs_rep_time == COUNT_PBS_STARTOVER &&
741				    start_over == 1) {
742					DEBUG_PBS_FULL_S("DDR3 - PBS Rx - FAIL - Algorithm failed doing RX PBS\n");
743					return MV_DDR3_TRAINING_ERR_PBS_RX_MAX_VAL;
744				}
745
746				/* Return DQS ADLL to default value - 15 */
747				/* Set all DQS PBS values to MIN (0) */
748				for (pup = 0; pup < cur_max_pup; pup++)
749					ddr3_write_pup_reg(PUP_DQS_RD, CS0,
750							   pup + ecc * ECC_PUP,
751							   0, INIT_RL_DELAY);
752
753				DEBUG_PBS_FULL_C("DDR3 - PBS RX - values for iteration - ",
754						 pbs_retry, 1);
755				for (pup = 0; pup < cur_max_pup; pup++) {
756					/*
757					 * To minimize delay elements, inc from
758					 * pbs value the min pbs val
759					 */
760					DEBUG_PBS_FULL_S("DDR3 - PBS - PUP");
761					DEBUG_PBS_FULL_D((pup +
762							  (ecc * ECC_PUP)), 1);
763					DEBUG_PBS_FULL_S(": ");
764
765					for (dq = 0; dq < DQ_NUM; dq++) {
766						/* Set skew value for all dq */
767						/*
768						 * Bit# Deskew <- Bit# Deskew -
769						 * last / first  failing bit
770						 * Deskew For all bits (per PUP)
771						 * (minimize delay elements)
772						 */
773						DEBUG_PBS_FULL_S("DQ");
774						DEBUG_PBS_FULL_D(dq, 1);
775						DEBUG_PBS_FULL_S("-");
776						DEBUG_PBS_FULL_D(skew_array
777								 [((pup) *
778								   DQ_NUM) +
779								  dq], 2);
780						DEBUG_PBS_FULL_S(", ");
781					}
782					DEBUG_PBS_FULL_S("\n");
783				}
784
785				/*
786				 * Collect the results we got on this trial
787				 * of PBS
788				 */
789				for (pup = 0; pup < cur_max_pup; pup++) {
790					for (dq = 0; dq < DQ_NUM; dq++) {
791						skew_sum_array
792							[pup + (ecc * (max_pup - 1))]
793							[dq] +=
794							skew_array[((pup) * DQ_NUM) + dq];
795					}
796				}
797
798				/* ECC Support - Disable ECC MUX */
799				reg = reg_read(REG_DRAM_TRAINING_2_ADDR) &
800					~(1 << REG_DRAM_TRAINING_2_ECC_MUX_OFFS);
801				reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
802			}
803		}
804
805		/*
806		 * Calculate the average skew for current pattern for each
807		 * pup and each bit
808		 */
809		DEBUG_PBS_FULL_C("DDR3 - PBS RX - Average for pattern - ",
810				 pattern_idx, 1);
811		for (pup = 0; pup < max_pup; pup++) {
812			/*
813			 * FOR ECC only :: found min and max value for
814			 * current pattern skew array
815			 */
816			/* Loop for all dqs */
817			for (dq = 0; dq < DQ_NUM; dq++) {
818				pattern_skew_array[pup][dq] +=
819					(skew_sum_array[pup][dq] /
820					 COUNT_PBS_REPEAT);
821			}
822		}
823
824		DEBUG_PBS_C("DDR3 - PBS RX - values for current pattern - ",
825			    pattern_idx, 1);
826		for (pup = 0; pup < max_pup; pup++) {
827			/*
828			 * To minimize delay elements, inc from pbs value the
829			 * min pbs val
830			 */
831			DEBUG_PBS_S("DDR3 - PBS RX - PUP");
832			DEBUG_PBS_D(pup, 1);
833			DEBUG_PBS_S(": ");
834
835			for (dq = 0; dq < DQ_NUM; dq++) {
836				/* Set skew value for all dq */
837				/*
838				 * Bit# Deskew <- Bit# Deskew - last / first
839				 * failing bit Deskew For all bits (per PUP)
840				 * (minimize delay elements)
841				 */
842				DEBUG_PBS_S("DQ");
843				DEBUG_PBS_D(dq, 1);
844				DEBUG_PBS_S("-");
845				DEBUG_PBS_D(skew_sum_array[pup][dq] /
846					    COUNT_PBS_REPEAT, 2);
847				DEBUG_PBS_S(", ");
848			}
849			DEBUG_PBS_S("\n");
850		}
851	}
852
853	/* Calculate the average skew */
854	for (pup = 0; pup < max_pup; pup++) {
855		for (dq = 0; dq < DQ_NUM; dq++)
856			skew_array[((pup) * DQ_NUM) + dq] =
857				pattern_skew_array[pup][dq] / COUNT_PBS_PATTERN;
858	}
859
860	DEBUG_PBS_S("DDR3 - PBS RX - Average for all patterns:\n");
861	for (pup = 0; pup < max_pup; pup++) {
862		/*
863		 * To minimize delay elements, inc from pbs value the
864		 * min pbs val
865		 */
866		DEBUG_PBS_S("DDR3 - PBS - PUP");
867		DEBUG_PBS_D(pup, 1);
868		DEBUG_PBS_S(": ");
869
870		for (dq = 0; dq < DQ_NUM; dq++) {
871			/* Set skew value for all dq */
872			/*
873			 * Bit# Deskew <- Bit# Deskew - last / first
874			 * failing bit Deskew For all bits (per PUP)
875			 * (minimize delay elements)
876			 */
877			DEBUG_PBS_S("DQ");
878			DEBUG_PBS_D(dq, 1);
879			DEBUG_PBS_S("-");
880			DEBUG_PBS_D(skew_array[(pup * DQ_NUM) + dq], 2);
881			DEBUG_PBS_S(", ");
882		}
883		DEBUG_PBS_S("\n");
884	}
885
886	/* Return ADLL to default value */
887	ddr3_write_pup_reg(PUP_DQS_RD, CS0, PUP_BC, 0, INIT_RL_DELAY);
888
889	/* Set averaged PBS results */
890	ddr3_set_pbs_results(dram_info, 0);
891
892	/* Disable SW override - Must be in a different stage */
893	/* [0]=0 - Enable SW override  */
894	reg = reg_read(REG_DRAM_TRAINING_2_ADDR);
895	reg &= ~(1 << REG_DRAM_TRAINING_2_SW_OVRD_OFFS);
896	/* 0x15B8 - Training SW 2 Register */
897	reg_write(REG_DRAM_TRAINING_2_ADDR, reg);
898
899	reg = reg_read(REG_DRAM_TRAINING_1_ADDR) |
900		(1 << REG_DRAM_TRAINING_1_TRNBPOINT_OFFS);
901	reg_write(REG_DRAM_TRAINING_1_ADDR, reg);
902
903	DEBUG_PBS_FULL_S("DDR3 - PBS RX - ended successfuly\n");
904
905	return MV_OK;
906}
907
908/*
909 * Name:     ddr3_rx_shift_dqs_to_first_fail
910 * Desc:     Execute the Rx shift DQ phase.
911 * Args:     dram_info           ddr3 training information struct
912 *           cur_pup             bit array of the function active pups.
913 *           pbs_pattern_idx     Index of PBS pattern
914 * Notes:
915 * Returns:  MV_OK if success, other error code if fail.
916 */
917static int ddr3_rx_shift_dqs_to_first_fail(MV_DRAM_INFO *dram_info, u32 cur_pup,
918					   u32 pbs_pattern_idx, u32 ecc)
919{
920	u32 unlock_pup;		/* bit array of unlock pups  */
921	u32 new_lockup_pup;	/* bit array of compare failed pups */
922	u32 adll_val = MAX_DELAY;
923	u32 dqs_deskew_val = 0;	/* current value of DQS PBS deskew */
924	u32 cur_max_pup, pup, pass_pup;
925	u32 *pattern_ptr;
926
927	/* Choose pattern */
928	switch (dram_info->ddr_width) {
929#if defined(MV88F672X)
930	case 16:
931		pattern_ptr = (u32 *)&pbs_pattern[pbs_pattern_idx];
932		break;
933#endif
934	case 32:
935		pattern_ptr = (u32 *)&pbs_pattern_32b[pbs_pattern_idx];
936		break;
937#if defined(MV88F78X60)
938	case 64:
939		pattern_ptr = (u32 *)&pbs_pattern_64b[pbs_pattern_idx];
940		break;
941#endif
942	default:
943		return MV_FAIL;
944	}
945
946	/* Set current pup number */
947	if (cur_pup == 0x1)	/* Ecc mode */
948		cur_max_pup = 1;
949	else
950		cur_max_pup = dram_info->num_of_std_pups;
951
952	unlock_pup = cur_pup;	/* '1' for each unlocked pup */
953
954	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Starting...\n");
955
956	/* Set DQS ADLL to MAX */
957	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Set DQS ADLL to Max for all PUPs\n");
958	for (pup = 0; pup < cur_max_pup; pup++)
959		ddr3_write_pup_reg(PUP_DQS_RD, CS0, pup + ecc * ECC_PUP, 0,
960				   MAX_DELAY);
961
962	/* Loop on all ADLL Vaules */
963	do {
964		/* Loop until found fail for all pups */
965		new_lockup_pup = 0;
966		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup,
967						&new_lockup_pup,
968						pattern_ptr, LEN_PBS_PATTERN,
969						SDRAM_PBS_I_OFFS +
970						pbs_pattern_idx * SDRAM_PBS_NEXT_OFFS,
971						0, 0, NULL, 0)) {
972			DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP(ddr3_sdram_compare)\n");
973			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
974		}
975
976		if ((new_lockup_pup != 0) && (dqs_deskew_val <= 1)) {
977			/* Fail on start with first deskew value */
978			/* Decrement DQS ADLL */
979			--adll_val;
980			if (adll_val == ADLL_MIN) {
981				DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - fail on start with first deskew value\n");
982				return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
983			}
984			ddr3_write_pup_reg(PUP_DQS_RD, CS0, pup + ecc * ECC_PUP,
985					   0, adll_val);
986			continue;
987		}
988
989		/* Update all new locked pups */
990		unlock_pup &= ~new_lockup_pup;
991
992		if ((unlock_pup == 0) || (dqs_deskew_val == MAX_PBS)) {
993			if (dqs_deskew_val == MAX_PBS) {
994				/*
995				 * Reach max value of dqs deskew or get fail
996				 * for all pups
997				 */
998				DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - DQS deskew reached maximum value\n");
999			}
1000			break;
1001		}
1002
1003		DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - Inc DQS deskew for PUPs: ");
1004		DEBUG_PBS_FULL_D(unlock_pup, 2);
1005		DEBUG_PBS_FULL_C(", deskew = ", dqs_deskew_val, 2);
1006
1007		/* Increment DQS deskew elements - Only for unlocked pups */
1008		dqs_deskew_val++;
1009		for (pup = 0; pup < cur_max_pup; pup++) {
1010			if (IS_PUP_ACTIVE(unlock_pup, pup) == 1) {
1011				ddr3_write_pup_reg(PUP_PBS_RX + DQS_DQ_NUM, CS0,
1012						   pup + ecc * ECC_PUP, 0,
1013						   dqs_deskew_val);
1014			}
1015		}
1016	} while (1);
1017
1018	DEBUG_PBS_FULL_S("DDR3 - PBS RX - Shift DQS - ADLL shift one step before fail\n");
1019	/* Continue to ADLL shift one step before fail */
1020	unlock_pup = cur_pup;
1021	do {
1022		/* Loop until pass compare for all pups */
1023		new_lockup_pup = 0;
1024		/* Read and compare results  */
1025		if (MV_OK != ddr3_sdram_compare(dram_info, unlock_pup, &new_lockup_pup,
1026						pattern_ptr, LEN_PBS_PATTERN,
1027						SDRAM_PBS_I_OFFS +
1028						pbs_pattern_idx * SDRAM_PBS_NEXT_OFFS,
1029						1, 0, NULL, 0)) {
1030			DEBUG_PBS_S("DDR3 - PBS Rx - Shift DQS - MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP(ddr3_sdram_compare)\n");
1031			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_SRAM_CMP;
1032		}
1033
1034		/*
1035		 * Get mask for pup which passed so their adll will be
1036		 * changed to 2 steps before fails
1037		 */
1038		pass_pup = unlock_pup & ~new_lockup_pup;
1039
1040		DEBUG_PBS_FULL_S("Shift DQS by 2 steps for PUPs: ");
1041		DEBUG_PBS_FULL_D(pass_pup, 2);
1042		DEBUG_PBS_FULL_C(", Set ADLL value = ", (adll_val - 2), 2);
1043
1044		/* Only for pass pups   */
1045		for (pup = 0; pup < cur_max_pup; pup++) {
1046			if (IS_PUP_ACTIVE(pass_pup, pup) == 1) {
1047				ddr3_write_pup_reg(PUP_DQS_RD, CS0,
1048						   pup + ecc * ECC_PUP, 0,
1049						   (adll_val - 2));
1050			}
1051		}
1052
1053		/* Locked pups that compare success  */
1054		unlock_pup &= new_lockup_pup;
1055
1056		if (unlock_pup == 0) {
1057			/* All pups locked */
1058			break;
1059		}
1060
1061		/* Found error */
1062		if (adll_val == 0) {
1063			DEBUG_PBS_FULL_S("DDR3 - PBS Rx - Shift DQS - Adll reach min value\n");
1064			return MV_DDR3_TRAINING_ERR_PBS_SHIFT_QDS_MAX_VAL;
1065		}
1066
1067		/*
1068		 * Decrement (Move Back to Left one phase - ADLL) dqs RX delay
1069		 */
1070		adll_val--;
1071		for (pup = 0; pup < cur_max_pup; pup++) {
1072			if (IS_PUP_ACTIVE(unlock_pup, pup) == 1) {
1073				ddr3_write_pup_reg(PUP_DQS_RD, CS0,
1074						   pup + ecc * ECC_PUP, 0,
1075						   adll_val);
1076			}
1077		}
1078	} while (1);
1079
1080	return MV_OK;
1081}
1082
1083/*
1084 * lock_pups() extracted from ddr3_pbs_per_bit(). This just got too
1085 * much indented making it hard to read / edit.
1086 */
1087static void lock_pups(u32 pup, u32 *pup_locked, u8 *unlock_pup_dq_array,
1088		      u32 pbs_curr_val, u32 start_pbs, u32 ecc, int is_tx)
1089{
1090	u32 dq;
1091	int idx;
1092
1093	/* Lock PBS value for all remaining PUPs bits */
1094	DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Lock PBS value for all remaining PUPs bits, pup ");
1095	DEBUG_PBS_FULL_D(pup, 1);
1096	DEBUG_PBS_FULL_C(" pbs value ", pbs_curr_val, 2);
1097
1098	idx = pup * (1 - ecc) + ecc * ECC_PUP;
1099	*pup_locked &= ~(1 << pup);
1100
1101	for (dq = 0; dq < DQ_NUM; dq++) {
1102		if (IS_PUP_ACTIVE(unlock_pup_dq_array[dq], pup) == 1) {
1103			int offs;
1104
1105			/* Lock current dq */
1106			unlock_pup_dq_array[dq] &= ~(1 << pup);
1107			skew_array[(pup * DQ_NUM) + dq] = pbs_curr_val;
1108
1109			if (is_tx == 1)
1110				offs = PUP_PBS_TX;
1111			else
1112				offs = PUP_PBS_RX;
1113
1114			ddr3_write_pup_reg(offs +
1115					   pbs_dq_mapping[idx][dq], CS0,
1116					   idx, 0, start_pbs);
1117		}
1118	}
1119}
1120
/*
 * Name:     ddr3_pbs_per_bit
 * Desc:     Execute the Per Bit Skew phase.
 *           Sweeps the PBS value one step at a time (up from MIN_PBS for Tx,
 *           down from MAX_PBS for Rx), re-running the pattern compare after
 *           each step, and records in skew_array the PBS value at which each
 *           DQ of each pup gets locked.
 * Args:     dram_info       DRAM configuration (num_of_std_pups is read here)
 *           start_over      In/out. Return whether need to start over the
 *                           algorithm; the value on entry is also read when
 *                           the sweep reaches its last PBS value
 *           is_tx           Indicate whether Rx or Tx (1 means Tx)
 *           pcur_pup        bit array of the function active pups. return the
 *                           pups that need to repeat on the PBS (only updated
 *                           when the sweep reaches its last PBS value)
 *           pbs_pattern_idx Index of PBS pattern
 *           ecc             With ecc == 1 register accesses target ECC_PUP,
 *                           with ecc == 0 they target the loop's pup index
 *
 * Notes:    Current implementation supports double activation of this function.
 *           i.e. in order to activate this function (using start_over) more than
 *           twice, the implementation should change.
 *           Implementation limitations are marked using
 *           ' CHIP-ONLY! - Implementation Limitation '
 * Returns:  MV_OK if success, MV_FAIL if ddr3_sdram_pbs_compare() fails.
 */
static int ddr3_pbs_per_bit(MV_DRAM_INFO *dram_info, int *start_over, int is_tx,
			    u32 *pcur_pup, u32 pbs_pattern_idx, u32 ecc)
{
	/*
	 * Bit array to indicate if we already get fail on bit per pup & dq bit.
	 * One mask per DQ; a set bit means that pup's DQ is still unlocked.
	 * NOTE(review): the initialiser lists exactly 8 entries, so it
	 * presumably relies on DQ_NUM == 8 - confirm.
	 */
	u8 unlock_pup_dq_array[DQ_NUM] = {
		*pcur_pup, *pcur_pup, *pcur_pup, *pcur_pup, *pcur_pup,
		*pcur_pup, *pcur_pup, *pcur_pup
	};

	/* Per-retry copy of the masks, handed to ddr3_sdram_pbs_compare() */
	u8 cmp_unlock_pup_dq_array[COUNT_PBS_COMP_RETRY_NUM][DQ_NUM];
	u32 pup, dq;
	/* value of pbs is according to RX or TX */
	u32 start_pbs, last_pbs;
	u32 pbs_curr_val;
	/*
	 * Bit array of pups that still have at least one unlocked DQ
	 * (recomputed every iteration as the OR of unlock_pup_dq_array)
	 */
	u32 pup_locked;
	/* PBS value at which each pup saw its first failing DQ */
	u32 first_fail[MAX_PUP_NUM] = { 0 };	/* count first fail per pup */
	/* indicates whether we get first fail per pup */
	int first_failed[MAX_PUP_NUM] = { 0 };
	/*
	 * Bit array of pups for which every DQ is still unlocked, i.e. no DQ
	 * has failed yet (AND of unlock_pup_dq_array with *pcur_pup)
	 */
	u32 sum_pup_fail;
	/* use to calculate diff between curr pbs to first fail pbs */
	u32 calc_pbs_diff;
	u32 pbs_cmp_retry;
	u32 max_pup;

	/* Set init values for retry array - one row per compare retry */
	for (pbs_cmp_retry = 0; pbs_cmp_retry < COUNT_PBS_COMP_RETRY_NUM;
	     pbs_cmp_retry++) {
		for (dq = 0; dq < DQ_NUM; dq++)
			cmp_unlock_pup_dq_array[pbs_cmp_retry][dq] = *pcur_pup;
	}

	/* Start from a clean result table on every activation */
	memset(&skew_array, 0, MAX_PUP_NUM * DQ_NUM * sizeof(u32));

	DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Started\n");

	/* The pbs value depends if rx or tx: Tx sweeps up, Rx sweeps down */
	if (is_tx == 1) {
		start_pbs = MIN_PBS;
		last_pbs = MAX_PBS;
	} else {
		start_pbs = MAX_PBS;
		last_pbs = MIN_PBS;
	}

	pbs_curr_val = start_pbs;
	pup_locked = *pcur_pup;

	/* Set current pup number */
	if (pup_locked == 0x1)	/* Ecc mode */
		max_pup = 1;
	else
		max_pup = dram_info->num_of_std_pups;

	do {
		/* Increment/ decrement PBS for un-lock bits only */
		if (is_tx == 1)
			pbs_curr_val++;
		else
			pbs_curr_val--;

		/* Set Current PBS delay  */
		for (dq = 0; dq < DQ_NUM; dq++) {
			/* Check DQ bits to see if locked in all pups */
			if (unlock_pup_dq_array[dq] == 0) {
				DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - All pups are locked for DQ ");
				DEBUG_PBS_FULL_D(dq, 1);
				DEBUG_PBS_FULL_S("\n");
				continue;
			}

			for (pup = 0; pup < max_pup; pup++) {
				int idx;

				/* Pup used for the register access */
				idx = pup * (1 - ecc) + ecc * ECC_PUP;

				/* Skip pups whose DQ is already locked */
				if (IS_PUP_ACTIVE(unlock_pup_dq_array[dq], pup)
				    == 0)
					continue;

				if (is_tx == 1)
					ddr3_write_pup_reg(
						PUP_PBS_TX + pbs_dq_mapping[idx][dq],
						CS0, idx, 0, pbs_curr_val);
				else
					ddr3_write_pup_reg(
						PUP_PBS_RX + pbs_dq_mapping[idx][dq],
						CS0, idx, 0, pbs_curr_val);
			}
		}

		/*
		 * Write Read and compare results - run the test
		 * COUNT_PBS_COMP_RETRY_NUM times
		 */
		/* Run number of read and write to verify */
		for (pbs_cmp_retry = 0;
		     pbs_cmp_retry < COUNT_PBS_COMP_RETRY_NUM;
		     pbs_cmp_retry++) {

			/* A failing compare call aborts the whole phase */
			if (MV_OK !=
			    ddr3_sdram_pbs_compare(dram_info, pup_locked, is_tx,
						   pbs_pattern_idx,
						   pbs_curr_val, start_pbs,
						   skew_array,
						   cmp_unlock_pup_dq_array
						   [pbs_cmp_retry], ecc))
				return MV_FAIL;

			/* Debug only: report bits that got cleared by this retry */
			for (pup = 0; pup < max_pup; pup++) {
				for (dq = 0; dq < DQ_NUM; dq++) {
					if ((IS_PUP_ACTIVE(unlock_pup_dq_array[dq],
							   pup) == 1)
					    && (IS_PUP_ACTIVE(cmp_unlock_pup_dq_array
					      [pbs_cmp_retry][dq],
					      pup) == 0)) {
						DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - PbsCurrVal: ");
						DEBUG_PBS_FULL_D(pbs_curr_val, 2);
						DEBUG_PBS_FULL_S(" PUP: ");
						DEBUG_PBS_FULL_D(pup, 1);
						DEBUG_PBS_FULL_S(" DQ: ");
						DEBUG_PBS_FULL_D(dq, 1);
						DEBUG_PBS_FULL_S(" - failed\n");
					}
				}
			}

			/* A bit stays unlocked only if every retry left it set */
			for (dq = 0; dq < DQ_NUM; dq++) {
				unlock_pup_dq_array[dq] &=
				    cmp_unlock_pup_dq_array[pbs_cmp_retry][dq];
			}
		}

		pup_locked = 0;
		sum_pup_fail = *pcur_pup;

		/* Check which DQ is failed */
		for (dq = 0; dq < DQ_NUM; dq++) {
			/* Summarize the locked pup */
			pup_locked |= unlock_pup_dq_array[dq];

			/* Check if get fail */
			sum_pup_fail &= unlock_pup_dq_array[dq];
		}

		/* If all PUPS are locked in all DQ - Break */
		if (pup_locked == 0) {
			/* All pups are locked */
			*start_over = 0;
			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit -  All bit in all pups are successfully locked\n");
			break;
		}

		/* PBS deskew elements reach max ? */
		if (pbs_curr_val == last_pbs) {
			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - PBS deskew elements reach max\n");
			/* CHIP-ONLY! - Implementation Limitation */
			/*
			 * Request a restart only if some pup never failed and
			 * the caller did not already pass start_over set
			 */
			*start_over = (sum_pup_fail != 0) && (!(*start_over));
			*pcur_pup = pup_locked;

			DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - StartOver: ");
			DEBUG_PBS_FULL_D(*start_over, 1);
			DEBUG_PBS_FULL_S("  pup_locked: ");
			DEBUG_PBS_FULL_D(pup_locked, 2);
			DEBUG_PBS_FULL_S("  sum_pup_fail: ");
			DEBUG_PBS_FULL_D(sum_pup_fail, 2);
			DEBUG_PBS_FULL_S("\n");

			/* Lock PBS value for all remaining  bits */
			for (pup = 0; pup < max_pup; pup++) {
				/* Check if current pup already received error */
				if (IS_PUP_ACTIVE(pup_locked, pup) == 1) {
					/* Valid pup for current function */
					if (IS_PUP_ACTIVE(sum_pup_fail, pup) ==
					    1 && (*start_over == 1)) {
						/*
						 * Pup never failed and a restart
						 * is requested: leave it in
						 * *pcur_pup for the next run
						 */
						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - skipping lock of pup (first loop of pbs)",
								 pup, 1);
						continue;
					} else
					    if (IS_PUP_ACTIVE(sum_pup_fail, pup)
						== 1) {
						/*
						 * NOTE(review): the message
						 * below contains a literal
						 * "%d"; DEBUG_PBS_FULL_C hands
						 * the string to puts(), so it
						 * is printed verbatim.
						 */
						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - Locking pup %d (even though it wasn't supposed to be locked)",
								 pup, 1);
					}

					/* Already got fail on the PUP */
					/* Lock PBS value for all remaining bits */
					DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Locking remaning DQs for pup - ");
					DEBUG_PBS_FULL_D(pup, 1);
					DEBUG_PBS_FULL_S(": ");

					for (dq = 0; dq < DQ_NUM; dq++) {
						if (IS_PUP_ACTIVE
						    (unlock_pup_dq_array[dq],
						     pup) == 1) {
							DEBUG_PBS_FULL_D(dq, 1);
							DEBUG_PBS_FULL_S(",");
							/* set current PBS */
							skew_array[((pup) *
								    DQ_NUM) +
								   dq] =
							    pbs_curr_val;
						}
					}

					if (*start_over == 1) {
						/*
						 * Reset this pup bit - when
						 * restart the PBS, ignore this
						 * pup
						 */
						*pcur_pup &= ~(1 << pup);
					}
					DEBUG_PBS_FULL_S("\n");
				} else {
					DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - Pup ");
					DEBUG_PBS_FULL_D(pup, 1);
					DEBUG_PBS_FULL_C(" is not set in puplocked - ",
							 pup_locked, 1);
				}
			}

			/* Need to start the PBS again */
			if (*start_over == 1) {
				DEBUG_PBS_FULL_S("DDR3 - PBS Per bit - false fail - returning to start\n");
				return MV_OK;
			}
			break;
		}

		/*
		 * Diff Check: once a pup has seen its first failing DQ, lock
		 * its remaining DQs as soon as the sweep has moved
		 * PBS_DIFF_LIMIT steps past that first fail
		 */
		for (pup = 0; pup < max_pup; pup++) {
			if (IS_PUP_ACTIVE(pup_locked, pup) == 1) {
				/* pup is not locked */
				if (first_failed[pup] == 0) {
					/* No first fail until now */
					if (IS_PUP_ACTIVE(sum_pup_fail, pup) ==
					    0) {
						/* Get first fail */
						DEBUG_PBS_FULL_C("DDR3 - PBS Per bit - First fail in pup ",
								 pup, 1);
						first_failed[pup] = 1;
						first_fail[pup] = pbs_curr_val;
					}
				} else {
					/* Already got first fail */
					if (is_tx == 1) {
						/* TX - inc pbs */
						calc_pbs_diff =	pbs_curr_val -
							first_fail[pup];
					} else {
						/* RX - dec pbs */
						calc_pbs_diff = first_fail[pup] -
							pbs_curr_val;
					}

					if (calc_pbs_diff >= PBS_DIFF_LIMIT) {
						lock_pups(pup, &pup_locked,
							  unlock_pup_dq_array,
							  pbs_curr_val,
							  start_pbs, ecc, is_tx);
					}
				}
			}
		}
	} while (1);

	return MV_OK;
}
1409
1410/*
1411 * Name:         ddr3_set_pbs_results
1412 * Desc:         Set to HW the PBS phase results.
1413 * Args:         is_tx       Indicates whether to set Tx or RX results
1414 * Notes:
1415 * Returns:      MV_OK if success, other error code if fail.
1416 */
1417static int ddr3_set_pbs_results(MV_DRAM_INFO *dram_info, int is_tx)
1418{
1419	u32 pup, phys_pup, dq;
1420	u32 max_pup;		/* number of valid pups */
1421	u32 pbs_min;		/* minimal pbs val per pup */
1422	u32 pbs_max;		/* maximum pbs val per pup */
1423	u32 val[9];
1424
1425	max_pup = dram_info->num_of_total_pups;
1426	DEBUG_PBS_FULL_S("DDR3 - PBS - ddr3_set_pbs_results:\n");
1427
1428	/* Loop for all dqs & pups */
1429	for (pup = 0; pup < max_pup; pup++) {
1430		if (pup == (max_pup - 1) && dram_info->ecc_ena)
1431			phys_pup = ECC_PUP;
1432		else
1433			phys_pup = pup;
1434
1435		/*
1436		 * To minimize delay elements, inc from pbs value the min
1437		 * pbs val
1438		 */
1439		pbs_min = MAX_PBS;
1440		pbs_max = 0;
1441		for (dq = 0; dq < DQ_NUM; dq++) {
1442			if (pbs_min > skew_array[(pup * DQ_NUM) + dq])
1443				pbs_min = skew_array[(pup * DQ_NUM) + dq];
1444
1445			if (pbs_max < skew_array[(pup * DQ_NUM) + dq])
1446				pbs_max = skew_array[(pup * DQ_NUM) + dq];
1447		}
1448
1449		pbs_max -= pbs_min;
1450
1451		DEBUG_PBS_FULL_S("DDR3 - PBS - PUP");
1452		DEBUG_PBS_FULL_D(phys_pup, 1);
1453		DEBUG_PBS_FULL_S(": Min Val = ");
1454		DEBUG_PBS_FULL_D(pbs_min, 2);
1455		DEBUG_PBS_FULL_C(", Max Val = ", pbs_max, 2);
1456
1457		val[pup] = 0;
1458
1459		for (dq = 0; dq < DQ_NUM; dq++) {
1460			int idx;
1461			int offs;
1462
1463			/* Set skew value for all dq */
1464			/*
1465			 * Bit# Deskew <- Bit# Deskew - last / first
1466			 * failing bit Deskew For all bits (per PUP)
1467			 * (minimize delay elements)
1468			 */
1469
1470			DEBUG_PBS_FULL_S("DQ");
1471			DEBUG_PBS_FULL_D(dq, 1);
1472			DEBUG_PBS_FULL_S("-");
1473			DEBUG_PBS_FULL_D((skew_array[(pup * DQ_NUM) + dq] -
1474					  pbs_min), 2);
1475			DEBUG_PBS_FULL_S(", ");
1476
1477			idx = (pup * DQ_NUM) + dq;
1478
1479			if (is_tx == 1)
1480				offs = PUP_PBS_TX;
1481			else
1482				offs = PUP_PBS_RX;
1483
1484			ddr3_write_pup_reg(offs + pbs_dq_mapping[phys_pup][dq],
1485					   CS0, phys_pup, 0,
1486					   skew_array[idx] - pbs_min);
1487
1488			if (is_tx == 1)
1489				val[pup] += skew_array[idx] - pbs_min;
1490		}
1491
1492		DEBUG_PBS_FULL_S("\n");
1493
1494		/* Set the DQS the half of the Max PBS of the DQs  */
1495		if (is_tx == 1) {
1496			ddr3_write_pup_reg(PUP_PBS_TX + 8, CS0, phys_pup, 0,
1497					   pbs_max / 2);
1498			ddr3_write_pup_reg(PUP_PBS_TX + 0xa, CS0, phys_pup, 0,
1499					   val[pup] / 8);
1500		} else
1501			ddr3_write_pup_reg(PUP_PBS_RX + 8, CS0, phys_pup, 0,
1502					   pbs_max / 2);
1503	}
1504
1505	return MV_OK;
1506}
1507
1508static void ddr3_pbs_write_pup_dqs_reg(u32 cs, u32 pup, u32 dqs_delay)
1509{
1510	u32 reg, delay;
1511
1512	reg = (ddr3_read_pup_reg(PUP_WL_MODE, cs, pup) & 0x3FF);
1513	delay = reg & PUP_DELAY_MASK;
1514	reg |= ((dqs_delay + delay) << REG_PHY_DQS_REF_DLY_OFFS);
1515	reg |= REG_PHY_REGISTRY_FILE_ACCESS_OP_WR;
1516	reg |= (pup << REG_PHY_PUP_OFFS);
1517	reg |= ((0x4 * cs + PUP_WL_MODE) << REG_PHY_CS_OFFS);
1518
1519	reg_write(REG_PHY_REGISTRY_FILE_ACCESS_ADDR, reg);	/* 0x16A0 */
1520	do {
1521		reg = reg_read(REG_PHY_REGISTRY_FILE_ACCESS_ADDR) &
1522			REG_PHY_REGISTRY_FILE_ACCESS_OP_DONE;
1523	} while (reg);	/* Wait for '0' to mark the end of the transaction */
1524
1525	udelay(10);
1526}
1527
1528/*
1529 * Set training patterns
1530 */
1531int ddr3_load_pbs_patterns(MV_DRAM_INFO *dram_info)
1532{
1533	u32 cs, cs_count, cs_tmp;
1534	u32 sdram_addr;
1535	u32 *pattern_ptr0, *pattern_ptr1;
1536
1537	/* Choose pattern */
1538	switch (dram_info->ddr_width) {
1539#if defined(MV88F672X)
1540	case 16:
1541		pattern_ptr0 = (u32 *)&pbs_pattern[0];
1542		pattern_ptr1 = (u32 *)&pbs_pattern[1];
1543		break;
1544#endif
1545	case 32:
1546		pattern_ptr0 = (u32 *)&pbs_pattern_32b[0];
1547		pattern_ptr1 = (u32 *)&pbs_pattern_32b[1];
1548		break;
1549#if defined(MV88F78X60)
1550	case 64:
1551		pattern_ptr0 = (u32 *)&pbs_pattern_64b[0];
1552		pattern_ptr1 = (u32 *)&pbs_pattern_64b[1];
1553		break;
1554#endif
1555	default:
1556		return MV_FAIL;
1557	}
1558
1559	/* Loop for each CS */
1560	for (cs = 0; cs < MAX_CS; cs++) {
1561		if (dram_info->cs_ena & (1 << cs)) {
1562			cs_count = 0;
1563			for (cs_tmp = 0; cs_tmp < cs; cs_tmp++) {
1564				if (dram_info->cs_ena & (1 << cs_tmp))
1565					cs_count++;
1566			}
1567
1568			/* Init PBS I pattern */
1569			sdram_addr = (cs_count * (SDRAM_CS_SIZE + 1) +
1570				      SDRAM_PBS_I_OFFS);
1571			if (MV_OK !=
1572			    ddr3_sdram_compare(dram_info, (u32) NULL, NULL,
1573					       pattern_ptr0, LEN_STD_PATTERN,
1574					       sdram_addr, 1, 0, NULL,
1575					       0))
1576				return MV_FAIL;
1577
1578			/* Init PBS II pattern */
1579			sdram_addr = (cs_count * (SDRAM_CS_SIZE + 1) +
1580				      SDRAM_PBS_II_OFFS);
1581			if (MV_OK !=
1582			    ddr3_sdram_compare(dram_info, (u32) NULL, NULL,
1583					       pattern_ptr1, LEN_STD_PATTERN,
1584					       sdram_addr, 1, 0, NULL,
1585					       0))
1586				return MV_FAIL;
1587		}
1588	}
1589
1590	return MV_OK;
1591}
1592#endif
1593