// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) Marvell International Ltd. and its affiliates
 */

#if defined(CONFIG_DDR4)

/* DESCRIPTION: DDR4 Receiver and DQVref Calibration */

#include "ddr3_init.h"
#include "mv_ddr4_training_calibration.h"
#include "mv_ddr4_training.h"
#include "mv_ddr4_mpr_pda_if.h"
#include "mv_ddr_training_db.h"
#include "mv_ddr_regs.h"

#define RX_DIR			0
#define TX_DIR			1
#define MAX_DIR_TYPES		2

#define RECEIVER_DC_STEP_SIZE	3
#define RECEIVER_DC_MIN_RANGE	0
#define RECEIVER_DC_MAX_RANGE	63
#define RECEIVER_DC_MAX_COUNT	(((RECEIVER_DC_MAX_RANGE - RECEIVER_DC_MIN_RANGE) / RECEIVER_DC_STEP_SIZE) + 1)
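/*
 * e.g., with the range and step above, the sweep has
 * ((63 - 0) / 3) + 1 = 22 receiver dc sampling points
 */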

#define PBS_VAL_FACTOR		1000
#define MV_DDR_VW_TX_NOISE_FILTER	8	/* adlls */

u8 dq_vref_vec[MAX_BUS_NUM];	/* stability support */
u8 rx_eye_hi_lvl[MAX_BUS_NUM];	/* rx adjust support */
u8 rx_eye_lo_lvl[MAX_BUS_NUM];	/* rx adjust support */

static u8 pbs_max = 31;
static u8 vdq_tv; /* vref value for dq vref calibration */
static u8 duty_cycle; /* duty cycle value for receiver calibration */
static u8 rx_vw_pos[MAX_INTERFACE_NUM][MAX_BUS_NUM];
static u8 patterns_byte_status[MAX_INTERFACE_NUM][MAX_BUS_NUM];
static const char *str_dir[MAX_DIR_TYPES] = {"read", "write"};

static u8 center_low_element_get(u8 dir, u8 pbs_element, u16 lambda, u8 pbs_max_val)
{
	u8 result;

	if (dir == RX_DIR)
		result = pbs_element * lambda / PBS_VAL_FACTOR;
	else
		result = (pbs_max_val - pbs_element) * lambda / PBS_VAL_FACTOR;

	return result;
}

static u8 center_high_element_get(u8 dir, u8 pbs_element, u16 lambda, u8 pbs_max_val)
{
	u8 result;

	if (dir == RX_DIR)
		result = (pbs_max_val - pbs_element) * lambda / PBS_VAL_FACTOR;
	else
		result = pbs_element * lambda / PBS_VAL_FACTOR;

	return result;
}
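
/*
 * e.g., with lambda = 2500 (2.5 adll taps per pbs tap, scaled by
 * PBS_VAL_FACTOR = 1000), pbs_element = 0, and pbs_max_val = 31, the
 * rx low element is 0 * 2500 / 1000 = 0 adll taps and the rx high
 * element is (31 - 0) * 2500 / 1000 = 77 adll taps
 * (illustrative numbers only; lambda is measured at run time)
 */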

static int mv_ddr4_centralization(u8 dev_num, u16 (*lambda)[MAX_BUS_NUM][BUS_WIDTH_IN_BITS], u8 (*copt)[MAX_BUS_NUM],
				  u8 (*pbs_result)[MAX_BUS_NUM][BUS_WIDTH_IN_BITS], u8 (*vw_size)[MAX_BUS_NUM],
				  u8 mode, u16 param0, u8 param1);
static int mv_ddr4_dqs_reposition(u8 dir, u16 *lambda, u8 *pbs_result, char delta, u8 *copt, u8 *dqs_pbs);
static int mv_ddr4_copt_get(u8 dir, u16 *lambda, u8 *vw_l, u8 *vw_h, u8 *pbs_result, u8 *copt);
static int mv_ddr4_center_of_mass_calc(u8 dev_num, u8 if_id, u8 subphy_num, u8 mode, u8 *vw_l, u8 *vw_h, u8 *vw_v,
				       u8 vw_num, u8 *v_opt, u8 *t_opt);
static int mv_ddr4_tap_tuning(u8 dev_num, u16 (*pbs_tap_factor)[MAX_BUS_NUM][BUS_WIDTH_IN_BITS], u8 mode);

/* dq vref calibration flow */
int mv_ddr4_dq_vref_calibration(u8 dev_num, u16 (*pbs_tap_factor)[MAX_BUS_NUM][BUS_WIDTH_IN_BITS])
{
	u32 if_id, subphy_num;
	u32 vref_idx, dq_idx, pad_num = 0;
	u8 dq_vref_start_win[MAX_INTERFACE_NUM][MAX_BUS_NUM][MV_DDR4_VREF_MAX_COUNT];
	u8 dq_vref_end_win[MAX_INTERFACE_NUM][MAX_BUS_NUM][MV_DDR4_VREF_MAX_COUNT];
	u8 valid_win_size[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 c_opt_per_bus[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 valid_vref_cnt[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 valid_vref_ptr[MAX_INTERFACE_NUM][MAX_BUS_NUM][MV_DDR4_VREF_MAX_COUNT];
	u8 center_adll[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 center_vref[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 pbs_res_per_bus[MAX_INTERFACE_NUM][MAX_BUS_NUM][BUS_WIDTH_IN_BITS];
	u16 vref_avg, vref_subphy_num;
	int vref_tap_idx;
	int vref_range_min;
	struct mv_ddr_topology_map *tm = mv_ddr_topology_map_get();
	enum mv_ddr4_vref_subphy_cal_state all_subphys_state = MV_DDR4_VREF_SUBPHY_CAL_ABOVE;
	int tap_tune_passed = 0;
	enum mv_ddr4_vref_tap_state vref_tap_set_state = MV_DDR4_VREF_TAP_START;
	enum hws_result *flow_result = ddr3_tip_get_result_ptr(training_stage);
	u8 subphy_max = ddr3_tip_dev_attr_get(dev_num, MV_ATTR_OCTET_PER_INTERFACE);
	enum mv_ddr4_vref_subphy_cal_state vref_state_per_subphy[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	int status;
	static u8 vref_byte_status[MAX_INTERFACE_NUM][MAX_BUS_NUM][MV_DDR4_VREF_MAX_RANGE];

	DEBUG_CALIBRATION(DEBUG_LEVEL_INFO, ("Starting ddr4 dq vref calibration training stage\n"));

	vdq_tv = 0;
	duty_cycle = 0;

	/* reset valid vref counter per if and subphy */
	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		for (subphy_num = 0; subphy_num < MAX_BUS_NUM; subphy_num++) {
			valid_vref_cnt[if_id][subphy_num] = 0;
			vref_state_per_subphy[if_id][subphy_num] = MV_DDR4_VREF_SUBPHY_CAL_ABOVE;
		}
	}

	if (mv_ddr4_tap_tuning(dev_num, pbs_tap_factor, TX_DIR) == MV_OK)
		tap_tune_passed = 1;

	/* place dram to vref training mode */
	mv_ddr4_vref_training_mode_ctrl(dev_num, 0, ACCESS_TYPE_MULTICAST, 1);

	/* main loop for 2d scan (low_to_high voltage scan) */
	vref_tap_idx = MV_DDR4_VREF_MAX_RANGE;
	vref_range_min = MV_DDR4_VREF_MIN_RANGE;

	if (vref_range_min < MV_DDR4_VREF_STEP_SIZE)
		vref_range_min = MV_DDR4_VREF_STEP_SIZE;

	/* clean vref status array */
	memset(vref_byte_status, BYTE_NOT_DEFINED, sizeof(vref_byte_status));

	for (vref_tap_idx = MV_DDR4_VREF_MAX_RANGE; (vref_tap_idx >= vref_range_min) &&
	     (all_subphys_state != MV_DDR4_VREF_SUBPHY_CAL_UNDER);
	     vref_tap_idx -= MV_DDR4_VREF_STEP_SIZE) {
		/* set new vref training value in dram */
		mv_ddr4_vref_tap_set(dev_num, 0, ACCESS_TYPE_MULTICAST, vref_tap_idx, vref_tap_set_state);

		if (tap_tune_passed == 0) {
			if (mv_ddr4_tap_tuning(dev_num, pbs_tap_factor, TX_DIR) == MV_OK)
				tap_tune_passed = 1;
			else
				continue;
		}

		if (mv_ddr4_centralization(dev_num, pbs_tap_factor, c_opt_per_bus, pbs_res_per_bus,
					   valid_win_size, TX_DIR, vref_tap_idx, 0) != MV_OK) {
			DEBUG_CALIBRATION(DEBUG_LEVEL_ERROR,
					  ("error: %s: ddr4 centralization failed (dq vref tap index %d)!!!\n",
					   __func__, vref_tap_idx));
			continue;
		}

		/* go over all results and find out the vref start and end window */
		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
				if (valid_win_size[if_id][subphy_num] > MV_DDR_VW_TX_NOISE_FILTER) {
					if (vref_state_per_subphy[if_id][subphy_num] == MV_DDR4_VREF_SUBPHY_CAL_UNDER)
						DEBUG_CALIBRATION(DEBUG_LEVEL_ERROR,
								  ("warning: %s: subphy %d vref tap %d voltage noise\n",
								   __func__, subphy_num, vref_tap_idx));
					/* window is valid; keep current vref_tap_idx value and increment counter */
					vref_idx = valid_vref_cnt[if_id][subphy_num];
					valid_vref_ptr[if_id][subphy_num][vref_idx] = vref_tap_idx;
					valid_vref_cnt[if_id][subphy_num]++;

					/* set 0 for possible negative values */
					vref_byte_status[if_id][subphy_num][vref_idx] |=
						patterns_byte_status[if_id][subphy_num];
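					/*
					 * derive this vref tap's window edges from the
					 * centralization result: the window is centered
					 * at c_opt; e.g., c_opt = 40 with an odd window
					 * of 9 adlls gives start = 40 + 1 - 4 - 1 = 36
					 * and end = 40 + 4 = 44, i.e., 44 - 36 + 1 = 9
					 * taps (illustrative values)
					 */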
					dq_vref_start_win[if_id][subphy_num][vref_idx] =
						c_opt_per_bus[if_id][subphy_num] + 1 -
						valid_win_size[if_id][subphy_num] / 2;
					dq_vref_start_win[if_id][subphy_num][vref_idx] =
						(valid_win_size[if_id][subphy_num] % 2 == 0) ?
						dq_vref_start_win[if_id][subphy_num][vref_idx] :
						dq_vref_start_win[if_id][subphy_num][vref_idx] - 1;
					dq_vref_end_win[if_id][subphy_num][vref_idx] =
						c_opt_per_bus[if_id][subphy_num] +
						valid_win_size[if_id][subphy_num] / 2;
					vref_state_per_subphy[if_id][subphy_num] = MV_DDR4_VREF_SUBPHY_CAL_INSIDE;
				} else if (vref_state_per_subphy[if_id][subphy_num] == MV_DDR4_VREF_SUBPHY_CAL_INSIDE) {
					vref_state_per_subphy[if_id][subphy_num] = MV_DDR4_VREF_SUBPHY_CAL_UNDER;
				}
			} /* subphy */
		} /* if */

		/* check whether all subphys are in under state */
		all_subphys_state = MV_DDR4_VREF_SUBPHY_CAL_UNDER;
		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
				if (vref_state_per_subphy[if_id][subphy_num] != MV_DDR4_VREF_SUBPHY_CAL_UNDER)
					all_subphys_state = MV_DDR4_VREF_SUBPHY_CAL_INSIDE;
			}
		}
	}

	if (tap_tune_passed == 0) {
		DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
				  ("%s: tap tune not passed on any dq_vref value\n", __func__));
		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			/* report fail for all active interfaces; multi-interface support - tbd */
			flow_result[if_id] = TEST_FAILED;
		}

		return MV_FAIL;
	}

	/* close vref range */
	mv_ddr4_vref_tap_set(dev_num, 0, ACCESS_TYPE_MULTICAST, vref_tap_idx, MV_DDR4_VREF_TAP_END);

	/*
	 * find the results with mixed and low byte states and move the low state
	 * by 64 adlls in case the center of the ui is smaller than 31
	 */
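	/*
	 * the 64-adll shift equals ADLL_TAPS_PER_PERIOD: bytes that locked a
	 * full period early are moved onto the same scale as the rest before
	 * the center of mass is computed; e.g., a window of 10..50 with
	 * center 30 (<= 31) becomes 74..114 (illustrative values)
	 */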
	for (vref_idx = 0; vref_idx < MV_DDR4_VREF_MAX_RANGE; vref_idx++) {
		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
				if (((vref_byte_status[if_id][subphy_num][vref_idx]) &
				    (BYTE_HOMOGENEOUS_LOW | BYTE_SPLIT_OUT_MIX)) ==
				    (BYTE_HOMOGENEOUS_LOW | BYTE_SPLIT_OUT_MIX)) {
					if ((dq_vref_start_win[if_id][subphy_num][vref_idx] +
					    dq_vref_end_win[if_id][subphy_num][vref_idx]) / 2 <= 31) {
						dq_vref_start_win[if_id][subphy_num][vref_idx] += 64;
						dq_vref_end_win[if_id][subphy_num][vref_idx] += 64;
						DEBUG_CALIBRATION
							(DEBUG_LEVEL_TRACE,
							 ("%s vref_idx %d if %d subphy %d added 64 adlls to window\n",
							  __func__, valid_vref_ptr[if_id][subphy_num][vref_idx],
							  if_id, subphy_num));
					}
				}
			}
		}
	}

	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
			DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
					  ("calculating center of mass for subphy %d, valid window size %d\n",
					   subphy_num, valid_win_size[if_id][subphy_num]));
			if (valid_vref_cnt[if_id][subphy_num] > 0) {
				/* calculate center of mass sampling point (t, v) for each subphy */
				status = mv_ddr4_center_of_mass_calc(dev_num, if_id, subphy_num, TX_DIR,
								     dq_vref_start_win[if_id][subphy_num],
								     dq_vref_end_win[if_id][subphy_num],
								     valid_vref_ptr[if_id][subphy_num],
								     valid_vref_cnt[if_id][subphy_num],
								     &center_vref[if_id][subphy_num],
								     &center_adll[if_id][subphy_num]);
				if (status != MV_OK)
					return status;

				DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
						  ("center of mass results: vref %d, adll %d\n",
						   center_vref[if_id][subphy_num], center_adll[if_id][subphy_num]));
			} else {
				DEBUG_CALIBRATION(DEBUG_LEVEL_ERROR,
						  ("%s subphy %d no vref results to calculate the center of mass\n",
						  __func__, subphy_num));
				status = MV_ERROR;
				return status;
			}
		}
	}

	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		vref_avg = 0;
		vref_subphy_num = 0;
		for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
			vref_avg += center_vref[if_id][subphy_num];
			dq_vref_vec[subphy_num] = center_vref[if_id][subphy_num];
			vref_subphy_num++;
		}

		mv_ddr4_vref_tap_set(dev_num, if_id, ACCESS_TYPE_UNICAST,
				     vref_avg / vref_subphy_num, MV_DDR4_VREF_TAP_START);
		mv_ddr4_vref_tap_set(dev_num, if_id, ACCESS_TYPE_UNICAST,
				     vref_avg / vref_subphy_num, MV_DDR4_VREF_TAP_END);
		DEBUG_CALIBRATION(DEBUG_LEVEL_INFO, ("final vref average %d\n", vref_avg / vref_subphy_num));
		/* run centralization again with optimal vref to update global structures */
		mv_ddr4_centralization(dev_num, pbs_tap_factor, c_opt_per_bus, pbs_res_per_bus, valid_win_size,
				       TX_DIR, vref_avg / vref_subphy_num, duty_cycle);
	}

	/* return dram from vref training mode */
	mv_ddr4_vref_training_mode_ctrl(dev_num, 0, ACCESS_TYPE_MULTICAST, 0);

	/* dqs tx reposition calculation */
	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
			for (dq_idx = 0; dq_idx < 8; dq_idx++) {
				pad_num = dq_map_table[dq_idx +
						       subphy_num * BUS_WIDTH_IN_BITS +
						       if_id * BUS_WIDTH_IN_BITS * subphy_max];
				status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, if_id, ACCESS_TYPE_UNICAST,
							    subphy_num, DDR_PHY_DATA,
							    0x10 + pad_num + effective_cs * 0x10,
							    pbs_res_per_bus[if_id][subphy_num][dq_idx]);
				if (status != MV_OK)
					return status;
			}

			status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, if_id, ACCESS_TYPE_UNICAST,
						    subphy_num, DDR_PHY_DATA,
						    CTX_PHY_REG(effective_cs),
						    center_adll[if_id][subphy_num] % 64);
			if (status != MV_OK)
				return status;
		}
	}

	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		/* report pass for all active interfaces; multi-interface support - tbd */
		flow_result[if_id] = TEST_SUCCESS;
	}

	return MV_OK;
}

/* centralization flow */
static int mv_ddr4_centralization(u8 dev_num, u16 (*lambda)[MAX_BUS_NUM][BUS_WIDTH_IN_BITS], u8 (*copt)[MAX_BUS_NUM],
				  u8 (*pbs_result)[MAX_BUS_NUM][BUS_WIDTH_IN_BITS], u8 (*vw_size)[MAX_BUS_NUM],
				  u8 mode, u16 param0, u8 param1)
{
/* FIXME: remove the dependency on 64bit */
#define MV_DDR_NUM_OF_CENTRAL_PATTERNS	(PATTERN_KILLER_DQ7 - PATTERN_KILLER_DQ0 + 1)
	static u8 subphy_end_win[MAX_DIR_TYPES][MAX_INTERFACE_NUM][MAX_BUS_NUM];
	static u8 subphy_start_win[MAX_DIR_TYPES][MAX_INTERFACE_NUM][MAX_BUS_NUM];
	static u8 final_start_win[MAX_INTERFACE_NUM][MAX_BUS_NUM][BUS_WIDTH_IN_BITS];
	static u8 final_end_win[MAX_INTERFACE_NUM][MAX_BUS_NUM][BUS_WIDTH_IN_BITS];
	enum hws_training_ip_stat training_result[MAX_INTERFACE_NUM];
	u32 if_id, subphy_num, pattern_id, pattern_loop_idx, bit_num;
	u8  curr_start_win[BUS_WIDTH_IN_BITS];
	u8  curr_end_win[BUS_WIDTH_IN_BITS];
	static u8 start_win_db[MV_DDR_NUM_OF_CENTRAL_PATTERNS][MAX_INTERFACE_NUM][MAX_BUS_NUM][BUS_WIDTH_IN_BITS];
	static u8 end_win_db[MV_DDR_NUM_OF_CENTRAL_PATTERNS][MAX_INTERFACE_NUM][MAX_BUS_NUM][BUS_WIDTH_IN_BITS];
	u8  curr_win[BUS_WIDTH_IN_BITS];
	u8  opt_win, waste_win, start_win_skew, end_win_skew;
	u8  final_subphy_win[MAX_INTERFACE_NUM][BUS_WIDTH_IN_BITS];
	enum hws_training_result result_type = RESULT_PER_BIT;
	enum hws_dir direction;
	enum hws_search_dir search_dir;
	u32 *result[HWS_SEARCH_DIR_LIMIT];
	u32 max_win_size;
	u8 curr_end_win_min, curr_start_win_max;
	u32 cs_ena_reg_val[MAX_INTERFACE_NUM];
	u8 current_byte_status;
	int status;
	struct mv_ddr_topology_map *tm = mv_ddr_topology_map_get();
	u8 subphy_max = ddr3_tip_dev_attr_get(dev_num, MV_ATTR_OCTET_PER_INTERFACE);

	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		/* save current cs enable reg val */
		status = ddr3_tip_if_read(dev_num, ACCESS_TYPE_UNICAST, if_id, DUAL_DUNIT_CFG_REG,
					  cs_ena_reg_val, MASK_ALL_BITS);
		if (status != MV_OK)
			return status;

		/* enable single cs */
		status = ddr3_tip_if_write(dev_num, ACCESS_TYPE_UNICAST, if_id, DUAL_DUNIT_CFG_REG,
					   (0x1 << 3), (0x1 << 3));
		if (status != MV_OK)
			return status;
	}

	if (mode == TX_DIR) {
		max_win_size = MAX_WINDOW_SIZE_TX;
		direction = OPER_WRITE;
	} else {
		max_win_size = MAX_WINDOW_SIZE_RX;
		direction = OPER_READ;
	}

	/* database initialization */
	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
			patterns_byte_status[if_id][subphy_num] = BYTE_NOT_DEFINED;
			subphy_end_win[mode][if_id][subphy_num] = (max_win_size - 1);
			subphy_start_win[mode][if_id][subphy_num] = 0;
			vw_size[if_id][subphy_num] = (max_win_size - 1);
			for (bit_num = 0; bit_num < BUS_WIDTH_IN_BITS; bit_num++) {
				final_start_win[if_id][subphy_num][bit_num] = 0;
				final_end_win[if_id][subphy_num][bit_num] = (max_win_size - 1);
				if (mode == TX_DIR)
					final_end_win[if_id][subphy_num][bit_num] = (2 * max_win_size - 1);
			}
			if (mode == TX_DIR) {
				subphy_end_win[mode][if_id][subphy_num] = (2 * max_win_size - 1);
				vw_size[if_id][subphy_num] = (2 * max_win_size - 1);
			}
		}
	}

	/* main flow */
	/* FIXME: hard-coded "22" below for PATTERN_KILLER_DQ7_64 enum hws_pattern */
	for (pattern_id = PATTERN_KILLER_DQ0, pattern_loop_idx = 0;
	     pattern_id <= (MV_DDR_IS_64BIT_DRAM_MODE(tm->bus_act_mask) ? 22 : PATTERN_KILLER_DQ7);
	     pattern_id++, pattern_loop_idx++) {
		ddr3_tip_ip_training_wrapper(dev_num, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE, ACCESS_TYPE_MULTICAST,
					     PARAM_NOT_CARE, result_type, HWS_CONTROL_ELEMENT_ADLL,
					     PARAM_NOT_CARE, direction, tm->if_act_mask,
					     0x0, max_win_size - 1, max_win_size - 1, pattern_id,
					     EDGE_FPF, CS_SINGLE, PARAM_NOT_CARE, training_result);

		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
				/*
				 * in case the previous patterns found the current subphy as BYTE_NOT_DEFINED,
				 * continue to next subphy
				 */
				if ((patterns_byte_status[if_id][subphy_num] == BYTE_NOT_DEFINED) &&
				    (pattern_id != PATTERN_KILLER_DQ0))
					continue;
				/*
				 * in case the result of the current subphy is BYTE_NOT_DEFINED, mark the
				 * pattern byte status as BYTE_NOT_DEFINED
				 */
				current_byte_status = mv_ddr_tip_sub_phy_byte_status_get(if_id, subphy_num);
				if (current_byte_status == BYTE_NOT_DEFINED) {
					DEBUG_DDR4_CENTRALIZATION
						(DEBUG_LEVEL_INFO,
						 ("%s:%s: failed to lock subphy, pat %d if %d subphy %d\n",
						 __func__, str_dir[mode], pattern_id, if_id, subphy_num));
					patterns_byte_status[if_id][subphy_num] = BYTE_NOT_DEFINED;
					/* update the valid window size, which is this function's return value */
					vw_size[if_id][subphy_num] = 0;
					/* continue to next subphy */
					continue;
				}

				/* set the status of this byte */
				patterns_byte_status[if_id][subphy_num] |= current_byte_status;
				for (search_dir = HWS_LOW2HIGH; search_dir <= HWS_HIGH2LOW; search_dir++) {
					status = ddr3_tip_read_training_result(dev_num, if_id, ACCESS_TYPE_UNICAST,
									       subphy_num, ALL_BITS_PER_PUP,
									       search_dir, direction, result_type,
									       TRAINING_LOAD_OPERATION_UNLOAD,
									       CS_SINGLE, &result[search_dir],
									       1, 0, 0);
					if (status != MV_OK)
						return status;

					DEBUG_DDR4_CENTRALIZATION
					(DEBUG_LEVEL_INFO,
					 ("param0 %d param1 %d pat %d if %d subphy %d "
					 "regs: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
					 param0, param1, pattern_id, if_id, subphy_num,
					 result[search_dir][0], result[search_dir][1],
					 result[search_dir][2], result[search_dir][3],
					 result[search_dir][4], result[search_dir][5],
					 result[search_dir][6], result[search_dir][7]));
				}

				for (bit_num = 0; bit_num < BUS_WIDTH_IN_BITS; bit_num++) {
					/* read result success */
					DEBUG_DDR4_CENTRALIZATION(
								  DEBUG_LEVEL_INFO,
								  ("%s %s subphy locked, pat %d if %d subphy %d\n",
								  __func__, str_dir[mode], pattern_id,
								  if_id, subphy_num));
					start_win_db[pattern_loop_idx][if_id][subphy_num][bit_num] =
						GET_TAP_RESULT(result[HWS_LOW2HIGH][bit_num], EDGE_1);
					end_win_db[pattern_loop_idx][if_id][subphy_num][bit_num] =
						GET_TAP_RESULT(result[HWS_HIGH2LOW][bit_num], EDGE_1);
				}
			} /* subphy */
		} /* interface */
	} /* pattern */

	/*
	 * check if the current pattern's subphys in all interfaces have mixed and low byte states;
	 * in that case, add 64 adlls to the low byte
	 */
	for (pattern_id = PATTERN_KILLER_DQ0, pattern_loop_idx = 0;
		pattern_id <= (MV_DDR_IS_64BIT_DRAM_MODE(tm->bus_act_mask) ? 22 : PATTERN_KILLER_DQ7);
		pattern_id++, pattern_loop_idx++) {
		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
				if (patterns_byte_status[if_id][subphy_num] == BYTE_NOT_DEFINED)
					continue;
				opt_win = 2 * max_win_size;	/* initialize opt_win */
				/* in case this byte in the pattern is homogeneous low, add 64 adlls to the byte */
				if (((patterns_byte_status[if_id][subphy_num]) &
				    (BYTE_HOMOGENEOUS_LOW | BYTE_SPLIT_OUT_MIX)) ==
				     (BYTE_HOMOGENEOUS_LOW | BYTE_SPLIT_OUT_MIX)) {
					for (bit_num = 0; bit_num < BUS_WIDTH_IN_BITS; bit_num++) {
						if (start_win_db[pattern_loop_idx][if_id][subphy_num][bit_num] <= 31 &&
						    end_win_db[pattern_loop_idx][if_id][subphy_num][bit_num] <= 31) {
							start_win_db[pattern_loop_idx][if_id][subphy_num][bit_num] +=
								64;
							end_win_db[pattern_loop_idx][if_id][subphy_num][bit_num] += 64;
							DEBUG_DDR4_CENTRALIZATION
								(DEBUG_LEVEL_INFO,
								 ("%s %s pattern %d if %d subphy %d bit %d added 64 "
								 "adll\n",
								 __func__, str_dir[mode], pattern_id, if_id,
								 subphy_num, bit_num));
						}
					}
				}

				/* calculations for the current pattern per subphy */
				for (bit_num = 0; bit_num < BUS_WIDTH_IN_BITS; bit_num++) {
					curr_win[bit_num] = end_win_db[pattern_loop_idx][if_id][subphy_num][bit_num] -
						start_win_db[pattern_loop_idx][if_id][subphy_num][bit_num] + 1;
					curr_start_win[bit_num] =
						start_win_db[pattern_loop_idx][if_id][subphy_num][bit_num];
					curr_end_win[bit_num] =
						end_win_db[pattern_loop_idx][if_id][subphy_num][bit_num];
				}

				opt_win = GET_MIN(opt_win, ddr3_tip_get_buf_min(curr_win));
				vw_size[if_id][subphy_num] =
					GET_MIN(vw_size[if_id][subphy_num], ddr3_tip_get_buf_min(curr_win));

				/* final subphy window length */
				final_subphy_win[if_id][subphy_num] = ddr3_tip_get_buf_min(curr_end_win) -
					ddr3_tip_get_buf_max(curr_start_win) + 1;
				waste_win = opt_win - final_subphy_win[if_id][subphy_num];
				start_win_skew = ddr3_tip_get_buf_max(curr_start_win) -
					ddr3_tip_get_buf_min(curr_start_win);
				end_win_skew = ddr3_tip_get_buf_max(curr_end_win) -
					ddr3_tip_get_buf_min(curr_end_win);

				/* min/max updated with pattern change */
				curr_end_win_min = ddr3_tip_get_buf_min(curr_end_win);
				curr_start_win_max = ddr3_tip_get_buf_max(curr_start_win);
				subphy_end_win[mode][if_id][subphy_num] =
					GET_MIN(subphy_end_win[mode][if_id][subphy_num], curr_end_win_min);
				subphy_start_win[mode][if_id][subphy_num] =
					GET_MAX(subphy_start_win[mode][if_id][subphy_num], curr_start_win_max);
				DEBUG_DDR4_CENTRALIZATION
					(DEBUG_LEVEL_TRACE,
					 ("%s, %s pat %d if %d subphy %d opt_win %d ",
					 __func__, str_dir[mode], pattern_id, if_id, subphy_num, opt_win));
				DEBUG_DDR4_CENTRALIZATION
					(DEBUG_LEVEL_TRACE,
					 ("final_subphy_win %d waste_win %d "
					 "start_win_skew %d end_win_skew %d ",
					 final_subphy_win[if_id][subphy_num],
					 waste_win, start_win_skew, end_win_skew));
				DEBUG_DDR4_CENTRALIZATION(DEBUG_LEVEL_TRACE,
					("curr_start_win_max %d curr_end_win_min %d "
					"subphy_start_win %d subphy_end_win %d\n",
					curr_start_win_max, curr_end_win_min,
					subphy_start_win[mode][if_id][subphy_num],
					subphy_end_win[mode][if_id][subphy_num]));

				/* valid window */
				DEBUG_DDR4_CENTRALIZATION(DEBUG_LEVEL_TRACE,
					("valid window, pat %d if %d subphy %d\n",
					pattern_id, if_id, subphy_num));
				for (bit_num = 0; bit_num < BUS_WIDTH_IN_BITS; bit_num++) {
					final_start_win[if_id][subphy_num][bit_num] =
						GET_MAX(final_start_win[if_id][subphy_num][bit_num],
							curr_start_win[bit_num]);
					final_end_win[if_id][subphy_num][bit_num] =
						GET_MIN(final_end_win[if_id][subphy_num][bit_num],
							curr_end_win[bit_num]);
				} /* bit */
			} /* subphy */
		} /* if_id */
	} /* pattern */

	/* calculate valid window for each subphy */
	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
			if (patterns_byte_status[if_id][subphy_num] != BYTE_NOT_DEFINED) {
				/*
				 * bytes that were found as both mixed and low have already been
				 * handled by adding 64 adlls to the low bytes, so change their
				 * status to mixed only
				 */
				if (patterns_byte_status[if_id][subphy_num] &
				    (BYTE_HOMOGENEOUS_LOW | BYTE_SPLIT_OUT_MIX))
					patterns_byte_status[if_id][subphy_num] = BYTE_SPLIT_OUT_MIX;
				if (rx_vw_pos[if_id][subphy_num] == 0)	/* rx_vw_pos is initialized during tap tune */
					pbs_max = 31 - 0xa;
				else
					pbs_max = 31;

				/* continue if locked */
				/*if (centralization_state[if_id][subphy_num] == 0) {*/
				status = mv_ddr4_copt_get(mode, lambda[if_id][subphy_num],
							  final_start_win[if_id][subphy_num],
							  final_end_win[if_id][subphy_num],
							  pbs_result[if_id][subphy_num],
							  &copt[if_id][subphy_num]);

				/*
				 * after copt, the adll is moved to a smaller value due to pbs
				 * compensation, so the byte status might change; change the byte
				 * status to homogeneous low in case the center of the ui after
				 * copt moved below 31 adlls
				 */
				if (copt[if_id][subphy_num] <= 31)
					patterns_byte_status[if_id][subphy_num] = BYTE_HOMOGENEOUS_LOW;

				DEBUG_DDR4_CENTRALIZATION
					(DEBUG_LEVEL_INFO,
					 ("%s %s if %d subphy %d copt %d\n",
					 __func__, str_dir[mode], if_id, subphy_num, copt[if_id][subphy_num]));

				if (status != MV_OK) {
					/*
					 * TODO: print out error message(s) only when all points fail;
					 * as a temporary solution, replaced ERROR with TRACE debug level
					 */
					DEBUG_DDR4_CENTRALIZATION
						(DEBUG_LEVEL_TRACE,
						 ("%s %s copt calculation failed, "
						 "no valid window for subphy %d\n",
						 __func__, str_dir[mode], subphy_num));
					/* set the byte to 0 (fail) and clean the status (continue with algorithm) */
					vw_size[if_id][subphy_num] = 0;
					status = MV_OK;

					if (debug_mode == 0) {
						/*
						 * TODO: print out error message(s) only when all points fail;
						 * as a temporary solution, commented out debug level set to TRACE
						 */
						/*
						 * ddr3_hws_set_log_level(DEBUG_BLOCK_CALIBRATION, DEBUG_LEVEL_TRACE);
						 */
						/* open relevant log and run function again for debug */
						mv_ddr4_copt_get(mode, lambda[if_id][subphy_num],
									final_start_win[if_id][subphy_num],
									final_end_win[if_id][subphy_num],
									pbs_result[if_id][subphy_num],
									&copt[if_id][subphy_num]);
						/*
						 * ddr3_hws_set_log_level(DEBUG_BLOCK_CALIBRATION, DEBUG_LEVEL_ERROR);
						 */
					} /* debug mode */
				} /* status */
			} /* byte not defined */
		} /* subphy */
	} /* if_id */

	/* restore cs enable value */
	for (if_id = 0; if_id < MAX_INTERFACE_NUM - 1; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		status = ddr3_tip_if_write(dev_num, ACCESS_TYPE_UNICAST, if_id, DUAL_DUNIT_CFG_REG,
					   cs_ena_reg_val[if_id], MASK_ALL_BITS);
		if (status != MV_OK)
			return status;
	}

	return status;
}

/*
 * mv_ddr4_copt_get function
 * inputs:
 *	dir - direction; 0 is for rx, 1 for tx
 *	lambda - a pointer to the adll-to-pbs ratio multiplied by PBS_VAL_FACTOR
 *	vw_l - a pointer to valid window low limit in adll taps
 *	vw_h - a pointer to valid window high limit in adll taps
 * outputs:
 *	pbs_result - a pointer to the new pbs delay value; the function's output
 *	copt - optimal center of subphy in adll taps
 * The function assumes the initial pbs tap value is zero. Otherwise, it requires logic
 * to get the pbs value per dq and to set the pbs_taps_per_dq array.
 * It provides a solution for a single subphy (8 bits).
 * The calling function is responsible for any additional pbs taps for dqs.
 */
static int mv_ddr4_copt_get(u8 dir, u16 *lambda, u8 *vw_l, u8 *vw_h, u8 *pbs_result, u8 *copt)
{
	u8 center_per_dq[8];
	u8 center_zone_low[8] = {0};
	u8 center_zone_high[8] = {0};
	u8 ext_center_zone_low[8] = {0};
	u8 ext_center_zone_high[8] = {0};
	u8 pbs_taps_per_dq[8] = {0};
	u8 vw_per_dq[8];
	u8 vw_zone_low[8] = {0};
	u8 vw_zone_high[8] = {0};
	u8 margin_vw[8] = {0};
	u8 copt_val;
	u8 dq_idx;
	u8 center_zone_max_low = 0;
	u8 center_zone_min_high = 128;
	u8 vw_zone_max_low = 0;
	u8 vw_zone_min_high = 128;
	u8 min_vw = 63; /* minimum valid window between all bits */
	u8 center_low_el;
	u8 center_high_el;

	/* lambda calculated as D * PBS_VAL_FACTOR / d */
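	/*
	 * e.g., a bit with vw_l = 20 and vw_h = 40 adll taps gets
	 * center_per_dq = (40 + 20) / 2 = 30 and vw_per_dq = 1 + (40 - 20) = 21
	 * in the loop below (illustrative values)
	 */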
	for (dq_idx = 0; dq_idx < 8; dq_idx++) {
		center_per_dq[dq_idx] = (vw_h[dq_idx] + vw_l[dq_idx]) / 2;
		vw_per_dq[dq_idx] = 1 + (vw_h[dq_idx] - vw_l[dq_idx]);
		if (min_vw > vw_per_dq[dq_idx])
			min_vw = vw_per_dq[dq_idx];
	}

	/* calculate center zone */
	for (dq_idx = 0; dq_idx < 8; dq_idx++) {
		center_low_el = center_low_element_get(dir, pbs_taps_per_dq[dq_idx], lambda[dq_idx], pbs_max);
		if (center_per_dq[dq_idx] > center_low_el)
			center_zone_low[dq_idx] = center_per_dq[dq_idx] - center_low_el;
		center_high_el = center_high_element_get(dir, pbs_taps_per_dq[dq_idx], lambda[dq_idx], pbs_max);
		center_zone_high[dq_idx] = center_per_dq[dq_idx] + center_high_el;
		if (center_zone_max_low < center_zone_low[dq_idx])
			center_zone_max_low = center_zone_low[dq_idx];
		if (center_zone_min_high > center_zone_high[dq_idx])
			center_zone_min_high = center_zone_high[dq_idx];
		DEBUG_CALIBRATION(DEBUG_LEVEL_TRACE,
				  ("center: low %d, high %d, max_low %d, min_high %d\n",
				   center_zone_low[dq_idx], center_zone_high[dq_idx],
				   center_zone_max_low, center_zone_min_high));
	}

	if (center_zone_min_high >= center_zone_max_low) { /* center zone visible */
		/* rx takes the low edge of the common zone; tx takes the high edge */
		copt_val = (dir == RX_DIR) ? center_zone_max_low : center_zone_min_high;
		*copt = copt_val;

		/* calculate additional pbs taps */
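		/*
		 * e.g., with copt_val = 35, a bit centered at 30, and lambda = 2500,
		 * the rx branch below adds (35 - 30) * 1000 / 2500 = 2 pbs taps to
		 * that bit (illustrative values)
		 */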
		for (dq_idx = 0; dq_idx < 8; dq_idx++) {
			if (dir == RX_DIR)
				pbs_result[dq_idx] = (copt_val - center_per_dq[dq_idx]) *
						     PBS_VAL_FACTOR / lambda[dq_idx];
			else
				pbs_result[dq_idx] = (center_per_dq[dq_idx] - copt_val) *
						     PBS_VAL_FACTOR / lambda[dq_idx];
		}
		return MV_OK;
	} else { /* center zone not visible */
		for (dq_idx = 0; dq_idx < 8; dq_idx++) {
			if ((center_zone_low[dq_idx] + 1) > (vw_per_dq[dq_idx] / 2 + vw_per_dq[dq_idx] % 2)) {
				vw_zone_low[dq_idx] = (center_zone_low[dq_idx] + 1) -
						      (vw_per_dq[dq_idx] / 2 + vw_per_dq[dq_idx] % 2);
			} else {
				vw_zone_low[dq_idx] = 0;
				DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
						  ("dq_idx %d, center zone low %d, vw_l %d, vw_h %d\n",
						   dq_idx, center_zone_low[dq_idx], vw_l[dq_idx], vw_h[dq_idx]));
				DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
						  ("vw_l[%d], vw_h[%d], lambda[%d]\n",
						   vw_l[dq_idx], vw_h[dq_idx], lambda[dq_idx]));
			}

			vw_zone_high[dq_idx] = center_zone_high[dq_idx] + vw_per_dq[dq_idx] / 2;

			if (vw_zone_max_low < vw_zone_low[dq_idx])
				vw_zone_max_low = vw_zone_low[dq_idx];

			if (vw_zone_min_high > vw_zone_high[dq_idx])
				vw_zone_min_high = vw_zone_high[dq_idx];

			DEBUG_CALIBRATION(DEBUG_LEVEL_TRACE,
					  ("valid_window: low %d, high %d, max_low %d, min_high %d\n",
					   vw_zone_low[dq_idx], vw_zone_high[dq_idx],
					   vw_zone_max_low, vw_zone_min_high));
		}

		/* try to extend center zone */
		if (vw_zone_min_high >= vw_zone_max_low) { /* vw zone visible */
			center_zone_max_low = 0;
			center_zone_min_high = 128;

			for (dq_idx = 0; dq_idx < 8; dq_idx++) {
				margin_vw[dq_idx] = vw_per_dq[dq_idx] - min_vw;

				if (center_zone_low[dq_idx] > margin_vw[dq_idx])
					ext_center_zone_low[dq_idx] = center_zone_low[dq_idx] - margin_vw[dq_idx];
				else
					ext_center_zone_low[dq_idx] = 0;

				ext_center_zone_high[dq_idx] = center_zone_high[dq_idx] + margin_vw[dq_idx];

				if (center_zone_max_low < ext_center_zone_low[dq_idx])
					center_zone_max_low = ext_center_zone_low[dq_idx];

				if (center_zone_min_high > ext_center_zone_high[dq_idx])
					center_zone_min_high = ext_center_zone_high[dq_idx];

				DEBUG_CALIBRATION(DEBUG_LEVEL_TRACE,
						  ("ext_center: low %d, high %d, max_low %d, min_high %d\n",
						   ext_center_zone_low[dq_idx], ext_center_zone_high[dq_idx],
						   center_zone_max_low, center_zone_min_high));
			}

			if (center_zone_min_high >= center_zone_max_low) { /* center zone visible */
				/* get optimal center position */
				copt_val = (dir == RX_DIR) ? center_zone_max_low : center_zone_min_high;
				*copt = copt_val;

				/* calculate additional pbs taps */
				for (dq_idx = 0; dq_idx < 8; dq_idx++) {
					if (dir == RX_DIR) {
						if (copt_val > center_per_dq[dq_idx])
							pbs_result[dq_idx] = (copt_val - center_per_dq[dq_idx]) *
									     PBS_VAL_FACTOR / lambda[dq_idx];
						else
							pbs_result[dq_idx] = 0;
					} else {
						if (center_per_dq[dq_idx] > copt_val)
							pbs_result[dq_idx] = (center_per_dq[dq_idx] - copt_val) *
									     PBS_VAL_FACTOR / lambda[dq_idx];
						else
							pbs_result[dq_idx] = 0;
					}

					if (pbs_result[dq_idx] > pbs_max)
						pbs_result[dq_idx] = pbs_max;
				}

				return MV_OK;
			} else { /* center zone not visible */
				/*
				 * TODO: print out error message(s) only when all points fail;
				 * as a temporary solution, replaced ERROR with TRACE debug level
				 */
				DEBUG_DDR4_CENTRALIZATION(DEBUG_LEVEL_TRACE,
							  ("lambda: %d, %d, %d, %d, %d, %d, %d, %d\n",
							   lambda[0], lambda[1], lambda[2], lambda[3],
							   lambda[4], lambda[5], lambda[6], lambda[7]));

				DEBUG_DDR4_CENTRALIZATION(DEBUG_LEVEL_TRACE,
							  ("vw_h: %d, %d, %d, %d, %d, %d, %d, %d\n",
							   vw_h[0], vw_h[1], vw_h[2], vw_h[3],
							   vw_h[4], vw_h[5], vw_h[6], vw_h[7]));

				DEBUG_DDR4_CENTRALIZATION(DEBUG_LEVEL_TRACE,
							  ("vw_l: %d, %d, %d, %d, %d, %d, %d, %d\n",
							   vw_l[0], vw_l[1], vw_l[2], vw_l[3],
							   vw_l[4], vw_l[5], vw_l[6], vw_l[7]));

				for (dq_idx = 0; dq_idx < 8; dq_idx++) {
					DEBUG_DDR4_CENTRALIZATION(DEBUG_LEVEL_TRACE,
								  ("center: low %d, high %d, "
								   "max_low %d, min_high %d\n",
								   center_zone_low[dq_idx], center_zone_high[dq_idx],
								   center_zone_max_low, center_zone_min_high));

					DEBUG_DDR4_CENTRALIZATION(DEBUG_LEVEL_TRACE,
								  ("valid_window: low %d, high %d, "
								   "max_low %d, min_high %d\n",
								   vw_zone_low[dq_idx], vw_zone_high[dq_idx],
								   vw_zone_max_low, vw_zone_min_high));

					DEBUG_DDR4_CENTRALIZATION(DEBUG_LEVEL_TRACE,
								  ("ext_center: low %d, high %d, "
								   "max_low %d, min_high %d\n",
								   ext_center_zone_low[dq_idx],
								   ext_center_zone_high[dq_idx],
								   center_zone_max_low, center_zone_min_high));
				}

				return MV_FAIL;
			}
		} else { /* vw zone not visible; failed to find a single sample point */
			return MV_FAIL;
		}
	}

	return MV_OK;
}

/*
 * mv_ddr4_dqs_reposition function gets copt to align to and returns pbs value per bit
 * parameters:
 *	dir - direction; 0 is for rx, 1 for tx
 *	lambda - a pointer to the adll-to-pbs ratio multiplied by PBS_VAL_FACTOR
 *	pbs_result - a pointer to the new pbs delay value; the function's output
 *	delta - signed; possible values: +0xa, 0x0, -0xa; for rx can be only negative
 *	copt - optimal center of subphy in adll taps
 *	dqs_pbs - optimal pbs
 * The function assumes the initial pbs tap value is zero. Otherwise, it requires logic
 * to get the pbs value per dq and to set the pbs_taps_per_dq array.
 * It provides a solution for a single subphy (8 bits).
 * The calling function is responsible for any additional pbs taps for dqs.
 */
static int mv_ddr4_dqs_reposition(u8 dir, u16 *lambda, u8 *pbs_result, char delta, u8 *copt, u8 *dqs_pbs)
{
	u8 dq_idx;
	u32 pbs_max_val = 0;
	u32 lambda_avg = 0;

	/* lambda calculated as D * X / d */
	for (dq_idx = 0; dq_idx < 8; dq_idx++) {
		if (pbs_max_val < pbs_result[dq_idx])
			pbs_max_val = pbs_result[dq_idx];
		lambda_avg += lambda[dq_idx];
	}

	if (delta >= 0)
		*dqs_pbs = (pbs_max_val + delta) / 2;
	else /* dqs already 0xa */
		*dqs_pbs = pbs_max_val / 2;

	lambda_avg /= 8;

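	/*
	 * e.g., pbs_max_val = 10 and delta = 0xa give
	 * dqs_pbs = (10 + 10) / 2 = 10 pbs taps; with lambda_avg = 2500,
	 * the tx copt below moves by 10 * 2500 / 1000 = 25 adll taps
	 * (illustrative values)
	 */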
	/* change in dqs pbs value requires change in final copt position from mass center solution */
	if (dir == TX_DIR) {
		/* for tx, additional pbs on dqs in opposite direction of adll */
		*copt = *copt + ((*dqs_pbs) * lambda_avg) / PBS_VAL_FACTOR;
	} else {
		/* for rx, additional pbs on dqs in same direction of adll */
		if (delta < 0)
			*copt = *copt - ((*dqs_pbs + delta) * lambda_avg) / PBS_VAL_FACTOR;
		else
			*copt = *copt - (*dqs_pbs * lambda_avg) / PBS_VAL_FACTOR;
	}

	return MV_OK;
}

/*
 * mv_ddr4_center_of_mass_calc function
 * parameters:
 *	vw_l - a pointer to valid window low limit in adll taps
 *	vw_h - a pointer to valid window high limit in adll taps
 *	vw_v - a pointer to vref value matching vw_l/h arrays
 *	vw_num - number of valid windows (length of vw_v vector)
 *	v_opt - optimal voltage value in vref taps
 *	t_opt - optimal adll value in adll taps
 * This function solves the 2d centroid equation (e.g., adll and vref axes)
 * The function doesn't differentiate between byte and bit eyes
 */
static int mv_ddr4_center_of_mass_calc(u8 dev_num, u8 if_id, u8 subphy_num, u8 mode, u8 *vw_l,
				       u8 *vw_h, u8 *vw_v, u8 vw_num, u8 *v_opt, u8 *t_opt)
{
	u8 idx;
	u8 edge_t[128], edge_v[128];
	u8 min_edge_t = 127, min_edge_v = 127;
	int polygon_area = 0;
	int t_opt_temp = 0, v_opt_temp = 0;
	int vw_avg = 0, v_avg = 0;
	int s0 = 0, s1 = 0, s2 = 0, slope = 1, r_sq = 0;
	u32 d_min = 10000, reg_val = 0;
	int status;

	/*
	 * reorder all polygon points counterclockwise
	 * get min value of each axis to shift to smaller calc value
	 */
	for (idx = 0; idx < vw_num; idx++) {
		edge_t[idx] = vw_l[idx];
		edge_v[idx] = vw_v[idx];
		if (min_edge_v > vw_v[idx])
			min_edge_v = vw_v[idx];
		if (min_edge_t > vw_l[idx])
			min_edge_t = vw_l[idx];
		edge_t[vw_num * 2 - 1 - idx] = vw_h[idx];
		edge_v[vw_num * 2 - 1 - idx] = vw_v[idx];
		vw_avg += vw_h[idx] - vw_l[idx];
		v_avg += vw_v[idx];
		DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
				  ("%s: if %d, byte %d, direction %d, vw_v %d, vw_l %d, vw_h %d\n",
				   __func__, if_id, subphy_num, mode, vw_v[idx], vw_l[idx], vw_h[idx]));
	}

	vw_avg *= 1000 / vw_num;
	v_avg /= vw_num;
	for (idx = 0; idx < vw_num; idx++) {
		s0 += (1000 * (vw_h[idx] - vw_l[idx]) - vw_avg) * (vw_v[idx] - v_avg);
		s1 += (vw_v[idx] - v_avg) * (vw_v[idx] - v_avg);
		s2 += (1000 * (vw_h[idx] - vw_l[idx]) - vw_avg) * (1000 * (vw_h[idx] - vw_l[idx]) - vw_avg);
	}
	r_sq = s0 * (s0 / s1);
	r_sq /= (s2 / 1000);
	slope = s0 / s1;

	/* idx n is equal to idx 0 */
	edge_t[vw_num * 2] = vw_l[0];
	edge_v[vw_num * 2] = vw_v[0];

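	/*
	 * the two loops below are the standard shoelace forms for a polygon's
	 * area and centroid: polygon_area accumulates
	 * sum(x[i] * y[i + 1] - x[i + 1] * y[i]), i.e., twice the signed area,
	 * and t_opt/v_opt divide by 3 * polygon_area, which equals the
	 * centroid's 6 * area denominator; coordinates are shifted by
	 * min_edge_t/min_edge_v to keep the intermediate products small
	 */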
	/* calculate polygon area, a (may be negative) */
	for (idx = 0; idx < vw_num * 2; idx++)
		polygon_area = polygon_area +
			       ((edge_t[idx] - min_edge_t) * (edge_v[idx + 1] - min_edge_v) -
			       (edge_t[idx + 1] - min_edge_t) * (edge_v[idx] - min_edge_v));

	/* calculate optimal point */
	for (idx = 0; idx < vw_num * 2; idx++) {
		t_opt_temp = t_opt_temp +
			     (edge_t[idx] + edge_t[idx + 1] - 2 * min_edge_t) *
			     ((edge_t[idx] - min_edge_t) * (edge_v[idx + 1] - min_edge_v) -
			      (edge_t[idx + 1] - min_edge_t) * (edge_v[idx] - min_edge_v));
		v_opt_temp = v_opt_temp +
			     (edge_v[idx] + edge_v[idx + 1] - 2 * min_edge_v) *
			     ((edge_t[idx] - min_edge_t) * (edge_v[idx + 1] - min_edge_v) -
			      (edge_t[idx + 1] - min_edge_t) * (edge_v[idx] - min_edge_v));
	}

	*t_opt = t_opt_temp / (3 * polygon_area);
	*v_opt = v_opt_temp / (3 * polygon_area);

	/* re-shift */
	*t_opt += min_edge_t;
	*v_opt += min_edge_v;

	/* calculate d_min */
	for (idx = 0; idx < 2 * vw_num; idx++) {
		s0 = (*t_opt - edge_t[idx]) * (*t_opt - edge_t[idx]) +
		     (*v_opt - edge_v[idx]) * (*v_opt - edge_v[idx]);
		d_min = (d_min > s0) ? s0 : d_min;
	}
	DEBUG_CALIBRATION(DEBUG_LEVEL_TRACE,
			  ("%s: r_sq %d, slope %d, area = %d, d_min = %d\n",
			   __func__, r_sq, slope, polygon_area, d_min));

	/* insert vw eye to register database for validation */
	if (d_min < 0)
		d_min = -d_min;
	if (polygon_area < 0)
		polygon_area = -polygon_area;

	status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, if_id, ACCESS_TYPE_UNICAST, subphy_num,
				    DDR_PHY_DATA, RESULT_PHY_REG + effective_cs + 4 * (1 - mode),
				    polygon_area);
	if (status != MV_OK)
		return status;

	status = ddr3_tip_bus_read(dev_num, if_id, ACCESS_TYPE_UNICAST,
				   dmin_phy_reg_table[effective_cs * 5 + subphy_num][0], DDR_PHY_CONTROL,
				   dmin_phy_reg_table[effective_cs * 5 + subphy_num][1], &reg_val);
	if (status != MV_OK)
		return status;

	reg_val &= 0xff << (8 * mode); /* keep the other direction's d_min byte */
	reg_val |= d_min / 2 << (8 * (1 - mode)); /* merge this direction's d_min into its byte */

	status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, if_id, ACCESS_TYPE_UNICAST,
				    dmin_phy_reg_table[effective_cs * 5 + subphy_num][0], DDR_PHY_CONTROL,
				    dmin_phy_reg_table[effective_cs * 5 + subphy_num][1], reg_val);
	if (status != MV_OK)
		return status;

	if (polygon_area < 400) {
		DEBUG_CALIBRATION(DEBUG_LEVEL_ERROR,
				  ("%s: if %d, subphy %d: polygon area too small %d (dmin %d)\n",
				   __func__, if_id, subphy_num, polygon_area, d_min));
		if (debug_mode == 0)
			return MV_FAIL;
	}

	return MV_OK;
}

/* tap tuning flow */
enum {
	DQS_TO_DQ_LONG,
	DQS_TO_DQ_SHORT
};
enum {
	ALIGN_LEFT,
	ALIGN_CENTER,
	ALIGN_RIGHT
};
#define ONE_MHZ			1000000
#define MAX_SKEW_DLY		200 /* in ps */
#define NOMINAL_PBS_DLY		9 /* in ps */
#define MIN_WL_TO_CTX_ADLL_DIFF	2 /* in taps */
#define DQS_SHIFT_INIT_VAL	30
#define MAX_PBS_NUM		31
#define ADLL_TAPS_PER_PHASE	32
#define ADLL_TAPS_PER_PERIOD	(ADLL_TAPS_PER_PHASE * 2)
#define ADLL_TX_RES_REG_MASK	0xff
#define VW_DESKEW_BIAS		0xa
static int mv_ddr4_tap_tuning(u8 dev, u16 (*pbs_tap_factor)[MAX_BUS_NUM][BUS_WIDTH_IN_BITS], u8 mode)
{
	enum hws_training_ip_stat training_result[MAX_INTERFACE_NUM];
	u32 iface, subphy, bit, pattern;
	u32 limit_div;
	u8 curr_start_win, curr_end_win;
	u8 upd_curr_start_win, upd_curr_end_win;
	u8 start_win_diff, end_win_diff;
	u32 max_win_size, a, b;
	u32 cs_ena_reg_val[MAX_INTERFACE_NUM];
	u32 reg_addr;
	enum hws_search_dir search_dir;
	enum hws_dir dir;
	u32 *result[MAX_BUS_NUM][HWS_SEARCH_DIR_LIMIT];
	u32 result1[MAX_BUS_NUM][HWS_SEARCH_DIR_LIMIT][BUS_WIDTH_IN_BITS];
	u8 subphy_max = ddr3_tip_dev_attr_get(dev, MV_ATTR_OCTET_PER_INTERFACE);
	struct mv_ddr_topology_map *tm = mv_ddr_topology_map_get();
	enum hws_training_result result_type = RESULT_PER_BIT;
	int status = MV_OK;
	int i;
	u32 reg_val;
	u32 freq = mv_ddr_freq_get(tm->interface_params->memory_freq);
	/* calc adll tap in ps based on frequency */
	int adll_tap = (ONE_MHZ / freq) / ADLL_TAPS_PER_PERIOD;
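	/*
	 * e.g., at 800 MHz the clock period is 1000000 / 800 = 1250 ps,
	 * so one adll tap is 1250 / 64 = 19 ps (integer math; illustrative)
	 */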
	int dq_to_dqs_delta[MAX_BUS_NUM][BUS_WIDTH_IN_BITS]; /* skew b/w dq and dqs */
	u32 wl_adll[MAX_BUS_NUM]; /* wl solution adll value */
	int is_dq_dqs_short[MAX_BUS_NUM] = {0}; /* tx byte's state */
	u32 new_pbs_per_byte[MAX_BUS_NUM]; /* dq pads' pbs value correction */
	/* threshold to decide subphy needs dqs pbs delay */
	int dq_to_dqs_min_delta_threshold = MIN_WL_TO_CTX_ADLL_DIFF + MAX_SKEW_DLY / adll_tap;
	/* search init condition */
	int dq_to_dqs_min_delta = dq_to_dqs_min_delta_threshold * 2;
	u32 pbs_tap_factor0 = PBS_VAL_FACTOR * NOMINAL_PBS_DLY / adll_tap; /* init lambda */
	/* adapt pbs to frequency */
	u32 new_pbs = (1810000 - (345 * freq)) / 100000;
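	/*
	 * the line above is a linear fit of pbs depth vs. frequency;
	 * e.g., freq = 800 gives (1810000 - 276000) / 100000 = 15 pbs taps
	 * (illustrative; the constants come from the formula itself)
	 */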
	int stage_num, loop;
	int wl_tap, new_wl_tap;
	int pbs_tap_factor_avg;
	int dqs_shift[MAX_BUS_NUM]; /* dqs' pbs delay */
	static u16 tmp_pbs_tap_factor[MAX_INTERFACE_NUM][MAX_BUS_NUM][BUS_WIDTH_IN_BITS];

	DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO, ("Starting ddr4 tap tuning training stage\n"));

	for (i = 0; i < MAX_BUS_NUM; i++)
		dqs_shift[i] = DQS_SHIFT_INIT_VAL;

	if (mode == TX_DIR) {
		max_win_size = MAX_WINDOW_SIZE_TX;
		dir = OPER_WRITE;
	} else {
		max_win_size = MAX_WINDOW_SIZE_RX;
		dir = OPER_READ;
	}

	/* init all pbs registers */
	for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
		if (mode == RX_DIR)
			reg_addr = PBS_RX_BCAST_PHY_REG(effective_cs);
		else
			reg_addr = PBS_TX_BCAST_PHY_REG(effective_cs);
		ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface, ACCESS_TYPE_MULTICAST,
				   PARAM_NOT_CARE, DDR_PHY_DATA, reg_addr, 0);

		if (mode == RX_DIR)
			reg_addr = PBS_RX_PHY_REG(effective_cs, DQSP_PAD);
		else
			reg_addr = PBS_TX_PHY_REG(effective_cs, DQSP_PAD);
		ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface, ACCESS_TYPE_MULTICAST,
				   PARAM_NOT_CARE, DDR_PHY_DATA, reg_addr, 0);
		if (mode == RX_DIR)
			reg_addr = PBS_RX_PHY_REG(effective_cs, DQSN_PAD);
		else
			reg_addr = PBS_TX_PHY_REG(effective_cs, DQSN_PAD);
		ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface, ACCESS_TYPE_MULTICAST,
				   PARAM_NOT_CARE, DDR_PHY_DATA, reg_addr, 0);
	}

	for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
		/* save current cs enable reg val */
		ddr3_tip_if_read(dev, ACCESS_TYPE_UNICAST, iface, DUAL_DUNIT_CFG_REG,
				 cs_ena_reg_val, MASK_ALL_BITS);

		/* enable single cs */
		ddr3_tip_if_write(dev, ACCESS_TYPE_UNICAST, iface, DUAL_DUNIT_CFG_REG,
				  (SINGLE_CS_ENA << SINGLE_CS_PIN_OFFS),
				  (SINGLE_CS_PIN_MASK << SINGLE_CS_PIN_OFFS));
	}

	/* FIXME: fix these hard-coded parameters due to a compilation issue with patterns definitions */
	pattern = MV_DDR_IS_64BIT_DRAM_MODE(tm->bus_act_mask) ? 73 : 23;
	stage_num = (mode == RX_DIR) ? 1 : 2;
	/* find window; run training */
	for (loop = 0; loop < stage_num; loop++) {
		ddr3_tip_ip_training_wrapper(dev, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE, ACCESS_TYPE_MULTICAST,
					     PARAM_NOT_CARE, result_type, HWS_CONTROL_ELEMENT_ADLL, PARAM_NOT_CARE,
					     dir, tm->if_act_mask, 0x0, max_win_size - 1, max_win_size - 1,
					     pattern, EDGE_FPF, CS_SINGLE, PARAM_NOT_CARE, training_result);

		for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
			for (subphy = 0; subphy < subphy_max; subphy++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
				rx_vw_pos[iface][subphy] = ALIGN_CENTER;
				new_pbs_per_byte[subphy] = new_pbs; /* rx init */
				if ((mode == TX_DIR) && (loop == 0)) {
					/* read nominal wl */
					ddr3_tip_bus_read(dev, iface, ACCESS_TYPE_UNICAST, subphy,
							  DDR_PHY_DATA, WL_PHY_REG(effective_cs),
							  &reg_val);
					wl_adll[subphy] = reg_val;
				}

				for (search_dir = HWS_LOW2HIGH; search_dir <= HWS_HIGH2LOW; search_dir++) {
					ddr3_tip_read_training_result(dev, iface, ACCESS_TYPE_UNICAST, subphy,
								      ALL_BITS_PER_PUP, search_dir, dir,
								      result_type, TRAINING_LOAD_OPERATION_UNLOAD,
								      CS_SINGLE, &(result[subphy][search_dir]),
								      1, 0, 0);

					DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
								("cs %d if %d subphy %d mode %d result: "
								 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
								 effective_cs, iface, subphy, mode,
								 result[subphy][search_dir][0],
								 result[subphy][search_dir][1],
								 result[subphy][search_dir][2],
								 result[subphy][search_dir][3],
								 result[subphy][search_dir][4],
								 result[subphy][search_dir][5],
								 result[subphy][search_dir][6],
								 result[subphy][search_dir][7]));
				}

				for (bit = 0; bit < BUS_WIDTH_IN_BITS; bit++) {
					a = result[subphy][HWS_LOW2HIGH][bit];
					b = result[subphy][HWS_HIGH2LOW][bit];
					result1[subphy][HWS_LOW2HIGH][bit] = a;
					result1[subphy][HWS_HIGH2LOW][bit] = b;
					/* measure distance between ctx and wl adlls */
					if (mode == TX_DIR) {
						a &= ADLL_TX_RES_REG_MASK;
						if (a >= ADLL_TAPS_PER_PERIOD)
							a -= ADLL_TAPS_PER_PERIOD;
						dq_to_dqs_delta[subphy][bit] =
							a - (wl_adll[subphy] & WR_LVL_REF_DLY_MASK);
						if (dq_to_dqs_delta[subphy][bit] < dq_to_dqs_min_delta)
							dq_to_dqs_min_delta = dq_to_dqs_delta[subphy][bit];
						DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
									("%s: dq_to_dqs_delta[%d][%d] %d\n",
									 __func__, subphy, bit,
									 dq_to_dqs_delta[subphy][bit]));
					}
				}

				/* adjust wl on the first pass only */
				if ((mode == TX_DIR) && (loop == 0)) {
					/* dqs pbs shift if distance b/w adlls is too large */
					if (dq_to_dqs_min_delta < dq_to_dqs_min_delta_threshold) {
						/* first calculate the wl in taps */
						wl_tap = ((wl_adll[subphy] >> WR_LVL_REF_DLY_OFFS) &
							  WR_LVL_REF_DLY_MASK) +
							  ((wl_adll[subphy] >> WR_LVL_PH_SEL_OFFS) &
							  WR_LVL_PH_SEL_MASK) * ADLL_TAPS_PER_PHASE;

						/* calc dqs pbs shift */
						dqs_shift[subphy] =
							dq_to_dqs_min_delta_threshold - dq_to_dqs_min_delta;
						/* check that the wl result has enough taps to reduce */
						if (wl_tap > 0) {
							if (wl_tap < dqs_shift[subphy])
								dqs_shift[subphy] = wl_tap - 1;
						} else {
							dqs_shift[subphy] = 0;
						}
						DEBUG_TAP_TUNING_ENGINE
							(DEBUG_LEVEL_INFO,
							 ("%s: tap tune tx: subphy %d, dqs shifted by %d adll taps, ",
							  __func__, subphy, dqs_shift[subphy]));
						dqs_shift[subphy] =
							(dqs_shift[subphy] * PBS_VAL_FACTOR) / pbs_tap_factor0;
						DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
									("%d pbs taps\n", dqs_shift[subphy]));
						/* check high limit */
						if (dqs_shift[subphy] > MAX_PBS_NUM)
							dqs_shift[subphy] = MAX_PBS_NUM;
						reg_addr = PBS_TX_PHY_REG(effective_cs, DQSP_PAD);
						ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface,
								   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
								   reg_addr, dqs_shift[subphy]);
						reg_addr = PBS_TX_PHY_REG(effective_cs, DQSN_PAD);
						ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface,
								   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
								   reg_addr, dqs_shift[subphy]);

						is_dq_dqs_short[subphy] = DQS_TO_DQ_SHORT;

						new_wl_tap = wl_tap -
							     (dqs_shift[subphy] * pbs_tap_factor0) / PBS_VAL_FACTOR;
						reg_val = (new_wl_tap & WR_LVL_REF_DLY_MASK) |
							  ((new_wl_tap &
							    ((WR_LVL_PH_SEL_MASK << WR_LVL_PH_SEL_OFFS) >> 1))
							   << 1) |
							  (wl_adll[subphy] &
							   ((CTRL_CENTER_DLY_MASK << CTRL_CENTER_DLY_OFFS) |
							    (CTRL_CENTER_DLY_INV_MASK << CTRL_CENTER_DLY_INV_OFFS)));
						ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface,
								   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
								   WL_PHY_REG(effective_cs), reg_val);
						DEBUG_TAP_TUNING_ENGINE
							(DEBUG_LEVEL_INFO,
							 ("%s: subphy %d, dq_to_dqs_min_delta %d, dqs_shift %d, old wl %d, temp wl %d 0x%08x\n",
							  __func__, subphy, dq_to_dqs_min_delta,
							  dqs_shift[subphy], wl_tap, new_wl_tap,
							  reg_val));
					}
				}
				dq_to_dqs_min_delta = dq_to_dqs_min_delta_threshold * 2;
			}
		}
	}

	/* deskew dq */
	for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
		if (mode == RX_DIR)
			reg_addr = PBS_RX_BCAST_PHY_REG(effective_cs);
		else
			reg_addr = PBS_TX_BCAST_PHY_REG(effective_cs);
		ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
				   DDR_PHY_DATA, reg_addr, new_pbs_per_byte[0]);
	}
1320
1321	/* run training search and get results */
1322	ddr3_tip_ip_training_wrapper(dev, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE, ACCESS_TYPE_MULTICAST,
1323				     PARAM_NOT_CARE, result_type, HWS_CONTROL_ELEMENT_ADLL, PARAM_NOT_CARE,
1324				     dir, tm->if_act_mask, 0x0, max_win_size - 1, max_win_size - 1,
1325				     pattern, EDGE_FPF, CS_SINGLE, PARAM_NOT_CARE, training_result);
1326
	for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
		for (subphy = 0; subphy < subphy_max; subphy++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
			/* read training ip results from db */
			for (search_dir = HWS_LOW2HIGH; search_dir <= HWS_HIGH2LOW; search_dir++) {
				ddr3_tip_read_training_result(dev, iface, ACCESS_TYPE_UNICAST,
							      subphy, ALL_BITS_PER_PUP, search_dir,
							      dir, result_type,
							      TRAINING_LOAD_OPERATION_UNLOAD, CS_SINGLE,
							      &(result[subphy][search_dir]),
							      1, 0, 0);

				DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
							("cs %d if %d subphy %d mode %d result: "
							 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
							 effective_cs, iface, subphy, mode,
							 result[subphy][search_dir][0],
							 result[subphy][search_dir][1],
							 result[subphy][search_dir][2],
							 result[subphy][search_dir][3],
							 result[subphy][search_dir][4],
							 result[subphy][search_dir][5],
							 result[subphy][search_dir][6],
							 result[subphy][search_dir][7]));
			}

			/* calc dq skew impact on vw position */
			for (bit = 0; bit < BUS_WIDTH_IN_BITS; bit++) {
				start_win_diff = 0;
				end_win_diff = 0;
				limit_div = 0;
				if ((GET_LOCK_RESULT(result1[subphy][HWS_LOW2HIGH][bit]) == 1) &&
				    (GET_LOCK_RESULT(result1[subphy][HWS_HIGH2LOW][bit]) == 1) &&
				    (GET_LOCK_RESULT(result[subphy][HWS_LOW2HIGH][bit]) == 1) &&
				    (GET_LOCK_RESULT(result[subphy][HWS_HIGH2LOW][bit]) == 1)) {
					curr_start_win = GET_TAP_RESULT(result1[subphy][HWS_LOW2HIGH][bit],
									EDGE_1);
					curr_end_win = GET_TAP_RESULT(result1[subphy][HWS_HIGH2LOW][bit],
								      EDGE_1);
					upd_curr_start_win = GET_TAP_RESULT(result[subphy][HWS_LOW2HIGH][bit],
									    EDGE_1);
					upd_curr_end_win = GET_TAP_RESULT(result[subphy][HWS_HIGH2LOW][bit],
									  EDGE_1);

					/* update tx start skew; set rx vw position */
					if ((upd_curr_start_win != 0) && (curr_start_win != 0)) {
						if (upd_curr_start_win > curr_start_win) {
							start_win_diff = upd_curr_start_win - curr_start_win;
							if (mode == TX_DIR)
								start_win_diff =
									curr_start_win + 64 - upd_curr_start_win;
						} else {
							start_win_diff = curr_start_win - upd_curr_start_win;
						}
						limit_div++;
					} else {
						rx_vw_pos[iface][subphy] = ALIGN_LEFT;
					}

					/* update tx end skew; set rx vw position */
					if (((upd_curr_end_win != max_win_size) && (curr_end_win != max_win_size)) ||
					    (mode == TX_DIR)) {
						if (upd_curr_end_win > curr_end_win) {
							end_win_diff = upd_curr_end_win - curr_end_win;
							if (mode == TX_DIR)
								end_win_diff =
									curr_end_win + 64 - upd_curr_end_win;
						} else {
							end_win_diff = curr_end_win - upd_curr_end_win;
						}
						limit_div++;
					} else {
						rx_vw_pos[iface][subphy] = ALIGN_RIGHT;
					}

					/*
					 * don't care about start in tx mode
					 * TODO: temporary solution for instability in the start adll search
					 */
					if (mode == TX_DIR) {
						start_win_diff = end_win_diff;
						limit_div = 2;
					}

					/*
					 * workaround for false tx measurements in tap tune stage
					 * tx pbs factor will use rx pbs factor results instead
					 */
					if ((limit_div != 0) && (mode == RX_DIR)) {
						pbs_tap_factor[iface][subphy][bit] =
							PBS_VAL_FACTOR * (start_win_diff + end_win_diff) /
							(new_pbs_per_byte[subphy] * limit_div);
						tmp_pbs_tap_factor[iface][subphy][bit] =
							pbs_tap_factor[iface][subphy][bit];
					} else {
						pbs_tap_factor[iface][subphy][bit] =
							tmp_pbs_tap_factor[iface][subphy][bit];
					}

					DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
								("cs %d if %d subphy %d bit %d sw1 %d sw2 %d "
								 "ew1 %d ew2 %d sum delta %d, align %d\n",
								 effective_cs, iface, subphy, bit,
								 curr_start_win, upd_curr_start_win,
								 curr_end_win, upd_curr_end_win,
								 pbs_tap_factor[iface][subphy][bit],
								 rx_vw_pos[iface][subphy]));
				} else {
					status = MV_FAIL;
					DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
								("tap tuning fail %s cs %d if %d subphy %d bit %d\n",
								 (mode == RX_DIR) ? "RX" : "TX", effective_cs, iface,
								 subphy, bit));
					DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
								("cs %d if %d subphy %d mode %d result: "
								 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
								 effective_cs, iface, subphy, mode,
								 result[subphy][HWS_LOW2HIGH][0],
								 result[subphy][HWS_LOW2HIGH][1],
								 result[subphy][HWS_LOW2HIGH][2],
								 result[subphy][HWS_LOW2HIGH][3],
								 result[subphy][HWS_LOW2HIGH][4],
								 result[subphy][HWS_LOW2HIGH][5],
								 result[subphy][HWS_LOW2HIGH][6],
								 result[subphy][HWS_LOW2HIGH][7]));
					DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
								("cs %d if %d subphy %d mode %d result: "
								 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
								 effective_cs, iface, subphy, mode,
								 result[subphy][HWS_HIGH2LOW][0],
								 result[subphy][HWS_HIGH2LOW][1],
								 result[subphy][HWS_HIGH2LOW][2],
								 result[subphy][HWS_HIGH2LOW][3],
								 result[subphy][HWS_HIGH2LOW][4],
								 result[subphy][HWS_HIGH2LOW][5],
								 result[subphy][HWS_HIGH2LOW][6],
								 result[subphy][HWS_HIGH2LOW][7]));
					DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
								("cs %d if %d subphy %d mode %d result: "
								 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
								 effective_cs, iface, subphy, mode,
								 result1[subphy][HWS_LOW2HIGH][0],
								 result1[subphy][HWS_LOW2HIGH][1],
								 result1[subphy][HWS_LOW2HIGH][2],
								 result1[subphy][HWS_LOW2HIGH][3],
								 result1[subphy][HWS_LOW2HIGH][4],
								 result1[subphy][HWS_LOW2HIGH][5],
								 result1[subphy][HWS_LOW2HIGH][6],
								 result1[subphy][HWS_LOW2HIGH][7]));
					DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
								("cs %d if %d subphy %d mode %d result: "
								 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
								 effective_cs, iface, subphy, mode,
								 result1[subphy][HWS_HIGH2LOW][0],
								 result1[subphy][HWS_HIGH2LOW][1],
								 result1[subphy][HWS_HIGH2LOW][2],
								 result1[subphy][HWS_HIGH2LOW][3],
								 result1[subphy][HWS_HIGH2LOW][4],
								 result1[subphy][HWS_HIGH2LOW][5],
								 result1[subphy][HWS_HIGH2LOW][6],
								 result1[subphy][HWS_HIGH2LOW][7]));
				}
			}
		}
	}

	/* restore cs enable value */
	for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
		ddr3_tip_if_write(dev, ACCESS_TYPE_UNICAST, iface, DUAL_DUNIT_CFG_REG,
				  cs_ena_reg_val[iface], MASK_ALL_BITS);
	}

	/* restore pbs (set to 0) */
	for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
		for (subphy = 0; subphy < subphy_max; subphy++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
			if (mode == RX_DIR)
				reg_addr = PBS_RX_BCAST_PHY_REG(effective_cs);
			else
				reg_addr = PBS_TX_BCAST_PHY_REG(effective_cs);
			ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface, ACCESS_TYPE_UNICAST,
					   subphy, DDR_PHY_DATA, reg_addr, 0);
		}
	}

	/* set deskew bias for rx valid window */
	if (mode == RX_DIR) {
		/*
		 * pattern special for rx
		 * check for rx_vw_pos stat
		 * - add n pbs taps to every dq to align to left (pbs_max set to (31 - n))
		 * - add pbs taps to dqs to align to right
		 */
		for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
			for (subphy = 0; subphy < subphy_max; subphy++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
				if (rx_vw_pos[iface][subphy] == ALIGN_LEFT) {
					ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, 0,
							   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
							   PBS_RX_BCAST_PHY_REG(effective_cs),
							   VW_DESKEW_BIAS);
					DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
							  ("%s: if %d, subphy %d aligned to left\n",
							   __func__, iface, subphy));
				} else if (rx_vw_pos[iface][subphy] == ALIGN_RIGHT) {
					reg_addr = PBS_RX_PHY_REG(effective_cs, DQSP_PAD);
					ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, 0,
							   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
							   reg_addr, VW_DESKEW_BIAS);
					reg_addr = PBS_RX_PHY_REG(effective_cs, DQSN_PAD);
					ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, 0,
							   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
							   reg_addr, VW_DESKEW_BIAS);
					DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
							  ("%s: if %d, subphy %d aligned to right\n",
							   __func__, iface, subphy));
				}
			} /* subphy */
		} /* if */
	} else { /* tx mode */
		/* update wl solution */
		if (status == MV_OK) {
			for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
				VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
				for (subphy = 0; subphy < subphy_max; subphy++) {
					VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
					if (is_dq_dqs_short[subphy]) {
						wl_tap = ((wl_adll[subphy] >> WR_LVL_REF_DLY_OFFS) &
							  WR_LVL_REF_DLY_MASK) +
							 ((wl_adll[subphy] >> WR_LVL_PH_SEL_OFFS) &
							  WR_LVL_PH_SEL_MASK) * ADLL_TAPS_PER_PHASE;
						pbs_tap_factor_avg = (pbs_tap_factor[iface][subphy][0] +
								      pbs_tap_factor[iface][subphy][1] +
								      pbs_tap_factor[iface][subphy][2] +
								      pbs_tap_factor[iface][subphy][3] +
								      pbs_tap_factor[iface][subphy][4] +
								      pbs_tap_factor[iface][subphy][5] +
								      pbs_tap_factor[iface][subphy][6] +
								      pbs_tap_factor[iface][subphy][7]) / 8;
						DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
									("%s: pbs_tap_factor_avg %d\n",
									 __func__, pbs_tap_factor_avg));
						new_wl_tap = wl_tap -
							     (dqs_shift[subphy] * pbs_tap_factor_avg) /
							     PBS_VAL_FACTOR;
						/*
						 * check wraparound due to change in the pbs_tap_factor_avg
						 * vs the first guess
						 */
						if (new_wl_tap <= 0)
							new_wl_tap = 0;

						reg_val = (new_wl_tap & WR_LVL_REF_DLY_MASK) |
							  ((new_wl_tap &
							    ((WR_LVL_PH_SEL_MASK << WR_LVL_PH_SEL_OFFS) >> 1))
							   << 1) |
							  (wl_adll[subphy] &
							   ((CTRL_CENTER_DLY_MASK << CTRL_CENTER_DLY_OFFS) |
							    (CTRL_CENTER_DLY_INV_MASK << CTRL_CENTER_DLY_INV_OFFS)));
						ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface,
								   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
								   WL_PHY_REG(effective_cs), reg_val);
						DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
									("%s: tap tune tx algorithm final wl:\n",
									 __func__));
						DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
									("%s: subphy %d, dqs pbs %d, old wl %d, final wl %d 0x%08x -> 0x%08x\n",
									 __func__, subphy, pbs_tap_factor_avg,
									 wl_tap, new_wl_tap, wl_adll[subphy],
									 reg_val));
					}
				}
			}
		} else {
			/* return to nominal wl */
			for (iface = 0; iface < MAX_INTERFACE_NUM; iface++) {
				VALIDATE_IF_ACTIVE(tm->if_act_mask, iface);
				for (subphy = 0; subphy < subphy_max; subphy++) {
					ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface, ACCESS_TYPE_UNICAST,
							   subphy, DDR_PHY_DATA, WL_PHY_REG(effective_cs),
							   wl_adll[subphy]);
					DEBUG_TAP_TUNING_ENGINE(DEBUG_LEVEL_INFO,
								("%s: tap tune failed; return to nominal wl\n",
								 __func__));
					reg_addr = PBS_TX_PHY_REG(effective_cs, DQSP_PAD);
					ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface, ACCESS_TYPE_UNICAST,
							   subphy, DDR_PHY_DATA, reg_addr, 0);
					reg_addr = PBS_TX_PHY_REG(effective_cs, DQSN_PAD);
					ddr3_tip_bus_write(dev, ACCESS_TYPE_UNICAST, iface, ACCESS_TYPE_UNICAST,
							   subphy, DDR_PHY_DATA, reg_addr, 0);
				}
			}
		}
	}

	return status;
}

/* receiver duty cycle flow */
#define DDR_PHY_JIRA_ENABLE
int mv_ddr4_receiver_calibration(u8 dev_num)
{
	u32 if_id, subphy_num;
	u32 vref_idx, dq_idx, pad_num = 0;
	u8 dq_vref_start_win[MAX_INTERFACE_NUM][MAX_BUS_NUM][RECEIVER_DC_MAX_COUNT];
	u8 dq_vref_end_win[MAX_INTERFACE_NUM][MAX_BUS_NUM][RECEIVER_DC_MAX_COUNT];
	u8 c_vref[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 valid_win_size[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 c_opt_per_bus[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 valid_vref_cnt[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 valid_vref_ptr[MAX_INTERFACE_NUM][MAX_BUS_NUM][RECEIVER_DC_MAX_COUNT];
	u8 center_adll[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 center_vref[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 pbs_res_per_bus[MAX_INTERFACE_NUM][MAX_BUS_NUM][BUS_WIDTH_IN_BITS];
	u16 lambda_per_dq[MAX_INTERFACE_NUM][MAX_BUS_NUM][BUS_WIDTH_IN_BITS];
	u8 dqs_pbs = 0, const_pbs;
	int tap_tune_passed = 0;
	struct mv_ddr_topology_map *tm = mv_ddr_topology_map_get();
	enum hws_result *flow_result = ddr3_tip_get_result_ptr(training_stage);
	u8 subphy_max = ddr3_tip_dev_attr_get(dev_num, MV_ATTR_OCTET_PER_INTERFACE);
#ifdef DDR_PHY_JIRA_ENABLE
	u32 dqs_pbs_jira56[MAX_INTERFACE_NUM][MAX_BUS_NUM];
	u8 delta = 0;
#endif
	unsigned int max_cs = mv_ddr_cs_num_get();
	u32 ctr_x[4], pbs_temp[4];
	u16 cs_index = 0, pbs_rx_avg, lambda_avg;
	int status;

	DEBUG_CALIBRATION(DEBUG_LEVEL_INFO, ("Starting ddr4 dc calibration training stage\n"));

	vdq_tv = 0;
	duty_cycle = 0;

	/* reset valid vref counter per if and subphy */
	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++)
		for (subphy_num = 0; subphy_num < MAX_BUS_NUM; subphy_num++)
			valid_vref_cnt[if_id][subphy_num] = 0;

	/* calculate pbs-adll tap tuning */
	/* reset special pattern configuration to re-run this stage */
	status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
				    DDR_PHY_DATA, 0x5f + effective_cs * 0x10, 0x0);
	if (status != MV_OK)
		return status;

	status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
				    DDR_PHY_DATA, 0x54 + effective_cs * 0x10, 0x0);
	if (status != MV_OK)
		return status;

	status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
				    DDR_PHY_DATA, 0x55 + effective_cs * 0x10, 0x0);
	if (status != MV_OK)
		return status;

#ifdef DDR_PHY_JIRA_ENABLE
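	/*
	 * when calibrating a chip select other than cs 0, save the cs 0 dqs tx
	 * pbs value (reg 0x54) and clear both dqs p/n pbs regs (0x54/0x55) for
	 * the duration of this stage; they are restored at the end of the flow
	 */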
	if (effective_cs != 0) {
		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
				status = ddr3_tip_bus_read(dev_num, if_id, ACCESS_TYPE_UNICAST, subphy_num,
							   DDR_PHY_DATA, 0x54 + 0 * 0x10,
							   &dqs_pbs_jira56[if_id][subphy_num]);
				if (status != MV_OK)
					return status;

				status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST,
							    subphy_num, DDR_PHY_DATA, 0x54 + 0 * 0x10, 0x0);
				if (status != MV_OK)
					return status;

				status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST,
							    subphy_num, DDR_PHY_DATA, 0x55 + 0 * 0x10, 0x0);
				if (status != MV_OK)
					return status;
			}
		}
	}
#endif

	if (mv_ddr4_tap_tuning(dev_num, lambda_per_dq, RX_DIR) == MV_OK)
		tap_tune_passed = 1;

	/* main loop for 2d scan (low_to_high voltage scan) */
	for (duty_cycle = RECEIVER_DC_MIN_RANGE;
	     duty_cycle <= RECEIVER_DC_MAX_RANGE;
	     duty_cycle += RECEIVER_DC_STEP_SIZE) {
		/* set new receiver dc training value in dram */
		status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
					    ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE, DDR_PHY_DATA,
					    VREF_BCAST_PHY_REG(effective_cs), duty_cycle);
		if (status != MV_OK)
			return status;

		status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
					    ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE, DDR_PHY_DATA,
					    VREF_PHY_REG(effective_cs, DQSP_PAD), duty_cycle);
		if (status != MV_OK)
			return status;

		status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
					    ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE, DDR_PHY_DATA,
					    VREF_PHY_REG(effective_cs, DQSN_PAD), duty_cycle);
		if (status != MV_OK)
			return status;

		if (tap_tune_passed == 0) {
			if (mv_ddr4_tap_tuning(dev_num, lambda_per_dq, RX_DIR) == MV_OK) {
				tap_tune_passed = 1;
			} else {
				DEBUG_CALIBRATION(DEBUG_LEVEL_ERROR,
						  ("rc, tap tune failed inside calibration\n"));
				continue;
			}
		}

		if (mv_ddr4_centralization(dev_num, lambda_per_dq, c_opt_per_bus, pbs_res_per_bus,
					   valid_win_size, RX_DIR, vdq_tv, duty_cycle) != MV_OK) {
			DEBUG_CALIBRATION(DEBUG_LEVEL_ERROR,
					  ("error: ddr4 centralization failed (duty_cycle %d)!!!\n", duty_cycle));
			if (debug_mode == 0)
				break;
		}

		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
				if (valid_win_size[if_id][subphy_num] > 8) {
					/* window is valid; keep current duty_cycle value and increment counter */
					vref_idx = valid_vref_cnt[if_id][subphy_num];
					valid_vref_ptr[if_id][subphy_num][vref_idx] = duty_cycle;
					valid_vref_cnt[if_id][subphy_num]++;
					c_vref[if_id][subphy_num] = c_opt_per_bus[if_id][subphy_num];
					/* set 0 for possible negative values */
					dq_vref_start_win[if_id][subphy_num][vref_idx] =
						c_vref[if_id][subphy_num] + 1 - valid_win_size[if_id][subphy_num] / 2;
					dq_vref_start_win[if_id][subphy_num][vref_idx] =
						(valid_win_size[if_id][subphy_num] % 2 == 0) ?
						dq_vref_start_win[if_id][subphy_num][vref_idx] :
						dq_vref_start_win[if_id][subphy_num][vref_idx] - 1;
					dq_vref_end_win[if_id][subphy_num][vref_idx] =
						c_vref[if_id][subphy_num] + valid_win_size[if_id][subphy_num] / 2;
				}
			} /* subphy */
		} /* if */
	} /* duty_cycle */

	if (tap_tune_passed == 0) {
		DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
				  ("%s: tap tune not passed on any duty_cycle value\n", __func__));
		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			/* report fail for all active interfaces; multi-interface support - tbd */
			flow_result[if_id] = TEST_FAILED;
		}

		return MV_FAIL;
	}

	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
			DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
					  ("calculating center of mass for subphy %d, valid window size %d\n",
					   subphy_num, valid_win_size[if_id][subphy_num]));
			if (valid_vref_cnt[if_id][subphy_num] > 0) {
				rx_eye_hi_lvl[subphy_num] =
					valid_vref_ptr[if_id][subphy_num][valid_vref_cnt[if_id][subphy_num] - 1];
				rx_eye_lo_lvl[subphy_num] = valid_vref_ptr[if_id][subphy_num][0];
				/* calculate center of mass sampling point (t, v) for each subphy */
				status = mv_ddr4_center_of_mass_calc(dev_num, if_id, subphy_num, RX_DIR,
								     dq_vref_start_win[if_id][subphy_num],
								     dq_vref_end_win[if_id][subphy_num],
								     valid_vref_ptr[if_id][subphy_num],
								     valid_vref_cnt[if_id][subphy_num],
								     &center_vref[if_id][subphy_num],
								     &center_adll[if_id][subphy_num]);
				if (status != MV_OK)
					return status;

				DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
						  ("center of mass results: vref %d, adll %d\n",
						   center_vref[if_id][subphy_num], center_adll[if_id][subphy_num]));
			} else {
				DEBUG_CALIBRATION(DEBUG_LEVEL_ERROR,
						  ("%s: no valid window found for cs %d, subphy %d\n",
						   __func__, effective_cs, subphy_num));
				return MV_FAIL;
			}
		}
	}

	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
			status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
						    ACCESS_TYPE_UNICAST, subphy_num, DDR_PHY_DATA,
						    VREF_BCAST_PHY_REG(effective_cs),
						    center_vref[if_id][subphy_num]);
			if (status != MV_OK)
				return status;

			status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
						    ACCESS_TYPE_UNICAST, subphy_num, DDR_PHY_DATA,
						    VREF_PHY_REG(effective_cs, DQSP_PAD),
						    center_vref[if_id][subphy_num]);
			if (status != MV_OK)
				return status;

			status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE,
						    ACCESS_TYPE_UNICAST, subphy_num, DDR_PHY_DATA,
						    VREF_PHY_REG(effective_cs, DQSN_PAD),
						    center_vref[if_id][subphy_num]);
			if (status != MV_OK)
				return status;

			DEBUG_CALIBRATION(DEBUG_LEVEL_INFO, ("final dc %d\n", center_vref[if_id][subphy_num]));
		}

		/* run centralization again with optimal vref to update global structures */
		mv_ddr4_centralization(dev_num, lambda_per_dq, c_opt_per_bus, pbs_res_per_bus, valid_win_size,
				       RX_DIR, 0, center_vref[if_id][0]);

		for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);

			const_pbs = 0xa;
			mv_ddr4_dqs_reposition(RX_DIR, lambda_per_dq[if_id][subphy_num],
					       pbs_res_per_bus[if_id][subphy_num], 0x0,
					       &center_adll[if_id][subphy_num], &dqs_pbs);

			/* dq pbs update */
			for (dq_idx = 0; dq_idx < 8; dq_idx++) {
				pad_num = dq_map_table[dq_idx +
						       subphy_num * BUS_WIDTH_IN_BITS +
						       if_id * BUS_WIDTH_IN_BITS * subphy_max];
				status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, if_id, ACCESS_TYPE_UNICAST,
							    subphy_num, DDR_PHY_DATA,
							    0x50 + pad_num + effective_cs * 0x10,
							    const_pbs + pbs_res_per_bus[if_id][subphy_num][dq_idx]);
				if (status != MV_OK)
					return status;
			}

			/* dqs pbs update */
			status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST, subphy_num,
						    DDR_PHY_DATA, 0x54 + effective_cs * 0x10, dqs_pbs);
			if (status != MV_OK)
				return status;

			status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST, subphy_num,
						    DDR_PHY_DATA, 0x55 + effective_cs * 0x10, dqs_pbs);
			if (status != MV_OK)
				return status;

			status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, if_id, ACCESS_TYPE_UNICAST,
						    subphy_num, DDR_PHY_DATA,
						    CRX_PHY_REG(effective_cs),
						    center_adll[if_id][subphy_num]);
			if (status != MV_OK)
				return status;

#ifdef DDR_PHY_JIRA_ENABLE
			if (effective_cs != 0) {
				status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST,
							    subphy_num, DDR_PHY_DATA, 0x54 + 0 * 0x10,
							    dqs_pbs_jira56[if_id][subphy_num]);
				if (status != MV_OK)
					return status;

				status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST,
							    subphy_num, DDR_PHY_DATA, 0x55 + 0 * 0x10,
							    dqs_pbs_jira56[if_id][subphy_num]);
				if (status != MV_OK)
					return status;
			}
#endif
		}
	}

	for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
		VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
		/* report pass for all active interfaces; multi-interface support - tbd */
		flow_result[if_id] = TEST_SUCCESS;
	}

#ifdef DDR_PHY_JIRA_ENABLE
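	/*
	 * on the last cs, average the per-cs dqs rx pbs values, program the
	 * average to all chip selects, and compensate each cs's rx adll center
	 * (ctr_x) by the pbs delta converted through the average lambda
	 */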
	if (effective_cs == (max_cs - 1)) {
		/* adjust dqs to be as cs0 */
		for (if_id = 0; if_id < MAX_INTERFACE_NUM; if_id++) {
			VALIDATE_IF_ACTIVE(tm->if_act_mask, if_id);
			for (subphy_num = 0; subphy_num < subphy_max; subphy_num++) {
				VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy_num);
				pbs_rx_avg = 0;
				/* find average of all pbs of dqs and read ctr_x */
				for (cs_index = 0; cs_index < max_cs; cs_index++) {
					status = ddr3_tip_bus_read(dev_num, if_id, ACCESS_TYPE_UNICAST,
								   subphy_num, DDR_PHY_DATA,
								   0x54 + cs_index * 0x10,
								   &pbs_temp[cs_index]);
					if (status != MV_OK)
						return status;

					status = ddr3_tip_bus_read(dev_num, if_id, ACCESS_TYPE_UNICAST,
								   subphy_num, DDR_PHY_DATA,
								   0x3 + cs_index * 0x4,
								   &ctr_x[cs_index]);
					if (status != MV_OK)
						return status;

					pbs_rx_avg = pbs_rx_avg + pbs_temp[cs_index];
				}

				pbs_rx_avg = pbs_rx_avg / max_cs;

				/* update pbs and ctr_x */
				lambda_avg = (lambda_per_dq[if_id][subphy_num][0] +
					      lambda_per_dq[if_id][subphy_num][1] +
					      lambda_per_dq[if_id][subphy_num][2] +
					      lambda_per_dq[if_id][subphy_num][3] +
					      lambda_per_dq[if_id][subphy_num][4] +
					      lambda_per_dq[if_id][subphy_num][5] +
					      lambda_per_dq[if_id][subphy_num][6] +
					      lambda_per_dq[if_id][subphy_num][7]) / 8;

				for (cs_index = 0; cs_index < max_cs; cs_index++) {
					status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST,
								    0, ACCESS_TYPE_UNICAST,
								    subphy_num, DDR_PHY_DATA,
								    0x54 + cs_index * 0x10, pbs_rx_avg);
					if (status != MV_OK)
						return status;

					status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST,
								    0, ACCESS_TYPE_UNICAST,
								    subphy_num, DDR_PHY_DATA,
								    0x55 + cs_index * 0x10, pbs_rx_avg);
					if (status != MV_OK)
						return status;

					/* update */
					if (pbs_rx_avg >= pbs_temp[cs_index]) {
						delta = ((pbs_rx_avg - pbs_temp[cs_index]) * lambda_avg) /
							PBS_VAL_FACTOR;
						if (ctr_x[cs_index] >= delta) {
							ctr_x[cs_index] = ctr_x[cs_index] - delta;
						} else {
							ctr_x[cs_index] = 0;
							DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
									  ("jira ddrphy56 extend fix(-) required %d\n",
									   delta));
						}
					} else {
						delta = ((pbs_temp[cs_index] - pbs_rx_avg) * lambda_avg) /
							PBS_VAL_FACTOR;
						if ((ctr_x[cs_index] + delta) > 32) {
							ctr_x[cs_index] = 32;
							DEBUG_CALIBRATION(DEBUG_LEVEL_INFO,
									  ("jira ddrphy56 extend fix(+) required %d\n",
									   delta));
						} else {
							ctr_x[cs_index] = (ctr_x[cs_index] + delta);
						}
					}
					status = ddr3_tip_bus_write(dev_num, ACCESS_TYPE_UNICAST, if_id,
								    ACCESS_TYPE_UNICAST, subphy_num, DDR_PHY_DATA,
								    CRX_PHY_REG(effective_cs),
								    ctr_x[cs_index]);
					if (status != MV_OK)
						return status;
				}
			}
		}
	}
#endif

	return MV_OK;
}

#define MAX_LOOPS			2 /* maximum number of loops to get to solution */
#define LEAST_SIGNIFICANT_BYTE_MASK	0xff
#define VW_SUBPHY_LIMIT_MIN		0
#define VW_SUBPHY_LIMIT_MAX		127
#define MAX_PBS_NUM			31 /* TODO: added by another patch */
enum {
	LOCKED,
	UNLOCKED
};
enum {
	PASS,
	FAIL
};

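/* dm signal tuning flow */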
int mv_ddr4_dm_tuning(u32 cs, u16 (*pbs_tap_factor)[MAX_BUS_NUM][BUS_WIDTH_IN_BITS])
{
	struct mv_ddr_topology_map *tm = mv_ddr_topology_map_get();
	enum hws_training_ip_stat training_result;
	enum hws_training_result result_type = RESULT_PER_BIT;
	enum hws_search_dir search_dir;
	enum hws_dir dir = OPER_WRITE;
	int vw_sphy_hi_diff = 0;
	int vw_sphy_lo_diff = 0;
	int x, y;
	int status;
	unsigned int a, b, c;
	u32 ctx_vector[MAX_BUS_NUM];
	u32 subphy, bit, pattern;
	u32 *result[MAX_BUS_NUM][HWS_SEARCH_DIR_LIMIT];
	u32 max_win_size = MAX_WINDOW_SIZE_TX;
	u32 dm_lambda[MAX_BUS_NUM] = {0};
	u32 loop;
	u32 adll_tap;
	u32 dm_pbs, max_pbs;
	u32 dq_pbs[BUS_WIDTH_IN_BITS];
	u32 new_dq_pbs[BUS_WIDTH_IN_BITS];
	u32 dq, pad;
	u32 dq_pbs_diff;
	u32 byte_center, dm_center;
	u32 idx, reg_val;
	u32 dm_pad = mv_ddr_dm_pad_get();
	u8 subphy_max = ddr3_tip_dev_attr_get(0, MV_ATTR_OCTET_PER_INTERFACE);
	u8 dm_vw_vector[MAX_BUS_NUM * ADLL_TAPS_PER_PERIOD];
	u8 vw_sphy_lo_lmt[MAX_BUS_NUM];
	u8 vw_sphy_hi_lmt[MAX_BUS_NUM];
	u8 dm_status[MAX_BUS_NUM];

	/* init */
	for (subphy = 0; subphy < subphy_max; subphy++) {
		VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
		dm_status[subphy] = UNLOCKED;
		for (bit = 0; bit < BUS_WIDTH_IN_BITS; bit++)
			dm_lambda[subphy] += pbs_tap_factor[0][subphy][bit];
		dm_lambda[subphy] /= BUS_WIDTH_IN_BITS;
	}

	/* get algorithm's adll result */
	for (subphy = 0; subphy < subphy_max; subphy++) {
		VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
		ddr3_tip_bus_read(0, 0, ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
				  CTX_PHY_REG(cs), &reg_val);
		ctx_vector[subphy] = reg_val;
	}

	for (loop = 0; loop < MAX_LOOPS; loop++) {
		for (subphy = 0; subphy < subphy_max; subphy++) {
			vw_sphy_lo_lmt[subphy] = VW_SUBPHY_LIMIT_MIN;
			vw_sphy_hi_lmt[subphy] = VW_SUBPHY_LIMIT_MAX;
			for (adll_tap = 0; adll_tap < ADLL_TAPS_PER_PERIOD; adll_tap++) {
				idx = subphy * ADLL_TAPS_PER_PERIOD + adll_tap;
				dm_vw_vector[idx] = PASS;
			}
		}

		/* get valid window of dm signal */
		mv_ddr_dm_vw_get(PATTERN_ZERO, cs, dm_vw_vector);
		mv_ddr_dm_vw_get(PATTERN_ONE, cs, dm_vw_vector);
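		/*
		 * each call presumably marks the taps that fail for its pattern, so
		 * the vector ends up with the intersection of the valid windows for
		 * all-zero and all-one data
		 */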

		/* get vw for dm disable */
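		/* the pattern indices (73 in 64-bit dram mode, 23 otherwise) are assumed to select the dm-disable training pattern */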
		pattern = MV_DDR_IS_64BIT_DRAM_MODE(tm->bus_act_mask) ? 73 : 23;
		ddr3_tip_ip_training_wrapper(0, ACCESS_TYPE_MULTICAST, PARAM_NOT_CARE, ACCESS_TYPE_MULTICAST,
					     PARAM_NOT_CARE, result_type, HWS_CONTROL_ELEMENT_ADLL, PARAM_NOT_CARE,
					     dir, tm->if_act_mask, 0x0, max_win_size - 1, max_win_size - 1, pattern,
					     EDGE_FPF, CS_SINGLE, PARAM_NOT_CARE, &training_result);

		/* find skew of dm signal vs. dq data bits using its valid window */
		for (subphy = 0; subphy < subphy_max; subphy++) {
			VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
			ddr3_tip_bus_write(0, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
					   CTX_PHY_REG(cs), ctx_vector[subphy]);

			for (search_dir = HWS_LOW2HIGH; search_dir <= HWS_HIGH2LOW; search_dir++) {
				ddr3_tip_read_training_result(0, 0, ACCESS_TYPE_UNICAST, subphy,
							      ALL_BITS_PER_PUP, search_dir, dir, result_type,
							      TRAINING_LOAD_OPERATION_UNLOAD, CS_SINGLE,
							      &(result[subphy][search_dir]),
							      1, 0, 0);
				DEBUG_DM_TUNING(DEBUG_LEVEL_INFO,
						("dm cs %d if %d subphy %d result: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
						 cs, 0, subphy,
						 result[subphy][search_dir][0],
						 result[subphy][search_dir][1],
						 result[subphy][search_dir][2],
						 result[subphy][search_dir][3],
						 result[subphy][search_dir][4],
						 result[subphy][search_dir][5],
						 result[subphy][search_dir][6],
						 result[subphy][search_dir][7]));
			}

			if (dm_status[subphy] == LOCKED)
				continue;

			for (bit = 0; bit < BUS_WIDTH_IN_BITS; bit++) {
				result[subphy][HWS_LOW2HIGH][bit] &= LEAST_SIGNIFICANT_BYTE_MASK;
				result[subphy][HWS_HIGH2LOW][bit] &= LEAST_SIGNIFICANT_BYTE_MASK;

				if (result[subphy][HWS_LOW2HIGH][bit] > vw_sphy_lo_lmt[subphy])
					vw_sphy_lo_lmt[subphy] = result[subphy][HWS_LOW2HIGH][bit];

				if (result[subphy][HWS_HIGH2LOW][bit] < vw_sphy_hi_lmt[subphy])
					vw_sphy_hi_lmt[subphy] = result[subphy][HWS_HIGH2LOW][bit];
			}

			DEBUG_DM_TUNING(DEBUG_LEVEL_INFO,
					("loop %d, dm subphy %d, vw %d, %d\n", loop, subphy,
					 vw_sphy_lo_lmt[subphy], vw_sphy_hi_lmt[subphy]));

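			/*
			 * compute the margins of the dm valid window relative to the
			 * byte's dq window; a negative diff means the dm window falls
			 * short on that side
			 */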
			idx = subphy * ADLL_TAPS_PER_PERIOD;
			status = mv_ddr_dm_to_dq_diff_get(vw_sphy_hi_lmt[subphy], vw_sphy_lo_lmt[subphy],
							  &dm_vw_vector[idx], &vw_sphy_hi_diff, &vw_sphy_lo_diff);
			if (status != MV_OK)
				return MV_FAIL;
			DEBUG_DM_TUNING(DEBUG_LEVEL_INFO,
					("vw_sphy_lo_diff %d, vw_sphy_hi_diff %d\n",
					 vw_sphy_lo_diff, vw_sphy_hi_diff));

			/* dm is the strongest signal */
			if ((vw_sphy_hi_diff >= 0) &&
			    (vw_sphy_lo_diff >= 0)) {
				dm_status[subphy] = LOCKED;
			} else if ((vw_sphy_hi_diff >= 0) &&
				   (vw_sphy_lo_diff < 0) &&
				   (loop == 0)) { /* update dm only */
				ddr3_tip_bus_read(0, 0, ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
						  PBS_TX_PHY_REG(cs, dm_pad), &dm_pbs);
				x = -vw_sphy_lo_diff; /* get positive x */
				a = (unsigned int)x * PBS_VAL_FACTOR;
				b = dm_lambda[subphy];
				if (round_div(a, b, &c) != MV_OK)
					return MV_FAIL;
				dm_pbs += (u32)c;
				dm_pbs = (dm_pbs > MAX_PBS_NUM) ? MAX_PBS_NUM : dm_pbs;
				ddr3_tip_bus_write(0, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST,
						   subphy, DDR_PHY_DATA,
						   PBS_TX_PHY_REG(cs, dm_pad), dm_pbs);
			} else if ((vw_sphy_hi_diff < 0) &&
				   (vw_sphy_lo_diff >= 0) &&
				   (loop == 0)) { /* update dq and c_opt */
				max_pbs = 0;
				for (dq = 0; dq < BUS_WIDTH_IN_BITS; dq++) {
					idx = dq + subphy * BUS_WIDTH_IN_BITS;
					pad = dq_map_table[idx];
					ddr3_tip_bus_read(0, 0, ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
							  PBS_TX_PHY_REG(cs, pad), &reg_val);
					dq_pbs[dq] = reg_val;
					x = -vw_sphy_hi_diff; /* get positive x */
					a = (unsigned int)x * PBS_VAL_FACTOR;
					b = pbs_tap_factor[0][subphy][dq];
					if (round_div(a, b, &c) != MV_OK)
						return MV_FAIL;
					new_dq_pbs[dq] = dq_pbs[dq] + (u32)c;
					if (max_pbs < new_dq_pbs[dq])
						max_pbs = new_dq_pbs[dq];
				}

				dq_pbs_diff = (max_pbs > MAX_PBS_NUM) ? (max_pbs - MAX_PBS_NUM) : 0;
				for (dq = 0; dq < BUS_WIDTH_IN_BITS; dq++) {
					idx = dq + subphy * BUS_WIDTH_IN_BITS;
					/* reg_val is unsigned; check for underflow before subtracting */
					if (new_dq_pbs[dq] < dq_pbs_diff) {
						DEBUG_DM_TUNING(DEBUG_LEVEL_ERROR,
								("unexpected negative value found\n"));
						return MV_FAIL;
					}
					reg_val = new_dq_pbs[dq] - dq_pbs_diff;
					pad = dq_map_table[idx];
					ddr3_tip_bus_write(0, ACCESS_TYPE_UNICAST, 0,
							   ACCESS_TYPE_UNICAST, subphy,
							   DDR_PHY_DATA,
							   PBS_TX_PHY_REG(cs, pad),
							   reg_val);
				}

				a = dm_lambda[subphy];
				b = dq_pbs_diff * PBS_VAL_FACTOR;
				if (b > 0) {
					if (round_div(a, b, &c) != MV_OK)
						return MV_FAIL;
					dq_pbs_diff = (u32)c;
				}

				x = (int)ctx_vector[subphy];
				if (x < 0) {
					DEBUG_DM_TUNING(DEBUG_LEVEL_ERROR,
							("unexpected negative value found\n"));
					return MV_FAIL;
				}
				y = (int)dq_pbs_diff;
				if (y < 0) {
					DEBUG_DM_TUNING(DEBUG_LEVEL_ERROR,
							("unexpected negative value found\n"));
					return MV_FAIL;
				}
				x += (y + vw_sphy_hi_diff) / 2;
				x %= ADLL_TAPS_PER_PERIOD;
				ctx_vector[subphy] = (u32)x;
			} else if (((vw_sphy_hi_diff < 0) && (vw_sphy_lo_diff < 0)) ||
				   (loop == 1)) { /* dm is the weakest signal */
				/* update dq and c_opt */
				dm_status[subphy] = LOCKED;
				byte_center = (vw_sphy_lo_lmt[subphy] + vw_sphy_hi_lmt[subphy]) / 2;
				x = (int)byte_center;
				if (x < 0) {
					DEBUG_DM_TUNING(DEBUG_LEVEL_ERROR,
							("unexpected negative value found\n"));
					return MV_FAIL;
				}
				x += (vw_sphy_hi_diff - vw_sphy_lo_diff) / 2;
				if (x < 0) {
					DEBUG_DM_TUNING(DEBUG_LEVEL_ERROR,
							("unexpected negative value found\n"));
					return MV_FAIL;
				}
				dm_center = (u32)x;

				if (byte_center > dm_center) {
					max_pbs = 0;
					for (dq = 0; dq < BUS_WIDTH_IN_BITS; dq++) {
						pad = dq_map_table[dq + subphy * BUS_WIDTH_IN_BITS];
						ddr3_tip_bus_read(0, 0, ACCESS_TYPE_UNICAST,
								  subphy, DDR_PHY_DATA,
								  PBS_TX_PHY_REG(cs, pad),
								  &reg_val);
						dq_pbs[dq] = reg_val;
						a = (byte_center - dm_center) * PBS_VAL_FACTOR;
						b = pbs_tap_factor[0][subphy][dq];
						if (round_div(a, b, &c) != MV_OK)
							return MV_FAIL;
						new_dq_pbs[dq] = dq_pbs[dq] + (u32)c;
						if (max_pbs < new_dq_pbs[dq])
							max_pbs = new_dq_pbs[dq];
					}

					dq_pbs_diff = (max_pbs > MAX_PBS_NUM) ? (max_pbs - MAX_PBS_NUM) : 0;
					for (dq = 0; dq < BUS_WIDTH_IN_BITS; dq++) {
						idx = dq + subphy * BUS_WIDTH_IN_BITS;
						pad = dq_map_table[idx];
						/* reg_val is unsigned; check for underflow before subtracting */
						if (new_dq_pbs[dq] < dq_pbs_diff) {
							DEBUG_DM_TUNING(DEBUG_LEVEL_ERROR,
									("unexpected negative value found\n"));
							return MV_FAIL;
						}
						reg_val = new_dq_pbs[dq] - dq_pbs_diff;
						ddr3_tip_bus_write(0, ACCESS_TYPE_UNICAST, 0,
								   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
								   PBS_TX_PHY_REG(cs, pad),
								   reg_val);
					}
					ctx_vector[subphy] = dm_center % ADLL_TAPS_PER_PERIOD;
				} else {
					ddr3_tip_bus_read(0, 0, ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
							  PBS_TX_PHY_REG(cs, dm_pad), &dm_pbs);
					a = (dm_center - byte_center) * PBS_VAL_FACTOR;
					b = dm_lambda[subphy];
					if (round_div(a, b, &c) != MV_OK)
						return MV_FAIL;
					dm_pbs += (u32)c;
					ddr3_tip_bus_write(0, ACCESS_TYPE_UNICAST, 0,
							   ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
							   PBS_TX_PHY_REG(cs, dm_pad), dm_pbs);
				}
			} else {
				/*
				 * nothing to update on this iteration; whether the dm
				 * signal converged per subphy is checked after the loop
				 */
			}
		}
	}

	for (subphy = 0; subphy < subphy_max; subphy++) {
		VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
		ddr3_tip_bus_write(0, ACCESS_TYPE_UNICAST, 0, ACCESS_TYPE_UNICAST, subphy, DDR_PHY_DATA,
				   CTX_PHY_REG(cs), ctx_vector[subphy]);
	}

	for (subphy = 0; subphy < subphy_max; subphy++) {
		VALIDATE_BUS_ACTIVE(tm->bus_act_mask, subphy);
		if (dm_status[subphy] != LOCKED) {
			DEBUG_DM_TUNING(DEBUG_LEVEL_ERROR,
					("no convergence for dm signal[%u] found\n", subphy));
			return MV_FAIL;
		}
	}

	return MV_OK;
}
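
/* issue a refresh command to each chip select, then restore the odpg data control register */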
void refresh(void)
{
	u32 data_read[MAX_INTERFACE_NUM];

	ddr3_tip_if_read(0, ACCESS_TYPE_UNICAST, 0, ODPG_DATA_CTRL_REG, data_read, MASK_ALL_BITS);

	/* refresh command for cs 0 */
	ddr3_tip_if_write(0, ACCESS_TYPE_UNICAST, 0, ODPG_DATA_CTRL_REG, (0 << 26), (3 << 26));
	ddr3_tip_if_write(0, ACCESS_TYPE_UNICAST, 0, SDRAM_OP_REG, 0xe02, 0xf1f);
	if (ddr3_tip_if_polling(0, ACCESS_TYPE_UNICAST, 0, 0, 0x1f, SDRAM_OP_REG, MAX_POLLING_ITERATIONS) != MV_OK)
		DEBUG_TRAINING_IP(DEBUG_LEVEL_ERROR, ("refresh cs 0 polling failed\n"));

	/* refresh command for cs 1 */
	ddr3_tip_if_write(0, ACCESS_TYPE_UNICAST, 0, ODPG_DATA_CTRL_REG, (1 << 26), (3 << 26));
	ddr3_tip_if_write(0, ACCESS_TYPE_UNICAST, 0, SDRAM_OP_REG, 0xd02, 0xf1f);
	if (ddr3_tip_if_polling(0, ACCESS_TYPE_UNICAST, 0, 0, 0x1f, SDRAM_OP_REG, MAX_POLLING_ITERATIONS) != MV_OK)
		DEBUG_TRAINING_IP(DEBUG_LEVEL_ERROR, ("refresh cs 1 polling failed\n"));

	/* restore the odpg data control register */
	ddr3_tip_if_write(0, ACCESS_TYPE_UNICAST, 0, ODPG_DATA_CTRL_REG, data_read[0], MASK_ALL_BITS);
}
#endif /* CONFIG_DDR4 */