1/*
2 * Copyright 2006-2018, Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
 *		Axel Dörfler, axeld@pinc-software.de
7 *		Alexander von Gluck IV, kallisti5@unixzen.com
8 *		Adrien Destugues, pulkomandy@pulkomandy.tk
9 */
10
11
12#include "pll.h"
13
14#include <math.h>
15#include <stdio.h>
16#include <string.h>
17
18#include <Debug.h>
19
20#include <create_display_modes.h>
21#include <ddc.h>
22#include <edid.h>
23#include <validate_display_mode.h>
24
25#include "accelerant_protos.h"
26#include "accelerant.h"
27#include "utility.h"
28
29
// Debug output helpers. TRACE_MODE is defined unconditionally right below,
// so TRACE() is always compiled in for this file.
#undef TRACE
#define TRACE_MODE
#ifdef TRACE_MODE
	// Verbose tracing; every message gets the driver name as prefix
#	define TRACE(x...) _sPrintf("intel_extreme: " x)
#else
	// Tracing disabled: TRACE() expands to nothing
#	define TRACE(x...)
#endif

// Error output; available independently of TRACE_MODE
#define ERROR(x...) _sPrintf("intel_extreme: " x)
// Traces the name of the enclosing function; any arguments are ignored
#define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
40
41
42// PLL limits, taken from i915 DRM driver. However, note that we use the values of
43// N+2, M1+2 and M2+2 here, the - 2 being applied when we write the values to the registers.
44
45static pll_limits kLimits85x = {
46	// p, p1, p2,  n,   m, m1, m2
47	{  4,  2,  2,  4,  96, 20,  8},
48	{128, 33,  4, 18, 140, 28, 18},
49	165000, 908000, 1512000
50};
51
52// For Iron Lake, a new set of timings is introduced along with the FDI system,
53// and carried on to later cards with just one further change (to the P2 cutoff
54// frequency) in Sandy Bridge.
55
56static pll_limits kLimits9xxSdvo = {
57	// p, p1, p2,  n,   m, m1, m2
58	{  5,  1,  5,  3,  70, 10,  5},	// min
59	{ 80,  8, 10,  8, 120, 20,  9},	// max
60	200000, 1400000, 2800000
61};
62
63static pll_limits kLimits9xxLvds = {
64	// p, p1, p2,  n,   m, m1, m2
65	{  7,  1,  7,  3,  70, 10,  5},	// min
66	{ 98,  8, 14,  8, 120, 20,  9},	// max
67	112000, 1400000, 2800000
68};
69
// Limits for G45 cards taken from i915 DRM driver, mixed with old setup
// plus tests to accommodate lower resolutions with still correct refresh.
// Note that n here is actually n+2, same applies to m1 and m2.
73
74static pll_limits kLimitsG4xSdvo = {
75	// p, p1, p2,  n,   m, m1, m2
76	{ 10,  1, 10,  3, 104, 19,  7},	// min
77	{ 80,  8, 10,  8, 138, 25, 13},	// max
78	270000, 1750000, 3500000
79};
80
81#if 0
82static pll_limits kLimitsG4xHdmi = {
83	// p, p1, p2,  n,   m, m1, m2
84	{  5,  1,  5,  3, 104, 18,  7},	// min
85	{ 80,  8, 10,  8, 138, 25, 13},	// max
86	165000, 1750000, 3500000
87};
88#endif
89
90static pll_limits kLimitsG4xLvdsSingle = {
91	// p, p1, p2,  n,   m, m1, m2
92	{ 28,  2, 14,  3, 104, 19,  7},	// min
93	{112,  8, 14,  8, 138, 25, 13},	// max
94	0, 1750000, 3500000
95};
96
97static pll_limits kLimitsG4xLvdsDual = {
98	// p, p1, p2,  n,   m, m1, m2
99	{ 14,  2,  7,  3, 104, 19,  7},	// min
100	{ 42,  6,  7,  8, 138, 25, 13},	// max
101	0, 1750000, 3500000
102};
103
104static pll_limits kLimitsIlkDac = {
105	// p, p1, p2, n,   m, m1, m2
106	{  5,  1,  5, 3,  79, 14,  7}, // min
107	{ 80,  8, 10, 7, 127, 24, 11}, // max
108	225000, 1760000, 3510000
109};
110
111static pll_limits kLimitsIlkLvdsSingle = {
112	// p, p1, p2, n,   m, m1, m2
113	{ 28,  2, 14, 3,  79, 14,  7}, // min
114	{112,  8, 14, 5, 118, 24, 11}, // max
115	225000, 1760000, 3510000
116};
117
118static pll_limits kLimitsIlkLvdsDual = {
119	// p, p1, p2, n,   m, m1, m2
120	{ 14,  2,  7, 3,  79, 14,  7}, // min
121	{ 56,  8,  7, 5, 127, 24, 11}, // max
122	225000, 1760000, 3510000
123};
124
125// 100Mhz RefClock
126static pll_limits kLimitsIlkLvdsSingle100 = {
127	// p, p1, p2, n,   m, m1, m2
128	{ 28,  2, 14, 3,  79, 14,  7}, // min
129	{112,  8, 14, 4, 126, 24, 11}, // max
130	225000, 1760000, 3510000
131};
132
133static pll_limits kLimitsIlkLvdsDual100 = {
134	// p, p1, p2, n,   m, m1, m2
135	{ 14,  2,  7, 3,  79, 14,  7}, // min
136	{ 42,  6,  7, 5, 126, 24, 11}, // max
137	225000, 1760000, 3510000
138};
139
140// TODO From haswell onwards, a completely different PLL design is used
141// (intel_gfx-prm-osrc-hsw-display_0.pdf, page 268 for VGA). It uses a "virtual
142// root frequency" and one just has to set a single divider (integer and
143// fractional parts), so it makes no sense to reuse the same code and limit
144// structures there.
145//
146// For other display connections, the clock is handled differently, as there is
147// no need for a precise timing to send things in sync with the display.
#if 0
// Disabled limit tables, kept for reference only: nothing is compiled from
// this section, and compute_pll_divisors() merely logs a TODO for the
// CherryView (Chv) and ValleyView (Vlv) groups. Each table lists the minimum
// row first and the maximum row second, in the column order of its header
// comment.
static pll_limits kLimitsChv = {
	// p, p1, p2, n,   m, m1, m2
	{  0,  2,  1, 1,  79, 2,   24 << 22}, // min
	{  0,  4, 14, 1, 127, 2,  175 << 22}, // max
	0, 4800000, 6480000
};

static pll_limits kLimitsVlv = {
	// p, p1, p2, n,   m, m1, m2
	{  0,  2,  2, 1,  79, 2,   11},	// min
	{  0,  3, 20, 7, 127, 3,  156},	// max
	0, 4000000, 6000000
};

// NOTE(review): "Bxt" presumably stands for Broxton -- confirm.
static pll_limits kLimitsBxt = {
	// p, p1, p2, n,  m, m1, m2
	{  0,  2,  1, 1,  0,  2,   2 << 22}, // min
	{  0,  4, 20, 1,  0,  2, 255 << 22}, // max
	0, 4800000, 6700000
};
#endif
170
171static pll_limits kLimitsPinSdvo = {
172	// p, p1, p2, n,   m, m1,  m2
173	{  5,  1,  5, 3,   2,  0,   0},	// min
174	{ 80,  8, 10, 6, 256,  0, 254},	// max
175	200000, 1700000, 3500000
176};
177
178static pll_limits kLimitsPinLvds = {
179	// p, p1, p2, n,   m, m1,  m2
180	{  7,  1, 14, 3,   2,  0,   0},	// min
181	{112,  8, 14, 6, 256,  0, 254},	// max
182	112000, 1700000, 3500000
183};
184
185
186static bool
187lvds_dual_link(display_timing* current)
188{
189	float requestedPixelClock = current->pixel_clock / 1000.0f;
190	if (requestedPixelClock > 112.999)
191		return true;
192
193	// TODO: Force dual link on MacBookPro6,2  MacBookPro8,2  MacBookPro9,1
194
195	return ((read32(INTEL_DIGITAL_LVDS_PORT) & LVDS_CLKB_POWER_MASK)
196		== LVDS_CLKB_POWER_UP);
197}
198
199
200bool
201valid_pll_divisors(pll_divisors* divisors, pll_limits* limits)
202{
203	pll_info &info = gInfo->shared_info->pll_info;
204	uint32 vco = info.reference_frequency * divisors->m / divisors->n;
205	uint32 frequency = vco / divisors->p;
206
207	if (divisors->p < limits->min.p || divisors->p > limits->max.p
208		|| divisors->m < limits->min.m || divisors->m > limits->max.m
209		|| vco < limits->min_vco || vco > limits->max_vco
210		|| frequency < info.min_frequency || frequency > info.max_frequency)
211		return false;
212
213	return true;
214}
215
216
217static void
218compute_pll_p2(display_timing* current, pll_divisors* divisors,
219	pll_limits* limits, bool isLVDS)
220{
221	if (isLVDS) {
222		if (lvds_dual_link(current)) {
223			// fast DAC timing via 2 channels (dual link LVDS)
224			divisors->p2 = limits->min.p2;
225		} else {
226			// slow DAC timing
227			divisors->p2 = limits->max.p2;
228		}
229	} else {
230		if (current->pixel_clock < limits->dot_limit) {
231			// slow DAC timing
232			divisors->p2 = limits->max.p2;
233		} else {
234			// fast DAC timing
235			divisors->p2 = limits->min.p2;
236		}
237	}
238}
239
240
241// TODO we can simplify this computation, with the way the dividers are set, we
242// know that all values in the valid range for M are reachable. M1 allows to
243// generate any multiple of 5 in the range and M2 allows to reach the 4 next
244// values. Therefore, we don't need to loop over the range of values for M1 and
245// M2 separately, we could instead just loop over possible values for M.
246// For this to work, the logic of this function must be reversed: for a given M,
247// it should give the resulting M1 and M2 values for programming the registers.
248static uint32
249compute_pll_m(pll_divisors* divisors)
250{
251	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)
252		|| gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
253		return divisors->m1 * divisors->m2;
254	}
255
256	// Pineview, m1 is reserved
257	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN))
258		return divisors->m2;
259
260	return 5 * divisors->m1 + divisors->m2;
261}
262
263
264static uint32
265compute_pll_p(pll_divisors* divisors)
266{
267	return divisors->p1 * divisors->p2;
268}
269
270
271static void
272compute_dpll_g4x(display_timing* current, pll_divisors* divisors, bool isLVDS)
273{
274	float requestedPixelClock = current->pixel_clock / 1000.0f;
275	float referenceClock
276		= gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
277
278	TRACE("%s: required MHz: %g, reference clock: %g\n", __func__,
279		requestedPixelClock, referenceClock);
280
281	pll_limits limits;
282	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)) {
283		// TODO: Pass port type via video_configuration
284		if (isLVDS) {
285			if (lvds_dual_link(current))
286				memcpy(&limits, &kLimitsG4xLvdsDual, sizeof(pll_limits));
287			else
288				memcpy(&limits, &kLimitsG4xLvdsSingle, sizeof(pll_limits));
289		//} else if (type == INTEL_PORT_TYPE_HDMI) {
290		//	memcpy(&limits, &kLimitsG4xHdmi, sizeof(pll_limits));
291		} else
292			memcpy(&limits, &kLimitsG4xSdvo, sizeof(pll_limits));
293	} else {
294		// There must be a PCH, so this is ivy bridge or later
295		if (isLVDS) {
296			if (lvds_dual_link(current)) {
297				if (referenceClock == 100.0)
298					memcpy(&limits, &kLimitsIlkLvdsDual100, sizeof(pll_limits));
299				else
300					memcpy(&limits, &kLimitsIlkLvdsDual, sizeof(pll_limits));
301			} else {
302				if (referenceClock == 100.0) {
303					memcpy(&limits, &kLimitsIlkLvdsSingle100,
304						sizeof(pll_limits));
305				} else {
306					memcpy(&limits, &kLimitsIlkLvdsSingle, sizeof(pll_limits));
307				}
308			}
309		} else {
310			memcpy(&limits, &kLimitsIlkDac, sizeof(pll_limits));
311		}
312	}
313
314	compute_pll_p2(current, divisors, &limits, isLVDS);
315
316	TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
317		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
318		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
319		limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
320		limits.min.m1, limits.min.m2);
321	TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
322		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
323		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
324		limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
325		limits.max.m1, limits.max.m2);
326
327	float best = requestedPixelClock;
328	pll_divisors bestDivisors;
329
330	for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
331			divisors->n++) {
332		for (divisors->m1 = limits.max.m1; divisors->m1 >= limits.min.m1;
333				divisors->m1--) {
334			for (divisors->m2 = limits.max.m2; divisors->m2 >= limits.min.m2;
335					divisors->m2--) {
336				for (divisors->p1 = limits.max.p1;
337						divisors->p1 >= limits.min.p1; divisors->p1--) {
338					divisors->m = compute_pll_m(divisors);
339					divisors->p = compute_pll_p(divisors);
340
341					if (!valid_pll_divisors(divisors, &limits))
342						continue;
343
344					float error = fabs(requestedPixelClock
345						- (referenceClock * divisors->m)
346						/ (divisors->n * divisors->p));
347					if (error < best) {
348						best = error;
349						bestDivisors = *divisors;
350
351						if (error == 0)
352							break;
353					}
354				}
355			}
356		}
357	}
358	*divisors = bestDivisors;
359	TRACE("%s: best MHz: %g (error: %g)\n", __func__,
360		(referenceClock * divisors->m) / (divisors->n * divisors->p),
361		best);
362}
363
364
365static void
366compute_dpll_9xx(display_timing* current, pll_divisors* divisors, bool isLVDS)
367{
368	float requestedPixelClock = current->pixel_clock / 1000.0f;
369	float referenceClock
370		= gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
371
372	TRACE("%s: required MHz: %g\n", __func__, requestedPixelClock);
373
374	pll_limits limits;
375	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN)) {
376		if (isLVDS)
377			memcpy(&limits, &kLimitsPinLvds, sizeof(pll_limits));
378		else
379			memcpy(&limits, &kLimitsPinSdvo, sizeof(pll_limits));
380	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_85x)) {
381		memcpy(&limits, &kLimits85x, sizeof(pll_limits));
382	} else {
383		if (isLVDS)
384			memcpy(&limits, &kLimits9xxLvds, sizeof(pll_limits));
385		else
386			memcpy(&limits, &kLimits9xxSdvo, sizeof(pll_limits));
387	}
388
389	compute_pll_p2(current, divisors, &limits, isLVDS);
390
391	TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
392		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
393		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
394		limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
395		limits.min.m1, limits.min.m2);
396	TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
397		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
398		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
399		limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
400		limits.max.m1, limits.max.m2);
401
402	bool is_pine = gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN);
403
404	float best = requestedPixelClock;
405	pll_divisors bestDivisors;
406	memset(&bestDivisors, 0, sizeof(bestDivisors));
407
408	for (divisors->m1 = limits.min.m1; divisors->m1 <= limits.max.m1;
409			divisors->m1++) {
410		for (divisors->m2 = limits.min.m2; divisors->m2 <= limits.max.m2
411				&& ((divisors->m2 < divisors->m1) || is_pine); divisors->m2++) {
412			for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
413					divisors->n++) {
414				for (divisors->p1 = limits.min.p1;
415						divisors->p1 <= limits.max.p1; divisors->p1++) {
416					divisors->m = compute_pll_m(divisors);
417					divisors->p = compute_pll_p(divisors);
418
419					if (!valid_pll_divisors(divisors, &limits))
420						continue;
421
422					float error = fabs(requestedPixelClock
423						- (referenceClock * divisors->m)
424						/ (divisors->n * divisors->p));
425					if (error < best) {
426						best = error;
427						bestDivisors = *divisors;
428
429						if (error == 0)
430							break;
431					}
432				}
433			}
434		}
435	}
436
437	*divisors = bestDivisors;
438
439	if (best == requestedPixelClock)
440		debugger("No valid PLL configuration found");
441	else {
442		TRACE("%s: best MHz: %g (error: %g)\n", __func__,
443			(referenceClock * divisors->m) / (divisors->n * divisors->p),
444			best);
445	}
446}
447
448
449void
450compute_pll_divisors(display_timing* current, pll_divisors* divisors, bool isLVDS)
451{
452	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)
453		|| (gInfo->shared_info->pch_info != INTEL_PCH_NONE)) {
454		compute_dpll_g4x(current, divisors, isLVDS);
455	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)) {
456		ERROR("%s: TODO: CherryView\n", __func__);
457	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
458		ERROR("%s: TODO: VallyView\n", __func__);
459	} else
460		compute_dpll_9xx(current, divisors, isLVDS);
461
462	TRACE("%s: found: p = %" B_PRId32 " (p1 = %" B_PRId32 ", "
463		"p2 = %" B_PRId32 "), n = %" B_PRId32 ", m = %" B_PRId32 " "
464		"(m1 = %" B_PRId32 ", m2 = %" B_PRId32 ")\n", __func__,
465		divisors->p, divisors->p1, divisors->p2, divisors->n,
466		divisors->m, divisors->m1, divisors->m2);
467}
468
469
470void
471refclk_activate_ilk(bool hasPanel)
472{
473	CALLED();
474
475	// aka, our engineers hate you
476
477	bool wantsSSC;
478	bool hasCK505;
479	if (gInfo->shared_info->pch_info == INTEL_PCH_IBX) {
480		TRACE("%s: Generation 5 graphics\n", __func__);
481		//XXX: This should be == vbt display_clock_mode
482		hasCK505 = false;
483		wantsSSC = hasCK505;
484	} else {
485		if (gInfo->shared_info->device_type.Generation() == 6) {
486			TRACE("%s: Generation 6 graphics\n", __func__);
487		} else {
488			TRACE("%s: Generation 7 graphics\n", __func__);
489		}
490		hasCK505 = false;
491		wantsSSC = true;
492	}
493
494	uint32 clkRef = read32(PCH_DREF_CONTROL);
495	uint32 newRef = clkRef;
496	TRACE("%s: PCH_DREF_CONTROL before: 0x%" B_PRIx32 "\n", __func__, clkRef);
497
498	newRef &= ~DREF_NONSPREAD_SOURCE_MASK;
499
500	if (hasCK505)
501		newRef |= DREF_NONSPREAD_CK505_ENABLE;
502	else
503		newRef |= DREF_NONSPREAD_SOURCE_ENABLE;
504
505	newRef &= ~DREF_SSC_SOURCE_MASK;
506	newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
507	newRef &= ~DREF_SSC1_ENABLE;
508
509	if (newRef == clkRef) {
510		TRACE("%s: No changes to reference clock.\n", __func__);
511		return;
512	}
513
514	if (hasPanel) {
515		newRef &= ~DREF_SSC_SOURCE_MASK;
516		newRef |= DREF_SSC_SOURCE_ENABLE;
517
518		if (wantsSSC)
519			newRef |= DREF_SSC1_ENABLE;
520		else
521			newRef &= ~DREF_SSC1_ENABLE;
522
523		// Power up SSC before enabling outputs
524		write32(PCH_DREF_CONTROL, newRef);
525		read32(PCH_DREF_CONTROL);
526		TRACE("%s: PCH_DREF_CONTROL after SSC on/off: 0x%" B_PRIx32 "\n",
527				__func__, read32(PCH_DREF_CONTROL));
528		spin(200);
529
530		newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
531
532		bool hasEDP = true;
533		if (hasEDP) {
534			if (wantsSSC)
535				newRef |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
536			else
537				newRef |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
538		} else
539			newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
540
541		write32(PCH_DREF_CONTROL, newRef);
542		read32(PCH_DREF_CONTROL);
543		TRACE("%s: PCH_DREF_CONTROL after done: 0x%" B_PRIx32 "\n",
544				__func__, read32(PCH_DREF_CONTROL));
545		spin(200);
546	} else {
547		newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
548		newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
549
550		write32(PCH_DREF_CONTROL, newRef);
551		read32(PCH_DREF_CONTROL);
552		TRACE("%s: PCH_DREF_CONTROL after disable CPU output: 0x%" B_PRIx32 "\n",
553				__func__, read32(PCH_DREF_CONTROL));
554		spin(200);
555
556		if (!wantsSSC) {
557			newRef &= ~DREF_SSC_SOURCE_MASK;
558			newRef |= DREF_SSC_SOURCE_DISABLE;
559			newRef &= ~DREF_SSC1_ENABLE;
560
561			write32(PCH_DREF_CONTROL, newRef);
562			read32(PCH_DREF_CONTROL);
563			TRACE("%s: PCH_DREF_CONTROL after disable SSC: 0x%" B_PRIx32 "\n",
564					__func__, read32(PCH_DREF_CONTROL));
565			spin(200);
566		}
567	}
568}
569
570
571//excerpt (plus modifications) from intel_dpll_mgr.c:
572
573/*
 * Copyright © 2006-2016 Intel Corporation
575 *
576 * Permission is hereby granted, free of charge, to any person obtaining a
577 * copy of this software and associated documentation files (the "Software"),
578 * to deal in the Software without restriction, including without limitation
579 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
580 * and/or sell copies of the Software, and to permit persons to whom the
581 * Software is furnished to do so, subject to the following conditions:
582 *
583 * The above copyright notice and this permission notice (including the next
584 * paragraph) shall be included in all copies or substantial portions of the
585 * Software.
586 *
587 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
588 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
589 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
590 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
591 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
592 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
593 * DEALINGS IN THE SOFTWARE.
594 */
595
/*
 * LC PLL frequency the WRPLL reference and VCO are derived from
 * (Ref = LC_FREQ / R, VCO = N * Ref). Presumably in MHz -- confirm.
 */
#define LC_FREQ 2700
/*
 * LC_FREQ rescaled as a 64 bit value so it can be compared directly against
 * freq2k (the requested clock in Hz divided by 100).
 */
#define LC_FREQ_2K (uint64)(LC_FREQ * 2000)

/* Range and step of the P divider tried by hsw_ddi_calculate_wrpll() */
#define P_MIN 2
#define P_MAX 64
#define P_INC 2

/* Constraints for PLL good behavior */
/* Allowed range of the WRPLL reference input, same unit as LC_FREQ */
#define REF_MIN 48
#define REF_MAX 400
/* Allowed range of the WRPLL VCO, same unit as LC_FREQ */
#define VCO_MIN 2400
#define VCO_MAX 4800
608
/*
 * Returns the absolute difference of two unsigned 64 bit values. The smaller
 * value is always subtracted from the larger one, so the result never wraps.
 */
static uint64 AbsSubtr64(uint64 nr1, uint64 nr2)
{
	return (nr1 < nr2) ? (nr2 - nr1) : (nr1 - nr2);
}
617
/* One WRPLL divider candidate, as collected by hsw_wrpll_update_rnp(). */
struct hsw_wrpll_rnp {
	unsigned p;		/* P divider; 0 means "no candidate stored yet" */
	unsigned n2;	/* N divider, stored doubled (N2 = 2 * N) */
	unsigned r2;	/* R divider, stored doubled (R2 = 2 * R) */
};
621
/*
 * Returns the PPM budget hsw_wrpll_update_rnp() may tolerate for the given
 * clock (in Hz). Clocks listed in the table get their individual budget,
 * every other clock gets 1000.
 */
static unsigned hsw_wrpll_get_budget_for_freq(int clock)
{
	static const struct {
		int clock;
		unsigned budget;
	} kBudgets[] = {
		/* must be hit exactly */
		{ 25175000, 0 }, { 25200000, 0 }, { 27000000, 0 },
		{ 27027000, 0 }, { 37762500, 0 }, { 37800000, 0 },
		{ 40500000, 0 }, { 40541000, 0 }, { 54000000, 0 },
		{ 54054000, 0 }, { 59341000, 0 }, { 59400000, 0 },
		{ 72000000, 0 }, { 74176000, 0 }, { 74250000, 0 },
		{ 81000000, 0 }, { 81081000, 0 }, { 89012000, 0 },
		{ 89100000, 0 }, { 108000000, 0 }, { 108108000, 0 },
		{ 111264000, 0 }, { 111375000, 0 }, { 148352000, 0 },
		{ 148500000, 0 }, { 162000000, 0 }, { 162162000, 0 },
		{ 222525000, 0 }, { 222750000, 0 }, { 296703000, 0 },
		{ 297000000, 0 },

		{ 233500000, 1500 }, { 245250000, 1500 }, { 247750000, 1500 },
		{ 253250000, 1500 }, { 298000000, 1500 },

		{ 169128000, 2000 }, { 169500000, 2000 }, { 179500000, 2000 },
		{ 202000000, 2000 },

		{ 256250000, 4000 }, { 262500000, 4000 }, { 270000000, 4000 },
		{ 272500000, 4000 }, { 273750000, 4000 }, { 280750000, 4000 },
		{ 281250000, 4000 }, { 286000000, 4000 }, { 291750000, 4000 },

		{ 267250000, 5000 }, { 268500000, 5000 }
	};
	unsigned i;

	for (i = 0; i < sizeof(kBudgets) / sizeof(kBudgets[0]); i++) {
		if (kBudgets[i].clock == clock)
			return kBudgets[i].budget;
	}

	/* default budget for every clock not listed above */
	return 1000;
}
695
696static void hsw_wrpll_update_rnp(uint64 freq2k, unsigned int budget,
697				 unsigned int r2, unsigned int n2,
698				 unsigned int p,
699				 struct hsw_wrpll_rnp *best)
700{
701	uint64 a, b, c, d, diff, diff_best;
702
703	/* No best (r,n,p) yet */
704	if (best->p == 0) {
705		best->p = p;
706		best->n2 = n2;
707		best->r2 = r2;
708		return;
709	}
710
711	/*
712	 * Output clock is (LC_FREQ_2K / 2000) * N / (P * R), which compares to
713	 * freq2k.
714	 *
715	 * delta = 1e6 *
716	 *	   abs(freq2k - (LC_FREQ_2K * n2/(p * r2))) /
717	 *	   freq2k;
718	 *
719	 * and we would like delta <= budget.
720	 *
721	 * If the discrepancy is above the PPM-based budget, always prefer to
722	 * improve upon the previous solution.  However, if you're within the
723	 * budget, try to maximize Ref * VCO, that is N / (P * R^2).
724	 */
725	a = freq2k * budget * p * r2;
726	b = freq2k * budget * best->p * best->r2;
727	diff = AbsSubtr64((uint64)freq2k * p * r2, LC_FREQ_2K * n2);
728	diff_best = AbsSubtr64((uint64)freq2k * best->p * best->r2,
729			     LC_FREQ_2K * best->n2);
730	c = 1000000 * diff;
731	d = 1000000 * diff_best;
732
733	if (a < c && b < d) {
734		/* If both are above the budget, pick the closer */
735		if (best->p * best->r2 * diff < p * r2 * diff_best) {
736			best->p = p;
737			best->n2 = n2;
738			best->r2 = r2;
739		}
740	} else if (a >= c && b < d) {
741		/* If A is below the threshold but B is above it?  Update. */
742		best->p = p;
743		best->n2 = n2;
744		best->r2 = r2;
745	} else if (a >= c && b >= d) {
746		/* Both are below the limit, so pick the higher n2/(r2*r2) */
747		if (n2 * best->r2 * best->r2 > best->n2 * r2 * r2) {
748			best->p = p;
749			best->n2 = n2;
750			best->r2 = r2;
751		}
752	}
753	/* Otherwise a < c && b >= d, do nothing */
754}
755
756void
757hsw_ddi_calculate_wrpll(int clock /* in Hz */,
758			unsigned *r2_out, unsigned *n2_out, unsigned *p_out)
759{
760	uint64 freq2k;
761	unsigned p, n2, r2;
762	struct hsw_wrpll_rnp best = { 0, 0, 0 };
763	unsigned budget;
764
765	freq2k = clock / 100;
766
767	budget = hsw_wrpll_get_budget_for_freq(clock);
768
769	/* Special case handling for 540 pixel clock: bypass WR PLL entirely
770	 * and directly pass the LC PLL to it. */
771	if (freq2k == 5400000) {
772		*n2_out = 2;
773		*p_out = 1;
774		*r2_out = 2;
775		return;
776	}
777
778	/*
779	 * Ref = LC_FREQ / R, where Ref is the actual reference input seen by
780	 * the WR PLL.
781	 *
782	 * We want R so that REF_MIN <= Ref <= REF_MAX.
783	 * Injecting R2 = 2 * R gives:
784	 *   REF_MAX * r2 > LC_FREQ * 2 and
785	 *   REF_MIN * r2 < LC_FREQ * 2
786	 *
787	 * Which means the desired boundaries for r2 are:
788	 *  LC_FREQ * 2 / REF_MAX < r2 < LC_FREQ * 2 / REF_MIN
789	 *
790	 */
791	for (r2 = LC_FREQ * 2 / REF_MAX + 1;
792	     r2 <= LC_FREQ * 2 / REF_MIN;
793	     r2++) {
794
795		/*
796		 * VCO = N * Ref, that is: VCO = N * LC_FREQ / R
797		 *
798		 * Once again we want VCO_MIN <= VCO <= VCO_MAX.
799		 * Injecting R2 = 2 * R and N2 = 2 * N, we get:
800		 *   VCO_MAX * r2 > n2 * LC_FREQ and
801		 *   VCO_MIN * r2 < n2 * LC_FREQ)
802		 *
803		 * Which means the desired boundaries for n2 are:
804		 * VCO_MIN * r2 / LC_FREQ < n2 < VCO_MAX * r2 / LC_FREQ
805		 */
806		for (n2 = VCO_MIN * r2 / LC_FREQ + 1;
807		     n2 <= VCO_MAX * r2 / LC_FREQ;
808		     n2++) {
809
810			for (p = P_MIN; p <= P_MAX; p += P_INC)
811				hsw_wrpll_update_rnp(freq2k, budget,
812						     r2, n2, p, &best);
813		}
814	}
815
816	*n2_out = best.n2;
817	*p_out = best.p;
818	*r2_out = best.r2;
819}
820
/*
 * Search state of skl_ddi_calculate_wrpll(): the best divider found so far
 * together with the frequencies it was chosen for. It is updated by
 * skl_wrpll_try_divider(); p stays 0 until a divider has been accepted.
 */
struct skl_wrpll_context {
	uint64 min_deviation;		/* current minimal deviation */
	uint64 central_freq;		/* chosen central freq */
	uint64 dco_freq;			/* chosen dco freq */
	unsigned int p;				/* chosen divider */
};

/* DCO freq must be within +1%/-6%  of the DCO central freq */
/* Both limits are in units of 0.01%, matching the deviation computed in
 * skl_wrpll_try_divider(); deviations must stay strictly below them. */
#define SKL_DCO_MAX_PDEVIATION	100
#define SKL_DCO_MAX_NDEVIATION	600
831
832static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
833				  uint64 central_freq,
834				  uint64 dco_freq,
835				  unsigned int divider)
836{
837	uint64 deviation;
838
839	deviation = ((uint64)10000 * AbsSubtr64(dco_freq, central_freq)
840			      / central_freq);
841
842	/* positive deviation */
843	if (dco_freq >= central_freq) {
844		if (deviation < SKL_DCO_MAX_PDEVIATION &&
845		    deviation < ctx->min_deviation) {
846			ctx->min_deviation = deviation;
847			ctx->central_freq = central_freq;
848			ctx->dco_freq = dco_freq;
849			ctx->p = divider;
850
851			TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
852			TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
853			TRACE("%s: positive offset accepted, deviation %" B_PRIu64 "\n",
854				__func__, deviation);
855		}
856	/* negative deviation */
857	} else if (deviation < SKL_DCO_MAX_NDEVIATION &&
858		   deviation < ctx->min_deviation) {
859		ctx->min_deviation = deviation;
860		ctx->central_freq = central_freq;
861		ctx->dco_freq = dco_freq;
862		ctx->p = divider;
863
864		TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
865		TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
866		TRACE("%s: negative offset accepted, deviation %" B_PRIu64 "\n",
867			__func__, deviation);
868	}
869}
870
/*
 * Splits the full divider p into the three factors p0, p1 and p2, so that
 * p0 * p1 * p2 == p for every divider that can be split.
 *
 * Handled are the odd dividers 3, 5, 7, 9, 15, 21 and 35, and even dividers
 * whose half is 1, 2, 3 or 5, or is divisible by 2, 3 or 7 (checked in that
 * order). For any other value the outputs are left untouched.
 */
static void skl_wrpll_get_multipliers(unsigned int p,
				      unsigned int *p0 /* out */,
				      unsigned int *p1 /* out */,
				      unsigned int *p2 /* out */)
{
	unsigned int half;

	if ((p & 1) != 0) {
		/* odd dividers: 3, 5, 7, 9, 15, 21, 35 */
		switch (p) {
		case 3:
		case 9:
			*p0 = 3;
			*p1 = 1;
			*p2 = p / 3;
			break;
		case 5:
		case 7:
			*p0 = p;
			*p1 = 1;
			*p2 = 1;
			break;
		case 15:
			*p0 = 3;
			*p1 = 1;
			*p2 = 5;
			break;
		case 21:
			*p0 = 7;
			*p1 = 1;
			*p2 = 3;
			break;
		case 35:
			*p0 = 7;
			*p1 = 1;
			*p2 = 5;
			break;
		default:
			/* unsupported odd divider: leave the outputs alone */
			break;
		}
		return;
	}

	/* even dividers */
	half = p >> 1;

	switch (half) {
	case 1:
	case 2:
	case 3:
	case 5:
		*p0 = 2;
		*p1 = 1;
		*p2 = half;
		return;
	default:
		break;
	}

	if ((half & 1) == 0) {
		*p0 = 2;
		*p1 = half / 2;
		*p2 = 2;
		return;
	}
	if (half % 3 == 0) {
		*p0 = 3;
		*p1 = half / 3;
		*p2 = 2;
		return;
	}
	if (half % 7 == 0) {
		*p0 = 7;
		*p1 = half / 7;
		*p2 = 2;
	}
}
919
920static void skl_wrpll_context_init(struct skl_wrpll_context *ctx)
921{
922	memset(ctx, 0, sizeof(*ctx));
923	ctx->min_deviation = UINT64_MAX;
924}
925
926static void skl_wrpll_params_populate(struct skl_wrpll_params *params,
927				      uint64 afe_clock,
928				      int ref_clock,
929				      uint64 central_freq,
930				      uint32 p0, uint32 p1, uint32 p2)
931{
932	uint64 dco_freq;
933
934	switch (central_freq) {
935	case 9600000000ULL:
936		params->central_freq = 0;
937		break;
938	case 9000000000ULL:
939		params->central_freq = 1;
940		break;
941	case 8400000000ULL:
942		params->central_freq = 3;
943	}
944
945	switch (p0) {
946	case 1:
947		params->pdiv = 0;
948		break;
949	case 2:
950		params->pdiv = 1;
951		break;
952	case 3:
953		params->pdiv = 2;
954		break;
955	case 7:
956		params->pdiv = 4;
957		break;
958	default:
959		TRACE("%s: Incorrect PDiv\n", __func__);
960	}
961
962	switch (p2) {
963	case 5:
964		params->kdiv = 0;
965		break;
966	case 2:
967		params->kdiv = 1;
968		break;
969	case 3:
970		params->kdiv = 2;
971		break;
972	case 1:
973		params->kdiv = 3;
974		break;
975	default:
976		TRACE("%s: Incorrect KDiv\n", __func__);
977	}
978
979	params->qdiv_ratio = p1;
980	params->qdiv_mode = (params->qdiv_ratio == 1) ? 0 : 1;
981
982	dco_freq = p0 * p1 * p2 * afe_clock;
983	TRACE("%s: AFE frequency %" B_PRIu64 "Hz\n", __func__, afe_clock);
984	TRACE("%s: p0: %" B_PRIu32 ", p1: %" B_PRIu32 ", p2: %" B_PRIu32 "\n",
985		__func__, p0,p1,p2);
986	TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
987
988	/*
989	 * Intermediate values are in Hz.
990	 * Divide by MHz to match bsepc
991	 */
992	params->dco_integer = (uint64)dco_freq / ((uint64)ref_clock * 1000);
993	params->dco_fraction = (
994			(uint64)dco_freq / ((uint64)ref_clock / 1000) -
995			(uint64)params->dco_integer * 1000000) * 0x8000 /
996			1000000;
997
998	TRACE("%s: Reference clock: %gMhz\n", __func__, ref_clock / 1000.0f);
999	TRACE("%s: DCO integer %" B_PRIu32 "\n", __func__, params->dco_integer);
1000	TRACE("%s: DCO fraction 0x%" B_PRIx32 "\n", __func__, params->dco_fraction);
1001}
1002
1003#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
1004
1005bool
1006skl_ddi_calculate_wrpll(int clock /* in Hz */,
1007			int ref_clock,
1008			struct skl_wrpll_params *wrpll_params)
1009{
1010	uint64 afe_clock = (uint64) clock * 5; /* AFE Clock is 5x Pixel clock */
1011	uint64 dco_central_freq[3] = { 8400000000ULL,
1012				    9000000000ULL,
1013				    9600000000ULL };
1014	static const int even_dividers[] = {  4,  6,  8, 10, 12, 14, 16, 18, 20,
1015					     24, 28, 30, 32, 36, 40, 42, 44,
1016					     48, 52, 54, 56, 60, 64, 66, 68,
1017					     70, 72, 76, 78, 80, 84, 88, 90,
1018					     92, 96, 98 };
1019	static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
1020	static const struct {
1021		const int *list;
1022		unsigned int n_dividers;
1023	} dividers[] = {
1024		{ even_dividers, ARRAY_SIZE(even_dividers) },
1025		{ odd_dividers, ARRAY_SIZE(odd_dividers) },
1026	};
1027	struct skl_wrpll_context ctx;
1028	unsigned int dco, d, i;
1029	unsigned int p0, p1, p2;
1030
1031	skl_wrpll_context_init(&ctx);
1032
1033	for (d = 0; d < ARRAY_SIZE(dividers); d++) {
1034		for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
1035			for (i = 0; i < dividers[d].n_dividers; i++) {
1036				unsigned int p = dividers[d].list[i];
1037				uint64 dco_freq = p * afe_clock;
1038
1039				skl_wrpll_try_divider(&ctx,
1040						      dco_central_freq[dco],
1041						      dco_freq,
1042						      p);
1043				/*
1044				 * Skip the remaining dividers if we're sure to
1045				 * have found the definitive divider, we can't
1046				 * improve a 0 deviation.
1047				 */
1048				if (ctx.min_deviation == 0)
1049					goto skip_remaining_dividers;
1050			}
1051		}
1052
1053skip_remaining_dividers:
1054		/*
1055		 * If a solution is found with an even divider, prefer
1056		 * this one.
1057		 */
1058		if (d == 0 && ctx.p)
1059			break;
1060	}
1061
1062	if (!ctx.p) {
1063		TRACE("%s: No valid divider found for %dHz\n", __func__, clock);
1064		return false;
1065	}
1066	TRACE("%s: Full divider (p) found is %d\n", __func__, ctx.p);
1067
1068	/*
1069	 * gcc incorrectly analyses that these can be used without being
1070	 * initialized. To be fair, it's hard to guess.
1071	 */
1072	p0 = p1 = p2 = 0;
1073	skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
1074	skl_wrpll_params_populate(wrpll_params, afe_clock, ref_clock,
1075				  ctx.central_freq, p0, p1, p2);
1076
1077	return true;
1078}
1079
1080