1/*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26#include "dc.h"
27#include "opp.h"
28#include "color_gamma.h"
29
30/* When calculating LUT values the first region and at least one subsequent
31 * region are calculated with full precision. These defines are a demarcation
32 * of where the second region starts and ends.
33 * These are hardcoded values to avoid recalculating them in loops.
34 */
/* Index of the first point of the second full-precision region. */
#define PRECISE_LUT_REGION_START 224
/* Index of the last point (inclusive) of the second full-precision region. */
#define PRECISE_LUT_REGION_END 239

/* X coordinates of the LUT points. Populated by
 * setup_x_points_distribution(); the two entries at MAX_HW_POINTS and
 * MAX_HW_POINTS + 1 both hold the end value 128.
 */
static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2];
39
// Hardcoded table that depends on setup_x_points_distribution() and
// sdr_level = 80. If the X points are changed, the PQ Y points will be
// misaligned and a new table must be generated (or fall back to the old
// method that calls compute_pq()).
// The last point is above the PQ formula range (0-125 in normalized FP16).
// The value for the last point (128) is chosen so that interpolation from
// 120 to 128 gives 1.0 for X = 125.0.
// The first few points are 0: the HW LUT is mirrored around zero, so making
// the first segment 0 to 0 effectively clips it, and these are very low PQ
// codes anyway. The minimum nonzero value below (216825) is a little under
// 12-bit PQ code 1.
49static const unsigned long long pq_divider = 1000000000;
50static const unsigned long long pq_numerator[MAX_HW_POINTS + 1] = {
51		0, 0, 0, 0, 216825, 222815,
52		228691, 234460, 240128, 245702, 251187, 256587,
53		261908, 267152, 272324, 277427, 282465, 292353,
54		302011, 311456, 320704, 329768, 338661, 347394,
55		355975, 364415, 372721, 380900, 388959, 396903,
56		404739, 412471, 420104, 435089, 449727, 464042,
57		478060, 491800, 505281, 518520, 531529, 544324,
58		556916, 569316, 581533, 593576, 605454, 617175,
59		628745, 651459, 673643, 695337, 716578, 737395,
60		757817, 777869, 797572, 816947, 836012, 854782,
61		873274, 891500, 909474, 927207, 944709, 979061,
62		1012601, 1045391, 1077485, 1108931, 1139770, 1170042,
63		1199778, 1229011, 1257767, 1286071, 1313948, 1341416,
64		1368497, 1395207, 1421563, 1473272, 1523733, 1573041,
65		1621279, 1668520, 1714828, 1760262, 1804874, 1848710,
66		1891814, 1934223, 1975973, 2017096, 2057622, 2097578,
67		2136989, 2214269, 2289629, 2363216, 2435157, 2505564,
68		2574539, 2642169, 2708536, 2773711, 2837760, 2900742,
69		2962712, 3023719, 3083810, 3143025, 3201405, 3315797,
70		3427246, 3535974, 3642181, 3746038, 3847700, 3947305,
71		4044975, 4140823, 4234949, 4327445, 4418394, 4507872,
72		4595951, 4682694, 4768161, 4935487, 5098326, 5257022,
73		5411878, 5563161, 5711107, 5855928, 5997812, 6136929,
74		6273436, 6407471, 6539163, 6668629, 6795976, 6921304,
75		7044703, 7286050, 7520623, 7748950, 7971492, 8188655,
76		8400800, 8608247, 8811286, 9010175, 9205149, 9396421,
77		9584186, 9768620, 9949889, 10128140, 10303513, 10646126,
78		10978648, 11301874, 11616501, 11923142, 12222340, 12514578,
79		12800290, 13079866, 13353659, 13621988, 13885144, 14143394,
80		14396982, 14646132, 14891052, 15368951, 15832050, 16281537,
81		16718448, 17143696, 17558086, 17962337, 18357092, 18742927,
82		19120364, 19489877, 19851894, 20206810, 20554983, 20896745,
83		21232399, 21886492, 22519276, 23132491, 23727656, 24306104,
84		24869013, 25417430, 25952292, 26474438, 26984626, 27483542,
85		27971811, 28450000, 28918632, 29378184, 29829095, 30706591,
86		31554022, 32373894, 33168387, 33939412, 34688657, 35417620,
87		36127636, 36819903, 37495502, 38155408, 38800507, 39431607,
88		40049446, 40654702, 41247996, 42400951, 43512407, 44585892,
89		45624474, 46630834, 47607339, 48556082, 49478931, 50377558,
90		51253467, 52108015, 52942436, 53757848, 54555277, 55335659,
91		56099856, 57582802, 59009766, 60385607, 61714540, 63000246,
92		64245964, 65454559, 66628579, 67770304, 68881781, 69964856,
93		71021203, 72052340, 73059655, 74044414, 75007782, 76874537,
94		78667536, 80393312, 82057522, 83665098, 85220372, 86727167,
95		88188883, 89608552, 90988895, 92332363, 93641173, 94917336,
96		96162685, 97378894, 98567496, 100867409, 103072439, 105191162,
97		107230989, 109198368, 111098951, 112937723, 114719105, 116447036,
98		118125045, 119756307, 121343688, 122889787, 124396968, 125867388,
99		127303021, 130077030, 132731849, 135278464, 137726346, 140083726,
100		142357803, 144554913, 146680670, 148740067, 150737572, 152677197,
101		154562560, 156396938, 158183306, 159924378, 161622632, 164899602,
102		168030318, 171028513, 173906008, 176673051, 179338593, 181910502,
103		184395731, 186800463, 189130216, 191389941, 193584098, 195716719,
104		197791463, 199811660, 201780351, 205574133, 209192504, 212652233,
105		215967720, 219151432, 222214238, 225165676, 228014163, 230767172,
106		233431363, 236012706, 238516569, 240947800, 243310793, 245609544,
107		247847696, 252155270, 256257056, 260173059, 263920427, 267513978,
108		270966613, 274289634, 277493001, 280585542, 283575118, 286468763,
109		289272796, 291992916, 294634284, 297201585, 299699091, 304500003,
110		309064541, 313416043, 317574484, 321557096, 325378855, 329052864,
111		332590655, 336002433, 339297275, 342483294, 345567766, 348557252,
112		351457680, 354274432, 357012407, 362269536, 367260561, 372012143,
113		376547060, 380884936, 385042798, 389035522, 392876185, 396576344,
114		400146265, 403595112, 406931099, 410161619, 413293351, 416332348,
115		419284117, 424945627, 430313203, 435416697, 440281572, 444929733,
116		449380160, 453649415, 457752035, 461700854, 465507260, 469181407,
117		472732388, 476168376, 479496748, 482724188, 485856764, 491858986,
118		497542280, 502939446, 508078420, 512983199, 517674549, 522170569,
119		526487126, 530638214, 534636233, 538492233, 542216094, 545816693,
120		549302035, 552679362, 555955249, 562226134, 568156709, 573782374,
121		579133244, 584235153, 589110430, 593778512, 598256421, 602559154,
122		606699989, 610690741, 614541971, 618263157, 621862836, 625348729,
123		628727839, 635190643, 641295921, 647081261, 652578597, 657815287,
124		662814957, 667598146, 672182825, 676584810, 680818092, 684895111,
125		688826974, 692623643, 696294085, 699846401, 703287935, 709864782,
126		716071394, 721947076, 727525176, 732834238, 737898880, 742740485,
127		747377745, 751827095, 756103063, 760218552, 764185078, 768012958,
128		771711474, 775289005, 778753144, 785368225, 791604988, 797503949,
129		803099452, 808420859, 813493471, 818339244, 822977353, 827424644,
130		831695997, 835804619, 839762285, 843579541, 847265867, 850829815,
131		854279128, 860861356, 867061719, 872921445, 878475444, 883753534,
132		888781386, 893581259, 898172578, 902572393, 906795754, 910856010,
133		914765057, 918533538, 922171018, 925686119, 929086644, 935571664,
134		941675560, 947439782, 952899395, 958084324, 963020312, 967729662,
135		972231821, 976543852, 980680801, 984656009, 988481353, 992167459,
136		995723865, 999159168, 1002565681};
137
138// these are helpers for calculations to reduce stack usage
139// do not depend on these being preserved across calls
140
/* Helper to optimize gamma calculation, only used in
 * translate_from_linear_space, in particular to avoid the dc_fixpt_pow
 * function which is very expensive.
 * The idea is that our regions for X points are exponential and currently they
 * all use the same number of points (NUM_PTS_IN_REGION), and in each region
 * every point is exactly 2x the one at the same index in the previous region.
 * In other words X[i] = 2 * X[i-NUM_PTS_IN_REGION] for i>=16.
 * The other fact is that (2x)^gamma = 2^gamma * x^gamma.
 * So we compute and save x^gamma for the first 16 points (the first region),
 * and for every next region just multiply with 2^gamma, which can be computed
 * once, and save the result so we recursively compute all the values.
 */
152
/*
 * Regamma coefficients are used for both regamma and degamma. Degamma
 * coefficients are calculated in our formula using the regamma coefficients.
 *
 * Each table has one column per predefined transfer function, selected by
 * build_coefficients():
 *   0: sRGB, 1: BT.709, 2: gamma 2.2, 3: gamma 2.4, 4: gamma 2.6
 * (column 4 is labelled "P3" in the header below but is the one
 * build_coefficients() picks for TRANSFER_FUNCTION_GAMMA26).
 *
 * build_coefficients() divides numerator01 by 10000000 and numerator02..05
 * by 1000 to obtain a0, a1, a2, a3 and the gamma exponent respectively.
 */
									 /*sRGB     709     2.2 2.4 P3*/
static const int32_t numerator01[] = { 31308,   180000, 0,  0,  0};
static const int32_t numerator02[] = { 12920,   4500,   0,  0,  0};
static const int32_t numerator03[] = { 55,      99,     0,  0,  0};
static const int32_t numerator04[] = { 55,      99,     0,  0,  0};
static const int32_t numerator05[] = { 2400,    2222,   2200, 2400, 2600};
163
164/* one-time setup of X points */
165void setup_x_points_distribution(void)
166{
167	struct fixed31_32 region_size = dc_fixpt_from_int(128);
168	int32_t segment;
169	uint32_t seg_offset;
170	uint32_t index;
171	struct fixed31_32 increment;
172
173	coordinates_x[MAX_HW_POINTS].x = region_size;
174	coordinates_x[MAX_HW_POINTS + 1].x = region_size;
175
176	for (segment = 6; segment > (6 - NUM_REGIONS); segment--) {
177		region_size = dc_fixpt_div_int(region_size, 2);
178		increment = dc_fixpt_div_int(region_size,
179						NUM_PTS_IN_REGION);
180		seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION;
181		coordinates_x[seg_offset].x = region_size;
182
183		for (index = seg_offset + 1;
184				index < seg_offset + NUM_PTS_IN_REGION;
185				index++) {
186			coordinates_x[index].x = dc_fixpt_add
187					(coordinates_x[index-1].x, increment);
188		}
189	}
190}
191
192void log_x_points_distribution(struct dal_logger *logger)
193{
194	int i = 0;
195
196	if (logger != NULL) {
197		LOG_GAMMA_WRITE("Log X Distribution\n");
198
199		for (i = 0; i < MAX_HW_POINTS; i++)
200			LOG_GAMMA_WRITE("%llu\n", coordinates_x[i].x.value);
201	}
202}
203
204static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
205{
206	/* consts for PQ gamma formula. */
207	const struct fixed31_32 m1 =
208		dc_fixpt_from_fraction(159301758, 1000000000);
209	const struct fixed31_32 m2 =
210		dc_fixpt_from_fraction(7884375, 100000);
211	const struct fixed31_32 c1 =
212		dc_fixpt_from_fraction(8359375, 10000000);
213	const struct fixed31_32 c2 =
214		dc_fixpt_from_fraction(188515625, 10000000);
215	const struct fixed31_32 c3 =
216		dc_fixpt_from_fraction(186875, 10000);
217
218	struct fixed31_32 l_pow_m1;
219	struct fixed31_32 base;
220
221	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
222		in_x = dc_fixpt_zero;
223
224	l_pow_m1 = dc_fixpt_pow(in_x, m1);
225	base = dc_fixpt_div(
226			dc_fixpt_add(c1,
227					(dc_fixpt_mul(c2, l_pow_m1))),
228			dc_fixpt_add(dc_fixpt_one,
229					(dc_fixpt_mul(c3, l_pow_m1))));
230	*out_y = dc_fixpt_pow(base, m2);
231}
232
233static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
234{
235	/* consts for dePQ gamma formula. */
236	const struct fixed31_32 m1 =
237		dc_fixpt_from_fraction(159301758, 1000000000);
238	const struct fixed31_32 m2 =
239		dc_fixpt_from_fraction(7884375, 100000);
240	const struct fixed31_32 c1 =
241		dc_fixpt_from_fraction(8359375, 10000000);
242	const struct fixed31_32 c2 =
243		dc_fixpt_from_fraction(188515625, 10000000);
244	const struct fixed31_32 c3 =
245		dc_fixpt_from_fraction(186875, 10000);
246
247	struct fixed31_32 l_pow_m1;
248	struct fixed31_32 base, div;
249	struct fixed31_32 base2;
250
251
252	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
253		in_x = dc_fixpt_zero;
254
255	l_pow_m1 = dc_fixpt_pow(in_x,
256			dc_fixpt_div(dc_fixpt_one, m2));
257	base = dc_fixpt_sub(l_pow_m1, c1);
258
259	div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1));
260
261	base2 = dc_fixpt_div(base, div);
262	// avoid complex numbers
263	if (dc_fixpt_lt(base2, dc_fixpt_zero))
264		base2 = dc_fixpt_sub(dc_fixpt_zero, base2);
265
266
267	*out_y = dc_fixpt_pow(base2, dc_fixpt_div(dc_fixpt_one, m1));
268
269}
270
271
272/* de gamma, non-linear to linear */
273static void compute_hlg_eotf(struct fixed31_32 in_x,
274		struct fixed31_32 *out_y,
275		uint32_t sdr_white_level, uint32_t max_luminance_nits)
276{
277	struct fixed31_32 a;
278	struct fixed31_32 b;
279	struct fixed31_32 c;
280	struct fixed31_32 threshold;
281	struct fixed31_32 x;
282
283	struct fixed31_32 scaling_factor =
284			dc_fixpt_from_fraction(max_luminance_nits, sdr_white_level);
285	a = dc_fixpt_from_fraction(17883277, 100000000);
286	b = dc_fixpt_from_fraction(28466892, 100000000);
287	c = dc_fixpt_from_fraction(55991073, 100000000);
288	threshold = dc_fixpt_from_fraction(1, 2);
289
290	if (dc_fixpt_lt(in_x, threshold)) {
291		x = dc_fixpt_mul(in_x, in_x);
292		x = dc_fixpt_div_int(x, 3);
293	} else {
294		x = dc_fixpt_sub(in_x, c);
295		x = dc_fixpt_div(x, a);
296		x = dc_fixpt_exp(x);
297		x = dc_fixpt_add(x, b);
298		x = dc_fixpt_div_int(x, 12);
299	}
300	*out_y = dc_fixpt_mul(x, scaling_factor);
301
302}
303
304/* re gamma, linear to non-linear */
305static void compute_hlg_oetf(struct fixed31_32 in_x, struct fixed31_32 *out_y,
306		uint32_t sdr_white_level, uint32_t max_luminance_nits)
307{
308	struct fixed31_32 a;
309	struct fixed31_32 b;
310	struct fixed31_32 c;
311	struct fixed31_32 threshold;
312	struct fixed31_32 x;
313
314	struct fixed31_32 scaling_factor =
315			dc_fixpt_from_fraction(sdr_white_level, max_luminance_nits);
316	a = dc_fixpt_from_fraction(17883277, 100000000);
317	b = dc_fixpt_from_fraction(28466892, 100000000);
318	c = dc_fixpt_from_fraction(55991073, 100000000);
319	threshold = dc_fixpt_from_fraction(1, 12);
320	x = dc_fixpt_mul(in_x, scaling_factor);
321
322
323	if (dc_fixpt_lt(x, threshold)) {
324		x = dc_fixpt_mul(x, dc_fixpt_from_fraction(3, 1));
325		*out_y = dc_fixpt_pow(x, dc_fixpt_half);
326	} else {
327		x = dc_fixpt_mul(x, dc_fixpt_from_fraction(12, 1));
328		x = dc_fixpt_sub(x, b);
329		x = dc_fixpt_log(x);
330		x = dc_fixpt_mul(a, x);
331		*out_y = dc_fixpt_add(x, c);
332	}
333}
334
335
336/* one-time pre-compute PQ values - only for sdr_white_level 80 */
337void precompute_pq(void)
338{
339	int i;
340	struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
341
342	for (i = 0; i <= MAX_HW_POINTS; i++)
343		pq_table[i] = dc_fixpt_from_fraction(pq_numerator[i], pq_divider);
344
345	/* below is old method that uses run-time calculation in fixed pt space */
346	/* pow function has problems with arguments too small */
347	/*
348	struct fixed31_32 x;
349	const struct hw_x_point *coord_x = coordinates_x + 32;
350	struct fixed31_32 scaling_factor =
351			dc_fixpt_from_fraction(80, 10000);
352
353	for (i = 0; i < 32; i++)
354		pq_table[i] = dc_fixpt_zero;
355
356	for (i = 32; i <= MAX_HW_POINTS; i++) {
357		x = dc_fixpt_mul(coord_x->x, scaling_factor);
358		compute_pq(x, &pq_table[i]);
359		++coord_x;
360	}
361	*/
362}
363
364/* one-time pre-compute dePQ values - only for max pixel value 125 FP16 */
365void precompute_de_pq(void)
366{
367	int i;
368	struct fixed31_32  y;
369	uint32_t begin_index, end_index;
370
371	struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
372	struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
373	/* X points is 2^-25 to 2^7
374	 * De-gamma X is 2^-12 to 2^0 ��� we are skipping first -12-(-25) = 13 regions
375	 */
376	begin_index = 13 * NUM_PTS_IN_REGION;
377	end_index = begin_index + 12 * NUM_PTS_IN_REGION;
378
379	for (i = 0; i <= begin_index; i++)
380		de_pq_table[i] = dc_fixpt_zero;
381
382	for (; i <= end_index; i++) {
383		compute_de_pq(coordinates_x[i].x, &y);
384		de_pq_table[i] = dc_fixpt_mul(y, scaling_factor);
385	}
386
387	for (; i <= MAX_HW_POINTS; i++)
388		de_pq_table[i] = de_pq_table[i-1];
389}
/* Groups three fixed-point divider values.
 * NOTE(review): no user of this struct is visible in this part of the file -
 * confirm it is still needed.
 */
struct dividers {
	struct fixed31_32 divider1;
	struct fixed31_32 divider2;
	struct fixed31_32 divider3;
};
395
396
397static bool build_coefficients(struct gamma_coefficients *coefficients,
398		enum dc_transfer_func_predefined type)
399{
400
401	uint32_t i = 0;
402	uint32_t index = 0;
403	bool ret = true;
404
405	if (type == TRANSFER_FUNCTION_SRGB)
406		index = 0;
407	else if (type == TRANSFER_FUNCTION_BT709)
408		index = 1;
409	else if (type == TRANSFER_FUNCTION_GAMMA22)
410		index = 2;
411	else if (type == TRANSFER_FUNCTION_GAMMA24)
412		index = 3;
413	else if (type == TRANSFER_FUNCTION_GAMMA26)
414		index = 4;
415	else {
416		ret = false;
417		goto release;
418	}
419
420	do {
421		coefficients->a0[i] = dc_fixpt_from_fraction(
422			numerator01[index], 10000000);
423		coefficients->a1[i] = dc_fixpt_from_fraction(
424			numerator02[index], 1000);
425		coefficients->a2[i] = dc_fixpt_from_fraction(
426			numerator03[index], 1000);
427		coefficients->a3[i] = dc_fixpt_from_fraction(
428			numerator04[index], 1000);
429		coefficients->user_gamma[i] = dc_fixpt_from_fraction(
430			numerator05[index], 1000);
431
432		++i;
433	} while (i != ARRAY_SIZE(coefficients->a0));
434release:
435	return ret;
436}
437
/* Map a linear value to gamma-encoded space, using args->cal_buffer to avoid
 * most dc_fixpt_pow calls.
 *
 * With g = 1 / args->gamma the result is:
 *   arg >= 1          -> 1
 *   arg <= -a0        -> a2 - (1 + a3) * (-arg)^g
 *   arg >= a0         -> (1 + a3) * arg^g - a2
 *   otherwise         -> arg * a1
 *
 * In the "arg >= a0" branch arg^g is only computed with dc_fixpt_pow while
 * cal_buffer->buffer_index is below 16 or inside
 * [PRECISE_LUT_REGION_START, PRECISE_LUT_REGION_END]; otherwise it is derived
 * as 2^g times the value cached 16 calls earlier. This relies on consecutive
 * calls being made with consecutive X points, each twice the one 16 calls
 * before it. buffer_index only advances in that branch, and a buffer_index of
 * -1 disables caching (every call then uses dc_fixpt_pow).
 */
static struct fixed31_32 translate_from_linear_space(
		struct translate_from_linear_space_args *args)
{
	const struct fixed31_32 one = dc_fixpt_from_int(1);

	struct fixed31_32 scratch_1, scratch_2;
	struct calculate_buffer *cal_buffer = args->cal_buffer;

	/* Clamp: anything at or above 1.0 maps to 1.0. */
	if (dc_fixpt_le(one, args->arg))
		return one;

	if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0))) {
		/* Negative power segment: a2 - (1 + a3) * (-arg)^(1/gamma) */
		scratch_1 = dc_fixpt_add(one, args->a3);
		scratch_2 = dc_fixpt_pow(
				dc_fixpt_neg(args->arg),
				dc_fixpt_recip(args->gamma));
		scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
		scratch_1 = dc_fixpt_sub(args->a2, scratch_1);

		return scratch_1;
	} else if (dc_fixpt_le(args->a0, args->arg)) {
		/* First cached call: remember 2^(1/gamma) for later reuse. */
		if (cal_buffer->buffer_index == 0) {
			cal_buffer->gamma_of_2 = dc_fixpt_pow(dc_fixpt_from_int(2),
					dc_fixpt_recip(args->gamma));
		}
		scratch_1 = dc_fixpt_add(one, args->a3);
		/* In the first region (first 16 points) and in the
		 * region delimited by START/END we calculate with
		 * full precision to avoid error accumulation.
		 */
		if ((cal_buffer->buffer_index >= PRECISE_LUT_REGION_START &&
			cal_buffer->buffer_index <= PRECISE_LUT_REGION_END) ||
			(cal_buffer->buffer_index < 16))
			scratch_2 = dc_fixpt_pow(args->arg,
					dc_fixpt_recip(args->gamma));
		else
			/* (2x)^g = 2^g * x^g, with x^g cached 16 calls ago. */
			scratch_2 = dc_fixpt_mul(cal_buffer->gamma_of_2,
					cal_buffer->buffer[cal_buffer->buffer_index%16]);

		/* Store arg^g in the 16-entry ring unless caching is off (-1). */
		if (cal_buffer->buffer_index != -1) {
			cal_buffer->buffer[cal_buffer->buffer_index%16] = scratch_2;
			cal_buffer->buffer_index++;
		}

		/* Positive power segment: (1 + a3) * arg^(1/gamma) - a2 */
		scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
		scratch_1 = dc_fixpt_sub(scratch_1, args->a2);

		return scratch_1;
	} else
		/* Linear segment around zero. */
		return dc_fixpt_mul(args->arg, args->a1);
}
489
490
491static struct fixed31_32 translate_from_linear_space_long(
492		struct translate_from_linear_space_args *args)
493{
494	const struct fixed31_32 one = dc_fixpt_from_int(1);
495
496	if (dc_fixpt_lt(one, args->arg))
497		return one;
498
499	if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0)))
500		return dc_fixpt_sub(
501			args->a2,
502			dc_fixpt_mul(
503				dc_fixpt_add(
504					one,
505					args->a3),
506				dc_fixpt_pow(
507					dc_fixpt_neg(args->arg),
508					dc_fixpt_recip(args->gamma))));
509	else if (dc_fixpt_le(args->a0, args->arg))
510		return dc_fixpt_sub(
511			dc_fixpt_mul(
512				dc_fixpt_add(
513					one,
514					args->a3),
515				dc_fixpt_pow(
516						args->arg,
517					dc_fixpt_recip(args->gamma))),
518					args->a2);
519	else
520		return dc_fixpt_mul(args->arg, args->a1);
521}
522
523static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf, struct calculate_buffer *cal_buffer)
524{
525	struct fixed31_32 gamma = dc_fixpt_from_fraction(22, 10);
526	struct translate_from_linear_space_args scratch_gamma_args;
527
528	scratch_gamma_args.arg = arg;
529	scratch_gamma_args.a0 = dc_fixpt_zero;
530	scratch_gamma_args.a1 = dc_fixpt_zero;
531	scratch_gamma_args.a2 = dc_fixpt_zero;
532	scratch_gamma_args.a3 = dc_fixpt_zero;
533	scratch_gamma_args.cal_buffer = cal_buffer;
534	scratch_gamma_args.gamma = gamma;
535
536	if (use_eetf)
537		return translate_from_linear_space_long(&scratch_gamma_args);
538
539	return translate_from_linear_space(&scratch_gamma_args);
540}
541
542
543static struct fixed31_32 translate_to_linear_space(
544	struct fixed31_32 arg,
545	struct fixed31_32 a0,
546	struct fixed31_32 a1,
547	struct fixed31_32 a2,
548	struct fixed31_32 a3,
549	struct fixed31_32 gamma)
550{
551	struct fixed31_32 linear;
552
553	a0 = dc_fixpt_mul(a0, a1);
554	if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
555
556		linear = dc_fixpt_neg(
557				 dc_fixpt_pow(
558				 dc_fixpt_div(
559				 dc_fixpt_sub(a2, arg),
560				 dc_fixpt_add(
561				 dc_fixpt_one, a3)), gamma));
562
563	else if (dc_fixpt_le(dc_fixpt_neg(a0), arg) &&
564			 dc_fixpt_le(arg, a0))
565		linear = dc_fixpt_div(arg, a1);
566	else
567		linear =  dc_fixpt_pow(
568					dc_fixpt_div(
569					dc_fixpt_add(a2, arg),
570					dc_fixpt_add(
571					dc_fixpt_one, a3)), gamma);
572
573	return linear;
574}
575
576static struct fixed31_32 translate_from_linear_space_ex(
577	struct fixed31_32 arg,
578	struct gamma_coefficients *coeff,
579	uint32_t color_index,
580	struct calculate_buffer *cal_buffer)
581{
582	struct translate_from_linear_space_args scratch_gamma_args;
583
584	scratch_gamma_args.arg = arg;
585	scratch_gamma_args.a0 = coeff->a0[color_index];
586	scratch_gamma_args.a1 = coeff->a1[color_index];
587	scratch_gamma_args.a2 = coeff->a2[color_index];
588	scratch_gamma_args.a3 = coeff->a3[color_index];
589	scratch_gamma_args.gamma = coeff->user_gamma[color_index];
590	scratch_gamma_args.cal_buffer = cal_buffer;
591
592	return translate_from_linear_space(&scratch_gamma_args);
593}
594
595
596static inline struct fixed31_32 translate_to_linear_space_ex(
597	struct fixed31_32 arg,
598	struct gamma_coefficients *coeff,
599	uint32_t color_index)
600{
601	return translate_to_linear_space(
602		arg,
603		coeff->a0[color_index],
604		coeff->a1[color_index],
605		coeff->a2[color_index],
606		coeff->a3[color_index],
607		coeff->user_gamma[color_index]);
608}
609
610
611static bool find_software_points(
612	const struct dc_gamma *ramp,
613	const struct gamma_pixel *axis_x,
614	struct fixed31_32 hw_point,
615	enum channel_name channel,
616	uint32_t *index_to_start,
617	uint32_t *index_left,
618	uint32_t *index_right,
619	enum hw_point_position *pos)
620{
621	const uint32_t max_number = ramp->num_entries + 3;
622
623	struct fixed31_32 left, right;
624
625	uint32_t i = *index_to_start;
626
627	while (i < max_number) {
628		if (channel == CHANNEL_NAME_RED) {
629			left = axis_x[i].r;
630
631			if (i < max_number - 1)
632				right = axis_x[i + 1].r;
633			else
634				right = axis_x[max_number - 1].r;
635		} else if (channel == CHANNEL_NAME_GREEN) {
636			left = axis_x[i].g;
637
638			if (i < max_number - 1)
639				right = axis_x[i + 1].g;
640			else
641				right = axis_x[max_number - 1].g;
642		} else {
643			left = axis_x[i].b;
644
645			if (i < max_number - 1)
646				right = axis_x[i + 1].b;
647			else
648				right = axis_x[max_number - 1].b;
649		}
650
651		if (dc_fixpt_le(left, hw_point) &&
652			dc_fixpt_le(hw_point, right)) {
653			*index_to_start = i;
654			*index_left = i;
655
656			if (i < max_number - 1)
657				*index_right = i + 1;
658			else
659				*index_right = max_number - 1;
660
661			*pos = HW_POINT_POSITION_MIDDLE;
662
663			return true;
664		} else if ((i == *index_to_start) &&
665			dc_fixpt_le(hw_point, left)) {
666			*index_to_start = i;
667			*index_left = i;
668			*index_right = i;
669
670			*pos = HW_POINT_POSITION_LEFT;
671
672			return true;
673		} else if ((i == max_number - 1) &&
674			dc_fixpt_le(right, hw_point)) {
675			*index_to_start = i;
676			*index_left = i;
677			*index_right = i;
678
679			*pos = HW_POINT_POSITION_RIGHT;
680
681			return true;
682		}
683
684		++i;
685	}
686
687	return false;
688}
689
690static bool build_custom_gamma_mapping_coefficients_worker(
691	const struct dc_gamma *ramp,
692	struct pixel_gamma_point *coeff,
693	const struct hw_x_point *coordinates_x,
694	const struct gamma_pixel *axis_x,
695	enum channel_name channel,
696	uint32_t number_of_points)
697{
698	uint32_t i = 0;
699
700	while (i <= number_of_points) {
701		struct fixed31_32 coord_x;
702
703		uint32_t index_to_start = 0;
704		uint32_t index_left = 0;
705		uint32_t index_right = 0;
706
707		enum hw_point_position hw_pos;
708
709		struct gamma_point *point;
710
711		struct fixed31_32 left_pos;
712		struct fixed31_32 right_pos;
713
714		if (channel == CHANNEL_NAME_RED)
715			coord_x = coordinates_x[i].regamma_y_red;
716		else if (channel == CHANNEL_NAME_GREEN)
717			coord_x = coordinates_x[i].regamma_y_green;
718		else
719			coord_x = coordinates_x[i].regamma_y_blue;
720
721		if (!find_software_points(
722			ramp, axis_x, coord_x, channel,
723			&index_to_start, &index_left, &index_right, &hw_pos)) {
724			BREAK_TO_DEBUGGER();
725			return false;
726		}
727
728		if (index_left >= ramp->num_entries + 3) {
729			BREAK_TO_DEBUGGER();
730			return false;
731		}
732
733		if (index_right >= ramp->num_entries + 3) {
734			BREAK_TO_DEBUGGER();
735			return false;
736		}
737
738		if (channel == CHANNEL_NAME_RED) {
739			point = &coeff[i].r;
740
741			left_pos = axis_x[index_left].r;
742			right_pos = axis_x[index_right].r;
743		} else if (channel == CHANNEL_NAME_GREEN) {
744			point = &coeff[i].g;
745
746			left_pos = axis_x[index_left].g;
747			right_pos = axis_x[index_right].g;
748		} else {
749			point = &coeff[i].b;
750
751			left_pos = axis_x[index_left].b;
752			right_pos = axis_x[index_right].b;
753		}
754
755		if (hw_pos == HW_POINT_POSITION_MIDDLE)
756			point->coeff = dc_fixpt_div(
757				dc_fixpt_sub(
758					coord_x,
759					left_pos),
760				dc_fixpt_sub(
761					right_pos,
762					left_pos));
763		else if (hw_pos == HW_POINT_POSITION_LEFT)
764			point->coeff = dc_fixpt_zero;
765		else if (hw_pos == HW_POINT_POSITION_RIGHT)
766			point->coeff = dc_fixpt_from_int(2);
767		else {
768			BREAK_TO_DEBUGGER();
769			return false;
770		}
771
772		point->left_index = index_left;
773		point->right_index = index_right;
774		point->pos = hw_pos;
775
776		++i;
777	}
778
779	return true;
780}
781
782static struct fixed31_32 calculate_mapped_value(
783	struct pwl_float_data *rgb,
784	const struct pixel_gamma_point *coeff,
785	enum channel_name channel,
786	uint32_t max_index)
787{
788	const struct gamma_point *point;
789
790	struct fixed31_32 result;
791
792	if (channel == CHANNEL_NAME_RED)
793		point = &coeff->r;
794	else if (channel == CHANNEL_NAME_GREEN)
795		point = &coeff->g;
796	else
797		point = &coeff->b;
798
799	if ((point->left_index < 0) || (point->left_index > max_index)) {
800		BREAK_TO_DEBUGGER();
801		return dc_fixpt_zero;
802	}
803
804	if ((point->right_index < 0) || (point->right_index > max_index)) {
805		BREAK_TO_DEBUGGER();
806		return dc_fixpt_zero;
807	}
808
809	if (point->pos == HW_POINT_POSITION_MIDDLE)
810		if (channel == CHANNEL_NAME_RED)
811			result = dc_fixpt_add(
812				dc_fixpt_mul(
813					point->coeff,
814					dc_fixpt_sub(
815						rgb[point->right_index].r,
816						rgb[point->left_index].r)),
817				rgb[point->left_index].r);
818		else if (channel == CHANNEL_NAME_GREEN)
819			result = dc_fixpt_add(
820				dc_fixpt_mul(
821					point->coeff,
822					dc_fixpt_sub(
823						rgb[point->right_index].g,
824						rgb[point->left_index].g)),
825				rgb[point->left_index].g);
826		else
827			result = dc_fixpt_add(
828				dc_fixpt_mul(
829					point->coeff,
830					dc_fixpt_sub(
831						rgb[point->right_index].b,
832						rgb[point->left_index].b)),
833				rgb[point->left_index].b);
834	else if (point->pos == HW_POINT_POSITION_LEFT) {
835		BREAK_TO_DEBUGGER();
836		result = dc_fixpt_zero;
837	} else {
838		result = dc_fixpt_one;
839	}
840
841	return result;
842}
843
844static void build_pq(struct pwl_float_data_ex *rgb_regamma,
845		uint32_t hw_points_num,
846		const struct hw_x_point *coordinate_x,
847		uint32_t sdr_white_level)
848{
849	uint32_t i, start_index;
850
851	struct pwl_float_data_ex *rgb = rgb_regamma;
852	const struct hw_x_point *coord_x = coordinate_x;
853	struct fixed31_32 x;
854	struct fixed31_32 output;
855	struct fixed31_32 scaling_factor =
856			dc_fixpt_from_fraction(sdr_white_level, 10000);
857	struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
858
859	if (!mod_color_is_table_init(type_pq_table) && sdr_white_level == 80) {
860		precompute_pq();
861		mod_color_set_table_init_state(type_pq_table, true);
862	}
863
864	/* TODO: start index is from segment 2^-24, skipping first segment
865	 * due to x values too small for power calculations
866	 */
867	start_index = 32;
868	rgb += start_index;
869	coord_x += start_index;
870
871	for (i = start_index; i <= hw_points_num; i++) {
872		/* Multiply 0.008 as regamma is 0-1 and FP16 input is 0-125.
873		 * FP 1.0 = 80nits
874		 */
875		if (sdr_white_level == 80) {
876			output = pq_table[i];
877		} else {
878			x = dc_fixpt_mul(coord_x->x, scaling_factor);
879			compute_pq(x, &output);
880		}
881
882		/* should really not happen? */
883		if (dc_fixpt_lt(output, dc_fixpt_zero))
884			output = dc_fixpt_zero;
885
886		rgb->r = output;
887		rgb->g = output;
888		rgb->b = output;
889
890		++coord_x;
891		++rgb;
892	}
893}
894
895static void build_de_pq(struct pwl_float_data_ex *de_pq,
896		uint32_t hw_points_num,
897		const struct hw_x_point *coordinate_x)
898{
899	uint32_t i;
900	struct fixed31_32 output;
901	struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
902	struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
903
904	if (!mod_color_is_table_init(type_de_pq_table)) {
905		precompute_de_pq();
906		mod_color_set_table_init_state(type_de_pq_table, true);
907	}
908
909
910	for (i = 0; i <= hw_points_num; i++) {
911		output = de_pq_table[i];
912		/* should really not happen? */
913		if (dc_fixpt_lt(output, dc_fixpt_zero))
914			output = dc_fixpt_zero;
915		else if (dc_fixpt_lt(scaling_factor, output))
916			output = scaling_factor;
917		de_pq[i].r = output;
918		de_pq[i].g = output;
919		de_pq[i].b = output;
920	}
921}
922
923static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
924		uint32_t hw_points_num,
925		const struct hw_x_point *coordinate_x,
926		enum dc_transfer_func_predefined type,
927		struct calculate_buffer *cal_buffer)
928{
929	uint32_t i;
930	bool ret = false;
931
932	struct gamma_coefficients *coeff;
933	struct pwl_float_data_ex *rgb = rgb_regamma;
934	const struct hw_x_point *coord_x = coordinate_x;
935
936	coeff = kvzalloc(sizeof(*coeff), GFP_KERNEL);
937	if (!coeff)
938		goto release;
939
940	if (!build_coefficients(coeff, type))
941		goto release;
942
943	memset(cal_buffer->buffer, 0, NUM_PTS_IN_REGION * sizeof(struct fixed31_32));
944	cal_buffer->buffer_index = 0; // see variable definition for more info
945
946	i = 0;
947	while (i <= hw_points_num) {
948		/* TODO use y vs r,g,b */
949		rgb->r = translate_from_linear_space_ex(
950			coord_x->x, coeff, 0, cal_buffer);
951		rgb->g = rgb->r;
952		rgb->b = rgb->r;
953		++coord_x;
954		++rgb;
955		++i;
956	}
957	cal_buffer->buffer_index = -1;
958	ret = true;
959release:
960	kvfree(coeff);
961	return ret;
962}
963
964static void hermite_spline_eetf(struct fixed31_32 input_x,
965				struct fixed31_32 max_display,
966				struct fixed31_32 min_display,
967				struct fixed31_32 max_content,
968				struct fixed31_32 *out_x)
969{
970	struct fixed31_32 min_lum_pq;
971	struct fixed31_32 max_lum_pq;
972	struct fixed31_32 max_content_pq;
973	struct fixed31_32 ks;
974	struct fixed31_32 E1;
975	struct fixed31_32 E2;
976	struct fixed31_32 E3;
977	struct fixed31_32 t;
978	struct fixed31_32 t2;
979	struct fixed31_32 t3;
980	struct fixed31_32 two;
981	struct fixed31_32 three;
982	struct fixed31_32 temp1;
983	struct fixed31_32 temp2;
984	struct fixed31_32 a = dc_fixpt_from_fraction(15, 10);
985	struct fixed31_32 b = dc_fixpt_from_fraction(5, 10);
986	struct fixed31_32 epsilon = dc_fixpt_from_fraction(1, 1000000); // dc_fixpt_epsilon is a bit too small
987
988	if (dc_fixpt_eq(max_content, dc_fixpt_zero)) {
989		*out_x = dc_fixpt_zero;
990		return;
991	}
992
993	compute_pq(input_x, &E1);
994	compute_pq(dc_fixpt_div(min_display, max_content), &min_lum_pq);
995	compute_pq(dc_fixpt_div(max_display, max_content), &max_lum_pq);
996	compute_pq(dc_fixpt_one, &max_content_pq); // always 1? DAL2 code is weird
997	a = dc_fixpt_div(dc_fixpt_add(dc_fixpt_one, b), max_content_pq); // (1+b)/maxContent
998	ks = dc_fixpt_sub(dc_fixpt_mul(a, max_lum_pq), b); // a * max_lum_pq - b
999
1000	if (dc_fixpt_lt(E1, ks))
1001		E2 = E1;
1002	else if (dc_fixpt_le(ks, E1) && dc_fixpt_le(E1, dc_fixpt_one)) {
1003		if (dc_fixpt_lt(epsilon, dc_fixpt_sub(dc_fixpt_one, ks)))
1004			// t = (E1 - ks) / (1 - ks)
1005			t = dc_fixpt_div(dc_fixpt_sub(E1, ks),
1006					dc_fixpt_sub(dc_fixpt_one, ks));
1007		else
1008			t = dc_fixpt_zero;
1009
1010		two = dc_fixpt_from_int(2);
1011		three = dc_fixpt_from_int(3);
1012
1013		t2 = dc_fixpt_mul(t, t);
1014		t3 = dc_fixpt_mul(t2, t);
1015		temp1 = dc_fixpt_mul(two, t3);
1016		temp2 = dc_fixpt_mul(three, t2);
1017
1018		// (2t^3 - 3t^2 + 1) * ks
1019		E2 = dc_fixpt_mul(ks, dc_fixpt_add(dc_fixpt_one,
1020				dc_fixpt_sub(temp1, temp2)));
1021
1022		// (-2t^3 + 3t^2) * max_lum_pq
1023		E2 = dc_fixpt_add(E2, dc_fixpt_mul(max_lum_pq,
1024				dc_fixpt_sub(temp2, temp1)));
1025
1026		temp1 = dc_fixpt_mul(two, t2);
1027		temp2 = dc_fixpt_sub(dc_fixpt_one, ks);
1028
1029		// (t^3 - 2t^2 + t) * (1-ks)
1030		E2 = dc_fixpt_add(E2, dc_fixpt_mul(temp2,
1031				dc_fixpt_add(t, dc_fixpt_sub(t3, temp1))));
1032	} else
1033		E2 = dc_fixpt_one;
1034
1035	temp1 = dc_fixpt_sub(dc_fixpt_one, E2);
1036	temp2 = dc_fixpt_mul(temp1, temp1);
1037	temp2 = dc_fixpt_mul(temp2, temp2);
1038	// temp2 = (1-E2)^4
1039
1040	E3 =  dc_fixpt_add(E2, dc_fixpt_mul(min_lum_pq, temp2));
1041	compute_de_pq(E3, out_x);
1042
1043	*out_x = dc_fixpt_div(*out_x, dc_fixpt_div(max_display, max_content));
1044}
1045
1046static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
1047		uint32_t hw_points_num,
1048		const struct hw_x_point *coordinate_x,
1049		const struct hdr_tm_params *fs_params,
1050		struct calculate_buffer *cal_buffer)
1051{
1052	uint32_t i;
1053	struct pwl_float_data_ex *rgb = rgb_regamma;
1054	const struct hw_x_point *coord_x = coordinate_x;
1055	const struct hw_x_point *prv_coord_x = coord_x;
1056	struct fixed31_32 scaledX = dc_fixpt_zero;
1057	struct fixed31_32 scaledX1 = dc_fixpt_zero;
1058	struct fixed31_32 max_display;
1059	struct fixed31_32 min_display;
1060	struct fixed31_32 max_content;
1061	struct fixed31_32 clip = dc_fixpt_one;
1062	struct fixed31_32 output;
1063	bool use_eetf = false;
1064	bool is_clipped = false;
1065	struct fixed31_32 sdr_white_level;
1066	struct fixed31_32 coordX_diff;
1067	struct fixed31_32 out_dist_max;
1068	struct fixed31_32 bright_norm;
1069
1070	if (fs_params->max_content == 0 ||
1071			fs_params->max_display == 0)
1072		return false;
1073
1074	max_display = dc_fixpt_from_int(fs_params->max_display);
1075	min_display = dc_fixpt_from_fraction(fs_params->min_display, 10000);
1076	max_content = dc_fixpt_from_int(fs_params->max_content);
1077	sdr_white_level = dc_fixpt_from_int(fs_params->sdr_white_level);
1078
1079	if (fs_params->min_display > 1000) // cap at 0.1 at the bottom
1080		min_display = dc_fixpt_from_fraction(1, 10);
1081	if (fs_params->max_display < 100) // cap at 100 at the top
1082		max_display = dc_fixpt_from_int(100);
1083
1084	// only max used, we don't adjust min luminance
1085	if (fs_params->max_content > fs_params->max_display)
1086		use_eetf = true;
1087	else
1088		max_content = max_display;
1089
1090	if (!use_eetf)
1091		cal_buffer->buffer_index = 0; // see var definition for more info
1092	rgb += 32; // first 32 points have problems with fixed point, too small
1093	coord_x += 32;
1094
1095	for (i = 32; i <= hw_points_num; i++) {
1096		if (!is_clipped) {
1097			if (use_eetf) {
1098				/* max content is equal 1 */
1099				scaledX1 = dc_fixpt_div(coord_x->x,
1100						dc_fixpt_div(max_content, sdr_white_level));
1101				hermite_spline_eetf(scaledX1, max_display, min_display,
1102						max_content, &scaledX);
1103			} else
1104				scaledX = dc_fixpt_div(coord_x->x,
1105						dc_fixpt_div(max_display, sdr_white_level));
1106
1107			if (dc_fixpt_lt(scaledX, clip)) {
1108				if (dc_fixpt_lt(scaledX, dc_fixpt_zero))
1109					output = dc_fixpt_zero;
1110				else
1111					output = calculate_gamma22(scaledX, use_eetf, cal_buffer);
1112
1113				// Ensure output respects reasonable boundaries
1114				output = dc_fixpt_clamp(output, dc_fixpt_zero, dc_fixpt_one);
1115
1116				rgb->r = output;
1117				rgb->g = output;
1118				rgb->b = output;
1119			} else {
1120				/* Here clipping happens for the first time */
1121				is_clipped = true;
1122
1123				/* The next few lines implement the equation
1124				 * output = prev_out +
1125				 * (coord_x->x - prev_coord_x->x) *
1126				 * (1.0 - prev_out) /
1127				 * (maxDisp/sdr_white_level - prevCoordX)
1128				 *
1129				 * This equation interpolates the first point
1130				 * after max_display/80 so that the slope from
1131				 * hw_x_before_max and hw_x_after_max is such
1132				 * that we hit Y=1.0 at max_display/80.
1133				 */
1134
1135				coordX_diff = dc_fixpt_sub(coord_x->x, prv_coord_x->x);
1136				out_dist_max = dc_fixpt_sub(dc_fixpt_one, output);
1137				bright_norm = dc_fixpt_div(max_display, sdr_white_level);
1138
1139				output = dc_fixpt_add(
1140					output, dc_fixpt_mul(
1141						coordX_diff, dc_fixpt_div(
1142							out_dist_max,
1143							dc_fixpt_sub(bright_norm, prv_coord_x->x)
1144						)
1145					)
1146				);
1147
1148				/* Relaxing the maximum boundary to 1.07 (instead of 1.0)
1149				 * because the last point in the curve must be such that
1150				 * the maximum display pixel brightness interpolates to
1151				 * exactly 1.0. The worst case scenario was calculated
1152				 * around 1.057, so the limit of 1.07 leaves some safety
1153				 * margin.
1154				 */
1155				output = dc_fixpt_clamp(output, dc_fixpt_zero,
1156					dc_fixpt_from_fraction(107, 100));
1157
1158				rgb->r = output;
1159				rgb->g = output;
1160				rgb->b = output;
1161			}
1162		} else {
1163			/* Every other clipping after the first
1164			 * one is dealt with here
1165			 */
1166			rgb->r = clip;
1167			rgb->g = clip;
1168			rgb->b = clip;
1169		}
1170
1171		prv_coord_x = coord_x;
1172		++coord_x;
1173		++rgb;
1174	}
1175	cal_buffer->buffer_index = -1;
1176
1177	return true;
1178}
1179
1180static bool build_degamma(struct pwl_float_data_ex *curve,
1181		uint32_t hw_points_num,
1182		const struct hw_x_point *coordinate_x, enum dc_transfer_func_predefined type)
1183{
1184	uint32_t i;
1185	struct gamma_coefficients coeff;
1186	uint32_t begin_index, end_index;
1187	bool ret = false;
1188
1189	if (!build_coefficients(&coeff, type))
1190		goto release;
1191
1192	i = 0;
1193
1194	/* X points is 2^-25 to 2^7
1195	 * De-gamma X is 2^-12 to 2^0 ��� we are skipping first -12-(-25) = 13 regions
1196	 */
1197	begin_index = 13 * NUM_PTS_IN_REGION;
1198	end_index = begin_index + 12 * NUM_PTS_IN_REGION;
1199
1200	while (i != begin_index) {
1201		curve[i].r = dc_fixpt_zero;
1202		curve[i].g = dc_fixpt_zero;
1203		curve[i].b = dc_fixpt_zero;
1204		i++;
1205	}
1206
1207	while (i != end_index) {
1208		curve[i].r = translate_to_linear_space_ex(
1209				coordinate_x[i].x, &coeff, 0);
1210		curve[i].g = curve[i].r;
1211		curve[i].b = curve[i].r;
1212		i++;
1213	}
1214	while (i != hw_points_num + 1) {
1215		curve[i].r = dc_fixpt_one;
1216		curve[i].g = dc_fixpt_one;
1217		curve[i].b = dc_fixpt_one;
1218		i++;
1219	}
1220	ret = true;
1221release:
1222	return ret;
1223}
1224
1225
1226
1227
1228
1229static void build_hlg_degamma(struct pwl_float_data_ex *degamma,
1230		uint32_t hw_points_num,
1231		const struct hw_x_point *coordinate_x,
1232		uint32_t sdr_white_level, uint32_t max_luminance_nits)
1233{
1234	uint32_t i;
1235
1236	struct pwl_float_data_ex *rgb = degamma;
1237	const struct hw_x_point *coord_x = coordinate_x;
1238
1239	i = 0;
1240	// check when i == 434
1241	while (i != hw_points_num + 1) {
1242		compute_hlg_eotf(coord_x->x, &rgb->r, sdr_white_level, max_luminance_nits);
1243		rgb->g = rgb->r;
1244		rgb->b = rgb->r;
1245		++coord_x;
1246		++rgb;
1247		++i;
1248	}
1249}
1250
1251
1252static void build_hlg_regamma(struct pwl_float_data_ex *regamma,
1253		uint32_t hw_points_num,
1254		const struct hw_x_point *coordinate_x,
1255		uint32_t sdr_white_level, uint32_t max_luminance_nits)
1256{
1257	uint32_t i;
1258
1259	struct pwl_float_data_ex *rgb = regamma;
1260	const struct hw_x_point *coord_x = coordinate_x;
1261
1262	i = 0;
1263
1264	// when i == 471
1265	while (i != hw_points_num + 1) {
1266		compute_hlg_oetf(coord_x->x, &rgb->r, sdr_white_level, max_luminance_nits);
1267		rgb->g = rgb->r;
1268		rgb->b = rgb->r;
1269		++coord_x;
1270		++rgb;
1271		++i;
1272	}
1273}
1274
1275static void scale_gamma(struct pwl_float_data *pwl_rgb,
1276		const struct dc_gamma *ramp,
1277		struct dividers dividers)
1278{
1279	const struct fixed31_32 max_driver = dc_fixpt_from_int(0xFFFF);
1280	const struct fixed31_32 max_os = dc_fixpt_from_int(0xFF00);
1281	struct fixed31_32 scaler = max_os;
1282	uint32_t i;
1283	struct pwl_float_data *rgb = pwl_rgb;
1284	struct pwl_float_data *rgb_last = rgb + ramp->num_entries - 1;
1285
1286	i = 0;
1287
1288	do {
1289		if (dc_fixpt_lt(max_os, ramp->entries.red[i]) ||
1290			dc_fixpt_lt(max_os, ramp->entries.green[i]) ||
1291			dc_fixpt_lt(max_os, ramp->entries.blue[i])) {
1292			scaler = max_driver;
1293			break;
1294		}
1295		++i;
1296	} while (i != ramp->num_entries);
1297
1298	i = 0;
1299
1300	do {
1301		rgb->r = dc_fixpt_div(
1302			ramp->entries.red[i], scaler);
1303		rgb->g = dc_fixpt_div(
1304			ramp->entries.green[i], scaler);
1305		rgb->b = dc_fixpt_div(
1306			ramp->entries.blue[i], scaler);
1307
1308		++rgb;
1309		++i;
1310	} while (i != ramp->num_entries);
1311
1312	rgb->r = dc_fixpt_mul(rgb_last->r,
1313			dividers.divider1);
1314	rgb->g = dc_fixpt_mul(rgb_last->g,
1315			dividers.divider1);
1316	rgb->b = dc_fixpt_mul(rgb_last->b,
1317			dividers.divider1);
1318
1319	++rgb;
1320
1321	rgb->r = dc_fixpt_mul(rgb_last->r,
1322			dividers.divider2);
1323	rgb->g = dc_fixpt_mul(rgb_last->g,
1324			dividers.divider2);
1325	rgb->b = dc_fixpt_mul(rgb_last->b,
1326			dividers.divider2);
1327
1328	++rgb;
1329
1330	rgb->r = dc_fixpt_mul(rgb_last->r,
1331			dividers.divider3);
1332	rgb->g = dc_fixpt_mul(rgb_last->g,
1333			dividers.divider3);
1334	rgb->b = dc_fixpt_mul(rgb_last->b,
1335			dividers.divider3);
1336}
1337
1338static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
1339		const struct dc_gamma *ramp,
1340		struct dividers dividers)
1341{
1342	uint32_t i;
1343	struct fixed31_32 min = dc_fixpt_zero;
1344	struct fixed31_32 max = dc_fixpt_one;
1345
1346	struct fixed31_32 delta = dc_fixpt_zero;
1347	struct fixed31_32 offset = dc_fixpt_zero;
1348
1349	for (i = 0 ; i < ramp->num_entries; i++) {
1350		if (dc_fixpt_lt(ramp->entries.red[i], min))
1351			min = ramp->entries.red[i];
1352
1353		if (dc_fixpt_lt(ramp->entries.green[i], min))
1354			min = ramp->entries.green[i];
1355
1356		if (dc_fixpt_lt(ramp->entries.blue[i], min))
1357			min = ramp->entries.blue[i];
1358
1359		if (dc_fixpt_lt(max, ramp->entries.red[i]))
1360			max = ramp->entries.red[i];
1361
1362		if (dc_fixpt_lt(max, ramp->entries.green[i]))
1363			max = ramp->entries.green[i];
1364
1365		if (dc_fixpt_lt(max, ramp->entries.blue[i]))
1366			max = ramp->entries.blue[i];
1367	}
1368
1369	if (dc_fixpt_lt(min, dc_fixpt_zero))
1370		delta = dc_fixpt_neg(min);
1371
1372	offset = dc_fixpt_add(min, max);
1373
1374	for (i = 0 ; i < ramp->num_entries; i++) {
1375		pwl_rgb[i].r = dc_fixpt_div(
1376			dc_fixpt_add(
1377				ramp->entries.red[i], delta), offset);
1378		pwl_rgb[i].g = dc_fixpt_div(
1379			dc_fixpt_add(
1380				ramp->entries.green[i], delta), offset);
1381		pwl_rgb[i].b = dc_fixpt_div(
1382			dc_fixpt_add(
1383				ramp->entries.blue[i], delta), offset);
1384
1385	}
1386
1387	pwl_rgb[i].r =  dc_fixpt_sub(dc_fixpt_mul_int(
1388				pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1389	pwl_rgb[i].g =  dc_fixpt_sub(dc_fixpt_mul_int(
1390				pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1391	pwl_rgb[i].b =  dc_fixpt_sub(dc_fixpt_mul_int(
1392				pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
1393	++i;
1394	pwl_rgb[i].r =  dc_fixpt_sub(dc_fixpt_mul_int(
1395				pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1396	pwl_rgb[i].g =  dc_fixpt_sub(dc_fixpt_mul_int(
1397				pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1398	pwl_rgb[i].b =  dc_fixpt_sub(dc_fixpt_mul_int(
1399				pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
1400}
1401
1402/* todo: all these scale_gamma functions are inherently the same but
1403 *  take different structures as params or different format for ramp
1404 *  values. We could probably implement it in a more generic fashion
1405 */
1406static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb,
1407		const struct regamma_ramp *ramp,
1408		struct dividers dividers)
1409{
1410	unsigned short max_driver = 0xFFFF;
1411	unsigned short max_os = 0xFF00;
1412	unsigned short scaler = max_os;
1413	uint32_t i;
1414	struct pwl_float_data *rgb = pwl_rgb;
1415	struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1;
1416
1417	i = 0;
1418	do {
1419		if (ramp->gamma[i] > max_os ||
1420				ramp->gamma[i + 256] > max_os ||
1421				ramp->gamma[i + 512] > max_os) {
1422			scaler = max_driver;
1423			break;
1424		}
1425		i++;
1426	} while (i != GAMMA_RGB_256_ENTRIES);
1427
1428	i = 0;
1429	do {
1430		rgb->r = dc_fixpt_from_fraction(
1431				ramp->gamma[i], scaler);
1432		rgb->g = dc_fixpt_from_fraction(
1433				ramp->gamma[i + 256], scaler);
1434		rgb->b = dc_fixpt_from_fraction(
1435				ramp->gamma[i + 512], scaler);
1436
1437		++rgb;
1438		++i;
1439	} while (i != GAMMA_RGB_256_ENTRIES);
1440
1441	rgb->r = dc_fixpt_mul(rgb_last->r,
1442			dividers.divider1);
1443	rgb->g = dc_fixpt_mul(rgb_last->g,
1444			dividers.divider1);
1445	rgb->b = dc_fixpt_mul(rgb_last->b,
1446			dividers.divider1);
1447
1448	++rgb;
1449
1450	rgb->r = dc_fixpt_mul(rgb_last->r,
1451			dividers.divider2);
1452	rgb->g = dc_fixpt_mul(rgb_last->g,
1453			dividers.divider2);
1454	rgb->b = dc_fixpt_mul(rgb_last->b,
1455			dividers.divider2);
1456
1457	++rgb;
1458
1459	rgb->r = dc_fixpt_mul(rgb_last->r,
1460			dividers.divider3);
1461	rgb->g = dc_fixpt_mul(rgb_last->g,
1462			dividers.divider3);
1463	rgb->b = dc_fixpt_mul(rgb_last->b,
1464			dividers.divider3);
1465}
1466
1467/*
1468 * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here
1469 * Input is evenly distributed in the output color space as specified in
1470 * SetTimings
1471 *
1472 * Interpolation details:
1473 * 1D LUT has 4096 values which give curve correction in 0-1 float range
1474 * for evenly spaced points in 0-1 range. lut1D[index] gives correction
1475 * for index/4095.
1476 * First we find index for which:
1477 *	index/4095 < regamma_y < (index+1)/4095 =>
1478 *	index < 4095*regamma_y < index + 1
1479 * norm_y = 4095*regamma_y, and index is just truncating to nearest integer
1480 * lut1 = lut1D[index], lut2 = lut1D[index+1]
1481 *
1482 * adjustedY is then linearly interpolating regamma Y between lut1 and lut2
1483 *
1484 * Custom degamma on Linux uses the same interpolation math, so is handled here
1485 */
1486static void apply_lut_1d(
1487		const struct dc_gamma *ramp,
1488		uint32_t num_hw_points,
1489		struct dc_transfer_func_distributed_points *tf_pts)
1490{
1491	int i = 0;
1492	int color = 0;
1493	struct fixed31_32 *regamma_y;
1494	struct fixed31_32 norm_y;
1495	struct fixed31_32 lut1;
1496	struct fixed31_32 lut2;
1497	const int max_lut_index = 4095;
1498	const struct fixed31_32 penult_lut_index_f =
1499			dc_fixpt_from_int(max_lut_index-1);
1500	const struct fixed31_32 max_lut_index_f =
1501			dc_fixpt_from_int(max_lut_index);
1502	int32_t index = 0, index_next = 0;
1503	struct fixed31_32 index_f;
1504	struct fixed31_32 delta_lut;
1505	struct fixed31_32 delta_index;
1506
1507	if (ramp->type != GAMMA_CS_TFM_1D && ramp->type != GAMMA_CUSTOM)
1508		return; // this is not expected
1509
1510	for (i = 0; i < num_hw_points; i++) {
1511		for (color = 0; color < 3; color++) {
1512			if (color == 0)
1513				regamma_y = &tf_pts->red[i];
1514			else if (color == 1)
1515				regamma_y = &tf_pts->green[i];
1516			else
1517				regamma_y = &tf_pts->blue[i];
1518
1519			norm_y = dc_fixpt_mul(max_lut_index_f,
1520						   *regamma_y);
1521			index = dc_fixpt_floor(norm_y);
1522			index_f = dc_fixpt_from_int(index);
1523
1524			if (index < 0)
1525				continue;
1526
1527			if (index <= max_lut_index)
1528				index_next = (index == max_lut_index) ? index : index+1;
1529			else {
1530				/* Here we are dealing with the last point in the curve,
1531				 * which in some cases might exceed the range given by
1532				 * max_lut_index. So we interpolate the value using
1533				 * max_lut_index and max_lut_index - 1.
1534				 */
1535				index = max_lut_index - 1;
1536				index_next = max_lut_index;
1537				index_f = penult_lut_index_f;
1538			}
1539
1540			if (color == 0) {
1541				lut1 = ramp->entries.red[index];
1542				lut2 = ramp->entries.red[index_next];
1543			} else if (color == 1) {
1544				lut1 = ramp->entries.green[index];
1545				lut2 = ramp->entries.green[index_next];
1546			} else {
1547				lut1 = ramp->entries.blue[index];
1548				lut2 = ramp->entries.blue[index_next];
1549			}
1550
1551			// we have everything now, so interpolate
1552			delta_lut = dc_fixpt_sub(lut2, lut1);
1553			delta_index = dc_fixpt_sub(norm_y, index_f);
1554
1555			*regamma_y = dc_fixpt_add(lut1,
1556				dc_fixpt_mul(delta_index, delta_lut));
1557		}
1558	}
1559}
1560
1561static void build_evenly_distributed_points(
1562	struct gamma_pixel *points,
1563	uint32_t numberof_points,
1564	struct dividers dividers)
1565{
1566	struct gamma_pixel *p = points;
1567	struct gamma_pixel *p_last;
1568
1569	uint32_t i = 0;
1570
1571	// This function should not gets called with 0 as a parameter
1572	ASSERT(numberof_points > 0);
1573	p_last = p + numberof_points - 1;
1574
1575	do {
1576		struct fixed31_32 value = dc_fixpt_from_fraction(i,
1577			numberof_points - 1);
1578
1579		p->r = value;
1580		p->g = value;
1581		p->b = value;
1582
1583		++p;
1584		++i;
1585	} while (i < numberof_points);
1586
1587	p->r = dc_fixpt_div(p_last->r, dividers.divider1);
1588	p->g = dc_fixpt_div(p_last->g, dividers.divider1);
1589	p->b = dc_fixpt_div(p_last->b, dividers.divider1);
1590
1591	++p;
1592
1593	p->r = dc_fixpt_div(p_last->r, dividers.divider2);
1594	p->g = dc_fixpt_div(p_last->g, dividers.divider2);
1595	p->b = dc_fixpt_div(p_last->b, dividers.divider2);
1596
1597	++p;
1598
1599	p->r = dc_fixpt_div(p_last->r, dividers.divider3);
1600	p->g = dc_fixpt_div(p_last->g, dividers.divider3);
1601	p->b = dc_fixpt_div(p_last->b, dividers.divider3);
1602}
1603
1604static inline void copy_rgb_regamma_to_coordinates_x(
1605		struct hw_x_point *coordinates_x,
1606		uint32_t hw_points_num,
1607		const struct pwl_float_data_ex *rgb_ex)
1608{
1609	struct hw_x_point *coords = coordinates_x;
1610	uint32_t i = 0;
1611	const struct pwl_float_data_ex *rgb_regamma = rgb_ex;
1612
1613	while (i <= hw_points_num + 1) {
1614		coords->regamma_y_red = rgb_regamma->r;
1615		coords->regamma_y_green = rgb_regamma->g;
1616		coords->regamma_y_blue = rgb_regamma->b;
1617
1618		++coords;
1619		++rgb_regamma;
1620		++i;
1621	}
1622}
1623
1624static bool calculate_interpolated_hardware_curve(
1625	const struct dc_gamma *ramp,
1626	struct pixel_gamma_point *coeff128,
1627	struct pwl_float_data *rgb_user,
1628	const struct hw_x_point *coordinates_x,
1629	const struct gamma_pixel *axis_x,
1630	uint32_t number_of_points,
1631	struct dc_transfer_func_distributed_points *tf_pts)
1632{
1633
1634	const struct pixel_gamma_point *coeff = coeff128;
1635	uint32_t max_entries = 3 - 1;
1636
1637	uint32_t i = 0;
1638
1639	for (i = 0; i < 3; i++) {
1640		if (!build_custom_gamma_mapping_coefficients_worker(
1641				ramp, coeff128, coordinates_x, axis_x, i,
1642				number_of_points))
1643			return false;
1644	}
1645
1646	i = 0;
1647	max_entries += ramp->num_entries;
1648
1649	/* TODO: float point case */
1650
1651	while (i <= number_of_points) {
1652		tf_pts->red[i] = calculate_mapped_value(
1653			rgb_user, coeff, CHANNEL_NAME_RED, max_entries);
1654		tf_pts->green[i] = calculate_mapped_value(
1655			rgb_user, coeff, CHANNEL_NAME_GREEN, max_entries);
1656		tf_pts->blue[i] = calculate_mapped_value(
1657			rgb_user, coeff, CHANNEL_NAME_BLUE, max_entries);
1658
1659		++coeff;
1660		++i;
1661	}
1662
1663	return true;
1664}
1665
1666/* The "old" interpolation uses a complicated scheme to build an array of
1667 * coefficients while also using an array of 0-255 normalized to 0-1
1668 * Then there's another loop using both of the above + new scaled user ramp
1669 * and we concatenate them. It also searches for points of interpolation and
1670 * uses enums for positions.
1671 *
1672 * This function uses a different approach:
1673 * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255
1674 * To find index for hwX , we notice the following:
1675 * i/255 <= hwX < (i+1)/255  <=> i <= 255*hwX < i+1
1676 * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT
1677 *
1678 * Once the index is known, combined Y is simply:
1679 * user_ramp(index) + (hwX-index/255)*(user_ramp(index+1) - user_ramp(index)
1680 *
1681 * We should switch to this method in all cases, it's simpler and faster
1682 * ToDo one day - for now this only applies to ADL regamma to avoid regression
1683 * for regular use cases (sRGB and PQ)
1684 */
1685static void interpolate_user_regamma(uint32_t hw_points_num,
1686		struct pwl_float_data *rgb_user,
1687		bool apply_degamma,
1688		struct dc_transfer_func_distributed_points *tf_pts)
1689{
1690	uint32_t i;
1691	uint32_t color = 0;
1692	int32_t index;
1693	int32_t index_next;
1694	struct fixed31_32 *tf_point;
1695	struct fixed31_32 hw_x;
1696	struct fixed31_32 norm_factor =
1697			dc_fixpt_from_int(255);
1698	struct fixed31_32 norm_x;
1699	struct fixed31_32 index_f;
1700	struct fixed31_32 lut1;
1701	struct fixed31_32 lut2;
1702	struct fixed31_32 delta_lut;
1703	struct fixed31_32 delta_index;
1704	const struct fixed31_32 one = dc_fixpt_from_int(1);
1705
1706	i = 0;
1707	/* fixed_pt library has problems handling too small values */
1708	while (i != 32) {
1709		tf_pts->red[i] = dc_fixpt_zero;
1710		tf_pts->green[i] = dc_fixpt_zero;
1711		tf_pts->blue[i] = dc_fixpt_zero;
1712		++i;
1713	}
1714	while (i <= hw_points_num + 1) {
1715		for (color = 0; color < 3; color++) {
1716			if (color == 0)
1717				tf_point = &tf_pts->red[i];
1718			else if (color == 1)
1719				tf_point = &tf_pts->green[i];
1720			else
1721				tf_point = &tf_pts->blue[i];
1722
1723			if (apply_degamma) {
1724				if (color == 0)
1725					hw_x = coordinates_x[i].regamma_y_red;
1726				else if (color == 1)
1727					hw_x = coordinates_x[i].regamma_y_green;
1728				else
1729					hw_x = coordinates_x[i].regamma_y_blue;
1730			} else
1731				hw_x = coordinates_x[i].x;
1732
1733			if (dc_fixpt_le(one, hw_x))
1734				hw_x = one;
1735
1736			norm_x = dc_fixpt_mul(norm_factor, hw_x);
1737			index = dc_fixpt_floor(norm_x);
1738			if (index < 0 || index > 255)
1739				continue;
1740
1741			index_f = dc_fixpt_from_int(index);
1742			index_next = (index == 255) ? index : index + 1;
1743
1744			if (color == 0) {
1745				lut1 = rgb_user[index].r;
1746				lut2 = rgb_user[index_next].r;
1747			} else if (color == 1) {
1748				lut1 = rgb_user[index].g;
1749				lut2 = rgb_user[index_next].g;
1750			} else {
1751				lut1 = rgb_user[index].b;
1752				lut2 = rgb_user[index_next].b;
1753			}
1754
1755			// we have everything now, so interpolate
1756			delta_lut = dc_fixpt_sub(lut2, lut1);
1757			delta_index = dc_fixpt_sub(norm_x, index_f);
1758
1759			*tf_point = dc_fixpt_add(lut1,
1760				dc_fixpt_mul(delta_index, delta_lut));
1761		}
1762		++i;
1763	}
1764}
1765
1766static void build_new_custom_resulted_curve(
1767	uint32_t hw_points_num,
1768	struct dc_transfer_func_distributed_points *tf_pts)
1769{
1770	uint32_t i = 0;
1771
1772	while (i != hw_points_num + 1) {
1773		tf_pts->red[i] = dc_fixpt_clamp(
1774			tf_pts->red[i], dc_fixpt_zero,
1775			dc_fixpt_one);
1776		tf_pts->green[i] = dc_fixpt_clamp(
1777			tf_pts->green[i], dc_fixpt_zero,
1778			dc_fixpt_one);
1779		tf_pts->blue[i] = dc_fixpt_clamp(
1780			tf_pts->blue[i], dc_fixpt_zero,
1781			dc_fixpt_one);
1782
1783		++i;
1784	}
1785}
1786
1787static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma,
1788		uint32_t hw_points_num, struct calculate_buffer *cal_buffer)
1789{
1790	uint32_t i;
1791
1792	struct gamma_coefficients coeff;
1793	struct pwl_float_data_ex *rgb = rgb_regamma;
1794	const struct hw_x_point *coord_x = coordinates_x;
1795
1796	build_coefficients(&coeff, TRANSFER_FUNCTION_SRGB);
1797
1798	i = 0;
1799	while (i != hw_points_num + 1) {
1800		rgb->r = translate_from_linear_space_ex(
1801				coord_x->x, &coeff, 0, cal_buffer);
1802		rgb->g = rgb->r;
1803		rgb->b = rgb->r;
1804		++coord_x;
1805		++rgb;
1806		++i;
1807	}
1808}
1809
1810static bool map_regamma_hw_to_x_user(
1811	const struct dc_gamma *ramp,
1812	struct pixel_gamma_point *coeff128,
1813	struct pwl_float_data *rgb_user,
1814	struct hw_x_point *coords_x,
1815	const struct gamma_pixel *axis_x,
1816	const struct pwl_float_data_ex *rgb_regamma,
1817	uint32_t hw_points_num,
1818	struct dc_transfer_func_distributed_points *tf_pts,
1819	bool map_user_ramp,
1820	bool do_clamping)
1821{
1822	/* setup to spare calculated ideal regamma values */
1823
1824	int i = 0;
1825	struct hw_x_point *coords = coords_x;
1826	const struct pwl_float_data_ex *regamma = rgb_regamma;
1827
1828	if (ramp && map_user_ramp) {
1829		copy_rgb_regamma_to_coordinates_x(coords,
1830				hw_points_num,
1831				rgb_regamma);
1832
1833		calculate_interpolated_hardware_curve(
1834			ramp, coeff128, rgb_user, coords, axis_x,
1835			hw_points_num, tf_pts);
1836	} else {
1837		/* just copy current rgb_regamma into  tf_pts */
1838		while (i <= hw_points_num) {
1839			tf_pts->red[i] = regamma->r;
1840			tf_pts->green[i] = regamma->g;
1841			tf_pts->blue[i] = regamma->b;
1842
1843			++regamma;
1844			++i;
1845		}
1846	}
1847
1848	if (do_clamping) {
1849		/* this should be named differently, all it does is clamp to 0-1 */
1850		build_new_custom_resulted_curve(hw_points_num, tf_pts);
1851	}
1852
1853	return true;
1854}
1855
1856#define _EXTRA_POINTS 3
1857
1858bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
1859		const struct regamma_lut *regamma,
1860		struct calculate_buffer *cal_buffer,
1861		const struct dc_gamma *ramp)
1862{
1863	struct gamma_coefficients coeff;
1864	const struct hw_x_point *coord_x = coordinates_x;
1865	uint32_t i = 0;
1866
1867	do {
1868		coeff.a0[i] = dc_fixpt_from_fraction(
1869				regamma->coeff.A0[i], 10000000);
1870		coeff.a1[i] = dc_fixpt_from_fraction(
1871				regamma->coeff.A1[i], 1000);
1872		coeff.a2[i] = dc_fixpt_from_fraction(
1873				regamma->coeff.A2[i], 1000);
1874		coeff.a3[i] = dc_fixpt_from_fraction(
1875				regamma->coeff.A3[i], 1000);
1876		coeff.user_gamma[i] = dc_fixpt_from_fraction(
1877				regamma->coeff.gamma[i], 1000);
1878
1879		++i;
1880	} while (i != 3);
1881
1882	i = 0;
1883	/* fixed_pt library has problems handling too small values */
1884	while (i != 32) {
1885		output_tf->tf_pts.red[i] = dc_fixpt_zero;
1886		output_tf->tf_pts.green[i] = dc_fixpt_zero;
1887		output_tf->tf_pts.blue[i] = dc_fixpt_zero;
1888		++coord_x;
1889		++i;
1890	}
1891	while (i != MAX_HW_POINTS + 1) {
1892		output_tf->tf_pts.red[i] = translate_from_linear_space_ex(
1893				coord_x->x, &coeff, 0, cal_buffer);
1894		output_tf->tf_pts.green[i] = translate_from_linear_space_ex(
1895				coord_x->x, &coeff, 1, cal_buffer);
1896		output_tf->tf_pts.blue[i] = translate_from_linear_space_ex(
1897				coord_x->x, &coeff, 2, cal_buffer);
1898		++coord_x;
1899		++i;
1900	}
1901
1902	if (ramp && ramp->type == GAMMA_CS_TFM_1D)
1903		apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
1904
1905	// this function just clamps output to 0-1
1906	build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts);
1907	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1908
1909	return true;
1910}
1911
1912bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
1913		const struct regamma_lut *regamma,
1914		struct calculate_buffer *cal_buffer,
1915		const struct dc_gamma *ramp)
1916{
1917	struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
1918	struct dividers dividers;
1919
1920	struct pwl_float_data *rgb_user = NULL;
1921	struct pwl_float_data_ex *rgb_regamma = NULL;
1922	bool ret = false;
1923
1924	if (regamma == NULL)
1925		return false;
1926
1927	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1928
1929	rgb_user = kcalloc(GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS,
1930			   sizeof(*rgb_user),
1931			   GFP_KERNEL);
1932	if (!rgb_user)
1933		goto rgb_user_alloc_fail;
1934
1935	rgb_regamma = kcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
1936			      sizeof(*rgb_regamma),
1937			      GFP_KERNEL);
1938	if (!rgb_regamma)
1939		goto rgb_regamma_alloc_fail;
1940
1941	dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1942	dividers.divider2 = dc_fixpt_from_int(2);
1943	dividers.divider3 = dc_fixpt_from_fraction(5, 2);
1944
1945	scale_user_regamma_ramp(rgb_user, &regamma->ramp, dividers);
1946
1947	if (regamma->flags.bits.applyDegamma == 1) {
1948		apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS, cal_buffer);
1949		copy_rgb_regamma_to_coordinates_x(coordinates_x,
1950				MAX_HW_POINTS, rgb_regamma);
1951	}
1952
1953	interpolate_user_regamma(MAX_HW_POINTS, rgb_user,
1954			regamma->flags.bits.applyDegamma, tf_pts);
1955
1956	// no custom HDR curves!
1957	tf_pts->end_exponent = 0;
1958	tf_pts->x_point_at_y1_red = 1;
1959	tf_pts->x_point_at_y1_green = 1;
1960	tf_pts->x_point_at_y1_blue = 1;
1961
1962	if (ramp && ramp->type == GAMMA_CS_TFM_1D)
1963		apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
1964
1965	// this function just clamps output to 0-1
1966	build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts);
1967
1968	ret = true;
1969
1970	kfree(rgb_regamma);
1971rgb_regamma_alloc_fail:
1972	kfree(rgb_user);
1973rgb_user_alloc_fail:
1974	return ret;
1975}
1976
1977bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
1978		struct dc_transfer_func *input_tf,
1979		const struct dc_gamma *ramp, bool map_user_ramp)
1980{
1981	struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts;
1982	struct dividers dividers;
1983	struct pwl_float_data *rgb_user = NULL;
1984	struct pwl_float_data_ex *curve = NULL;
1985	struct gamma_pixel *axis_x = NULL;
1986	struct pixel_gamma_point *coeff = NULL;
1987	enum dc_transfer_func_predefined tf;
1988	uint32_t i;
1989	bool ret = false;
1990
1991	if (input_tf->type == TF_TYPE_BYPASS)
1992		return false;
1993
1994	/* we can use hardcoded curve for plain SRGB TF
1995	 * If linear, it's bypass if no user ramp
1996	 */
1997	if (input_tf->type == TF_TYPE_PREDEFINED) {
1998		if ((input_tf->tf == TRANSFER_FUNCTION_SRGB ||
1999				input_tf->tf == TRANSFER_FUNCTION_LINEAR) &&
2000				!map_user_ramp)
2001			return true;
2002
2003		if (dc_caps != NULL &&
2004			dc_caps->dpp.dcn_arch == 1) {
2005
2006			if (input_tf->tf == TRANSFER_FUNCTION_PQ &&
2007					dc_caps->dpp.dgam_rom_caps.pq == 1)
2008				return true;
2009
2010			if (input_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
2011					dc_caps->dpp.dgam_rom_caps.gamma2_2 == 1)
2012				return true;
2013
2014			// HLG OOTF not accounted for
2015			if (input_tf->tf == TRANSFER_FUNCTION_HLG &&
2016					dc_caps->dpp.dgam_rom_caps.hlg == 1)
2017				return true;
2018		}
2019	}
2020
2021	input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
2022
2023	if (map_user_ramp && ramp && ramp->type == GAMMA_RGB_256) {
2024		rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
2025				sizeof(*rgb_user),
2026				GFP_KERNEL);
2027		if (!rgb_user)
2028			goto rgb_user_alloc_fail;
2029
2030		axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x),
2031				GFP_KERNEL);
2032		if (!axis_x)
2033			goto axis_x_alloc_fail;
2034
2035		dividers.divider1 = dc_fixpt_from_fraction(3, 2);
2036		dividers.divider2 = dc_fixpt_from_int(2);
2037		dividers.divider3 = dc_fixpt_from_fraction(5, 2);
2038
2039		build_evenly_distributed_points(
2040				axis_x,
2041				ramp->num_entries,
2042				dividers);
2043
2044		scale_gamma(rgb_user, ramp, dividers);
2045	}
2046
2047	curve = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*curve),
2048			GFP_KERNEL);
2049	if (!curve)
2050		goto curve_alloc_fail;
2051
2052	coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
2053			GFP_KERNEL);
2054	if (!coeff)
2055		goto coeff_alloc_fail;
2056
2057	tf = input_tf->tf;
2058
2059	if (tf == TRANSFER_FUNCTION_PQ)
2060		build_de_pq(curve,
2061				MAX_HW_POINTS,
2062				coordinates_x);
2063	else if (tf == TRANSFER_FUNCTION_SRGB ||
2064		tf == TRANSFER_FUNCTION_BT709 ||
2065		tf == TRANSFER_FUNCTION_GAMMA22 ||
2066		tf == TRANSFER_FUNCTION_GAMMA24 ||
2067		tf == TRANSFER_FUNCTION_GAMMA26)
2068		build_degamma(curve,
2069				MAX_HW_POINTS,
2070				coordinates_x,
2071				tf);
2072	else if (tf == TRANSFER_FUNCTION_HLG)
2073		build_hlg_degamma(curve,
2074				MAX_HW_POINTS,
2075				coordinates_x,
2076				80, 1000);
2077	else if (tf == TRANSFER_FUNCTION_LINEAR) {
2078		// just copy coordinates_x into curve
2079		i = 0;
2080		while (i != MAX_HW_POINTS + 1) {
2081			curve[i].r = coordinates_x[i].x;
2082			curve[i].g = curve[i].r;
2083			curve[i].b = curve[i].r;
2084			i++;
2085		}
2086	} else
2087		goto invalid_tf_fail;
2088
2089	tf_pts->end_exponent = 0;
2090	tf_pts->x_point_at_y1_red = 1;
2091	tf_pts->x_point_at_y1_green = 1;
2092	tf_pts->x_point_at_y1_blue = 1;
2093
2094	if (input_tf->tf == TRANSFER_FUNCTION_PQ) {
2095		/* just copy current rgb_regamma into  tf_pts */
2096		struct pwl_float_data_ex *curvePt = curve;
2097		int i = 0;
2098
2099		while (i <= MAX_HW_POINTS) {
2100			tf_pts->red[i]   = curvePt->r;
2101			tf_pts->green[i] = curvePt->g;
2102			tf_pts->blue[i]  = curvePt->b;
2103			++curvePt;
2104			++i;
2105		}
2106	} else {
2107		// clamps to 0-1
2108		map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
2109				coordinates_x, axis_x, curve,
2110				MAX_HW_POINTS, tf_pts,
2111				map_user_ramp && ramp && ramp->type == GAMMA_RGB_256,
2112				true);
2113	}
2114
2115
2116
2117	if (ramp && ramp->type == GAMMA_CUSTOM)
2118		apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
2119
2120	ret = true;
2121
2122invalid_tf_fail:
2123	kvfree(coeff);
2124coeff_alloc_fail:
2125	kvfree(curve);
2126curve_alloc_fail:
2127	kvfree(axis_x);
2128axis_x_alloc_fail:
2129	kvfree(rgb_user);
2130rgb_user_alloc_fail:
2131
2132	return ret;
2133}
2134
2135static bool calculate_curve(enum dc_transfer_func_predefined trans,
2136				struct dc_transfer_func_distributed_points *points,
2137				struct pwl_float_data_ex *rgb_regamma,
2138				const struct hdr_tm_params *fs_params,
2139				uint32_t sdr_ref_white_level,
2140				struct calculate_buffer *cal_buffer)
2141{
2142	uint32_t i;
2143	bool ret = false;
2144
2145	if (trans == TRANSFER_FUNCTION_UNITY ||
2146		trans == TRANSFER_FUNCTION_LINEAR) {
2147		points->end_exponent = 0;
2148		points->x_point_at_y1_red = 1;
2149		points->x_point_at_y1_green = 1;
2150		points->x_point_at_y1_blue = 1;
2151
2152		for (i = 0; i <= MAX_HW_POINTS ; i++) {
2153			rgb_regamma[i].r = coordinates_x[i].x;
2154			rgb_regamma[i].g = coordinates_x[i].x;
2155			rgb_regamma[i].b = coordinates_x[i].x;
2156		}
2157
2158		ret = true;
2159	} else if (trans == TRANSFER_FUNCTION_PQ) {
2160		points->end_exponent = 7;
2161		points->x_point_at_y1_red = 125;
2162		points->x_point_at_y1_green = 125;
2163		points->x_point_at_y1_blue = 125;
2164
2165		build_pq(rgb_regamma,
2166				MAX_HW_POINTS,
2167				coordinates_x,
2168				sdr_ref_white_level);
2169
2170		ret = true;
2171	} else if (trans == TRANSFER_FUNCTION_GAMMA22 &&
2172			fs_params != NULL && fs_params->skip_tm == 0) {
2173		build_freesync_hdr(rgb_regamma,
2174				MAX_HW_POINTS,
2175				coordinates_x,
2176				fs_params,
2177				cal_buffer);
2178
2179		ret = true;
2180	} else if (trans == TRANSFER_FUNCTION_HLG) {
2181		points->end_exponent = 4;
2182		points->x_point_at_y1_red = 12;
2183		points->x_point_at_y1_green = 12;
2184		points->x_point_at_y1_blue = 12;
2185
2186		build_hlg_regamma(rgb_regamma,
2187				MAX_HW_POINTS,
2188				coordinates_x,
2189				80, 1000);
2190
2191		ret = true;
2192	} else {
2193		// trans == TRANSFER_FUNCTION_SRGB
2194		// trans == TRANSFER_FUNCTION_BT709
2195		// trans == TRANSFER_FUNCTION_GAMMA22
2196		// trans == TRANSFER_FUNCTION_GAMMA24
2197		// trans == TRANSFER_FUNCTION_GAMMA26
2198		points->end_exponent = 0;
2199		points->x_point_at_y1_red = 1;
2200		points->x_point_at_y1_green = 1;
2201		points->x_point_at_y1_blue = 1;
2202
2203		build_regamma(rgb_regamma,
2204				MAX_HW_POINTS,
2205				coordinates_x,
2206				trans,
2207				cal_buffer);
2208
2209		ret = true;
2210	}
2211
2212	return ret;
2213}
2214
2215bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
2216					const struct dc_gamma *ramp,
2217					bool map_user_ramp,
2218					bool can_rom_be_used,
2219					const struct hdr_tm_params *fs_params,
2220					struct calculate_buffer *cal_buffer)
2221{
2222	struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
2223	struct dividers dividers;
2224
2225	struct pwl_float_data *rgb_user = NULL;
2226	struct pwl_float_data_ex *rgb_regamma = NULL;
2227	struct gamma_pixel *axis_x = NULL;
2228	struct pixel_gamma_point *coeff = NULL;
2229	enum dc_transfer_func_predefined tf;
2230	bool do_clamping = true;
2231	bool ret = false;
2232
2233	if (output_tf->type == TF_TYPE_BYPASS)
2234		return false;
2235
2236	/* we can use hardcoded curve for plain SRGB TF */
2237	if (output_tf->type == TF_TYPE_PREDEFINED && can_rom_be_used == true &&
2238			output_tf->tf == TRANSFER_FUNCTION_SRGB) {
2239		if (ramp == NULL)
2240			return true;
2241		if ((ramp->is_identity && ramp->type != GAMMA_CS_TFM_1D) ||
2242		    (!map_user_ramp && ramp->type == GAMMA_RGB_256))
2243			return true;
2244	}
2245
2246	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
2247
2248	if (ramp && ramp->type != GAMMA_CS_TFM_1D &&
2249	    (map_user_ramp || ramp->type != GAMMA_RGB_256)) {
2250		rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
2251			    sizeof(*rgb_user),
2252			    GFP_KERNEL);
2253		if (!rgb_user)
2254			goto rgb_user_alloc_fail;
2255
2256		axis_x = kvcalloc(ramp->num_entries + 3, sizeof(*axis_x),
2257				GFP_KERNEL);
2258		if (!axis_x)
2259			goto axis_x_alloc_fail;
2260
2261		dividers.divider1 = dc_fixpt_from_fraction(3, 2);
2262		dividers.divider2 = dc_fixpt_from_int(2);
2263		dividers.divider3 = dc_fixpt_from_fraction(5, 2);
2264
2265		build_evenly_distributed_points(
2266				axis_x,
2267				ramp->num_entries,
2268				dividers);
2269
2270		if (ramp->type == GAMMA_RGB_256 && map_user_ramp)
2271			scale_gamma(rgb_user, ramp, dividers);
2272		else if (ramp->type == GAMMA_RGB_FLOAT_1024)
2273			scale_gamma_dx(rgb_user, ramp, dividers);
2274	}
2275
2276	rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2277			       sizeof(*rgb_regamma),
2278			       GFP_KERNEL);
2279	if (!rgb_regamma)
2280		goto rgb_regamma_alloc_fail;
2281
2282	coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
2283			 GFP_KERNEL);
2284	if (!coeff)
2285		goto coeff_alloc_fail;
2286
2287	tf = output_tf->tf;
2288
2289	ret = calculate_curve(tf,
2290			tf_pts,
2291			rgb_regamma,
2292			fs_params,
2293			output_tf->sdr_ref_white_level,
2294			cal_buffer);
2295
2296	if (ret) {
2297		do_clamping = !(output_tf->tf == TRANSFER_FUNCTION_PQ) &&
2298				!(output_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
2299				fs_params != NULL && fs_params->skip_tm == 0);
2300
2301		map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
2302					 coordinates_x, axis_x, rgb_regamma,
2303					 MAX_HW_POINTS, tf_pts,
2304					 (map_user_ramp || (ramp && ramp->type != GAMMA_RGB_256)) &&
2305					 (ramp && ramp->type != GAMMA_CS_TFM_1D),
2306					 do_clamping);
2307
2308		if (ramp && ramp->type == GAMMA_CS_TFM_1D)
2309			apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
2310	}
2311
2312	kvfree(coeff);
2313coeff_alloc_fail:
2314	kvfree(rgb_regamma);
2315rgb_regamma_alloc_fail:
2316	kvfree(axis_x);
2317axis_x_alloc_fail:
2318	kvfree(rgb_user);
2319rgb_user_alloc_fail:
2320	return ret;
2321}
2322