1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Support for Intel Camera Imaging ISP subsystem.
4 * Copyright (c) 2015, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13 * more details.
14 */
15
16#include "ia_css_types.h"
17#include "sh_css_defs.h"
18#ifndef IA_CSS_NO_DEBUG
19#include "ia_css_debug.h"
20#endif
21#include "sh_css_frac.h"
22#include "assert_support.h"
23
24#include "bh/bh_2/ia_css_bh.host.h"
25#include "ia_css_s3a.host.h"
26
/*
 * Default 3A (AE / AWB / AF) configuration.
 *
 * The initializer is positional, so each value lands in the member of
 * struct ia_css_3a_config that occupies the same position in the struct
 * declaration (which lives outside this file).
 *
 * NOTE(review): the per-line labels below assume the member order
 * ae_y_coef_r, ae_y_coef_g, ae_y_coef_b, awb_lg_high_raw, awb_lg_low,
 * awb_lg_high, af_fir1_coef[7], af_fir2_coef[7] -- confirm against the
 * struct declaration before relying on them.
 */
const struct ia_css_3a_config default_3a_config = {
	25559,	/* presumably ae_y_coef_r */
	32768,	/* presumably ae_y_coef_g */
	7209,	/* presumably ae_y_coef_b */
	65535,	/* presumably awb_lg_high_raw */
	0,	/* presumably awb_lg_low */
	65535,	/* presumably awb_lg_high */
	/* presumably af_fir1_coef[] (7 entries) */
	{-3344, -6104, -19143, 19143, 6104, 3344, 0},
	/* presumably af_fir2_coef[] (7 entries) */
	{1027, 0, -9219, 16384, -9219, 1027, 0}
};
37
/*
 * Raw bit depth most recently passed to ia_css_s3a_configure().
 * Read by the AWB encoder in this file when fitting awb_lg_high_raw.
 */
static unsigned int s3a_raw_bit_depth;

/*
 * ia_css_s3a_configure() - record the raw bit depth for later encoding.
 * @raw_bit_depth: value stored in the file-local s3a_raw_bit_depth.
 */
void ia_css_s3a_configure(unsigned int raw_bit_depth)
{
	s3a_raw_bit_depth = raw_bit_depth;
}
45
46static void
47ia_css_ae_encode(
48    struct sh_css_isp_ae_params *to,
49    const struct ia_css_3a_config *from,
50    unsigned int size)
51{
52	(void)size;
53	/* coefficients to calculate Y */
54	to->y_coef_r =
55	    uDIGIT_FITTING(from->ae_y_coef_r, 16, SH_CSS_AE_YCOEF_SHIFT);
56	to->y_coef_g =
57	    uDIGIT_FITTING(from->ae_y_coef_g, 16, SH_CSS_AE_YCOEF_SHIFT);
58	to->y_coef_b =
59	    uDIGIT_FITTING(from->ae_y_coef_b, 16, SH_CSS_AE_YCOEF_SHIFT);
60}
61
62static void
63ia_css_awb_encode(
64    struct sh_css_isp_awb_params *to,
65    const struct ia_css_3a_config *from,
66    unsigned int size)
67{
68	(void)size;
69	/* AWB level gate */
70	to->lg_high_raw =
71	    uDIGIT_FITTING(from->awb_lg_high_raw, 16, s3a_raw_bit_depth);
72	to->lg_low =
73	    uDIGIT_FITTING(from->awb_lg_low, 16, SH_CSS_BAYER_BITS);
74	to->lg_high =
75	    uDIGIT_FITTING(from->awb_lg_high, 16, SH_CSS_BAYER_BITS);
76}
77
78static void
79ia_css_af_encode(
80    struct sh_css_isp_af_params *to,
81    const struct ia_css_3a_config *from,
82    unsigned int size)
83{
84	unsigned int i;
85	(void)size;
86
87	/* af fir coefficients */
88	for (i = 0; i < 7; ++i) {
89		to->fir1[i] =
90		    sDIGIT_FITTING(from->af_fir1_coef[i], 15,
91				   SH_CSS_AF_FIR_SHIFT);
92		to->fir2[i] =
93		    sDIGIT_FITTING(from->af_fir2_coef[i], 15,
94				   SH_CSS_AF_FIR_SHIFT);
95	}
96}
97
98void
99ia_css_s3a_encode(
100    struct sh_css_isp_s3a_params *to,
101    const struct ia_css_3a_config *from,
102    unsigned int size)
103{
104	(void)size;
105
106	ia_css_ae_encode(&to->ae,   from, sizeof(to->ae));
107	ia_css_awb_encode(&to->awb, from, sizeof(to->awb));
108	ia_css_af_encode(&to->af,   from, sizeof(to->af));
109}
110
/*
 * ia_css_process_s3a() is compiled out by the "#if 0" below.
 *
 * When enabled it would look up the S3A DMEM offset of the stage's binary
 * and, if that offset is non-negative, encode params->s3a_config into the
 * stage's DMEM0 parameter area and flag the parameters as changed.
 *
 * NOTE(review): the body has drifted from the rest of this file and would
 * need attention before being re-enabled:
 *  - ia_css_s3a_encode() is called with two arguments, while its definition
 *    in this file takes three (to, from, size);
 *  - the S3A and BH encoders are both handed the same dmem_offset address;
 *  - "stage" is dereferenced in the initializer without any check, before
 *    assert(params) runs;
 *  - params->isp_params_changed is set both inside and after the "if".
 */
#if 0
void
ia_css_process_s3a(
    unsigned int pipe_id,
    const struct ia_css_pipeline_stage *stage,
    struct ia_css_isp_parameters *params)
{
	short dmem_offset = stage->binary->info->mem_offsets->dmem.s3a;

	assert(params);

	if (dmem_offset >= 0) {
		ia_css_s3a_encode((struct sh_css_isp_s3a_params *)
				  &stage->isp_mem_params[IA_CSS_ISP_DMEM0].address[dmem_offset],
				  &params->s3a_config);
		ia_css_bh_encode((struct sh_css_isp_bh_params *)
				 &stage->isp_mem_params[IA_CSS_ISP_DMEM0].address[dmem_offset],
				 &params->s3a_config);
		params->isp_params_changed = true;
		params->isp_mem_params_changed[pipe_id][stage->stage_num][IA_CSS_ISP_DMEM0] =
		    true;
	}

	params->isp_params_changed = true;
}
#endif
137
138#ifndef IA_CSS_NO_DEBUG
139void
140ia_css_ae_dump(
141    const struct sh_css_isp_ae_params *ae,
142    unsigned int level)
143{
144	if (!ae) return;
145	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
146			    "ae_y_coef_r", ae->y_coef_r);
147	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
148			    "ae_y_coef_g", ae->y_coef_g);
149	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
150			    "ae_y_coef_b", ae->y_coef_b);
151}
152
153void
154ia_css_awb_dump(
155    const struct sh_css_isp_awb_params *awb,
156    unsigned int level)
157{
158	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
159			    "awb_lg_high_raw", awb->lg_high_raw);
160	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
161			    "awb_lg_low", awb->lg_low);
162	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
163			    "awb_lg_high", awb->lg_high);
164}
165
166void
167ia_css_af_dump(
168    const struct sh_css_isp_af_params *af,
169    unsigned int level)
170{
171	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
172			    "af_fir1[0]", af->fir1[0]);
173	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
174			    "af_fir1[1]", af->fir1[1]);
175	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
176			    "af_fir1[2]", af->fir1[2]);
177	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
178			    "af_fir1[3]", af->fir1[3]);
179	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
180			    "af_fir1[4]", af->fir1[4]);
181	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
182			    "af_fir1[5]", af->fir1[5]);
183	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
184			    "af_fir1[6]", af->fir1[6]);
185	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
186			    "af_fir2[0]", af->fir2[0]);
187	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
188			    "af_fir2[1]", af->fir2[1]);
189	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
190			    "af_fir2[2]", af->fir2[2]);
191	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
192			    "af_fir2[3]", af->fir2[3]);
193	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
194			    "af_fir2[4]", af->fir2[4]);
195	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
196			    "af_fir2[5]", af->fir2[5]);
197	ia_css_debug_dtrace(level, "\t%-32s = %d\n",
198			    "af_fir2[6]", af->fir2[6]);
199}
200
201void
202ia_css_s3a_dump(
203    const struct sh_css_isp_s3a_params *s3a,
204    unsigned int level)
205{
206	ia_css_debug_dtrace(level, "S3A Support:\n");
207	ia_css_ae_dump(&s3a->ae, level);
208	ia_css_awb_dump(&s3a->awb, level);
209	ia_css_af_dump(&s3a->af, level);
210}
211
212void
213ia_css_s3a_debug_dtrace(
214    const struct ia_css_3a_config *config,
215    unsigned int level)
216{
217	ia_css_debug_dtrace(level,
218			    "config.ae_y_coef_r=%d, config.ae_y_coef_g=%d, config.ae_y_coef_b=%d, config.awb_lg_high_raw=%d, config.awb_lg_low=%d, config.awb_lg_high=%d\n",
219			    config->ae_y_coef_r, config->ae_y_coef_g,
220			    config->ae_y_coef_b, config->awb_lg_high_raw,
221			    config->awb_lg_low, config->awb_lg_high);
222}
223#endif
224
225void
226ia_css_s3a_hmem_decode(
227    struct ia_css_3a_statistics *host_stats,
228    const struct ia_css_bh_table *hmem_buf)
229{
230	struct ia_css_3a_rgby_output	*out_ptr;
231	int			i;
232
233	/* pixel counts(BQ) for 3A area */
234	int count_for_3a;
235	int sum_r, diff;
236
237	assert(host_stats);
238	assert(host_stats->rgby_data);
239	assert(hmem_buf);
240
241	count_for_3a = host_stats->grid.width * host_stats->grid.height
242		       * host_stats->grid.bqs_per_grid_cell
243		       * host_stats->grid.bqs_per_grid_cell;
244
245	out_ptr = host_stats->rgby_data;
246
247	ia_css_bh_hmem_decode(out_ptr, hmem_buf);
248
249	/* Calculate sum of histogram of R,
250	   which should not be less than count_for_3a */
251	sum_r = 0;
252	for (i = 0; i < HMEM_UNIT_SIZE; i++) {
253		sum_r += out_ptr[i].r;
254	}
255	if (sum_r < count_for_3a) {
256		/* histogram is invalid */
257		return;
258	}
259
260	/* Verify for sum of histogram of R/G/B/Y */
261#if 0
262	{
263		int sum_g = 0;
264		int sum_b = 0;
265		int sum_y = 0;
266
267		for (i = 0; i < HMEM_UNIT_SIZE; i++) {
268			sum_g += out_ptr[i].g;
269			sum_b += out_ptr[i].b;
270			sum_y += out_ptr[i].y;
271		}
272		if (sum_g != sum_r || sum_b != sum_r || sum_y != sum_r) {
273			/* histogram is invalid */
274			return;
275		}
276	}
277#endif
278
279	/*
280	 * Limit the histogram area only to 3A area.
281	 * In DSP, the histogram of 0 is incremented for pixels
282	 * which are outside of 3A area. That amount should be subtracted here.
283	 *   hist[0] = hist[0] - ((sum of all hist[]) - (pixel count for 3A area))
284	 */
285	diff = sum_r - count_for_3a;
286	out_ptr[0].r -= diff;
287	out_ptr[0].g -= diff;
288	out_ptr[0].b -= diff;
289	out_ptr[0].y -= diff;
290}
291
292void
293ia_css_s3a_dmem_decode(
294    struct ia_css_3a_statistics *host_stats,
295    const struct ia_css_3a_output *isp_stats)
296{
297	int isp_width, host_width, height, i;
298	struct ia_css_3a_output *host_ptr;
299
300	assert(host_stats);
301	assert(host_stats->data);
302	assert(isp_stats);
303
304	isp_width  = host_stats->grid.aligned_width;
305	host_width = host_stats->grid.width;
306	height     = host_stats->grid.height;
307	host_ptr   = host_stats->data;
308
309	/* Getting 3A statistics from DMEM does not involve any
310	 * transformation (like the VMEM version), we just copy the data
311	 * using a different output width. */
312	for (i = 0; i < height; i++) {
313		memcpy(host_ptr, isp_stats, host_width * sizeof(*host_ptr));
314		isp_stats += isp_width;
315		host_ptr += host_width;
316	}
317}
318
/*
 * MW: this is an ISP function
 *
 * merge_hi_lo_14() - combine two 14-bit halves into one 28-bit value.
 * @hi: supplies bits 14..27 of the result (only its low 14 bits are used).
 * @lo: supplies bits 0..13 of the result (only its low 14 bits are used).
 *
 * Return: the merged value; the top four bits of the result are always zero.
 */
static inline int
merge_hi_lo_14(unsigned short hi, unsigned short lo)
{
	const unsigned int low14_mask = 0x3fff;
	unsigned int upper = ((unsigned int)hi & low14_mask) << 14;
	unsigned int lower = (unsigned int)lo & low14_mask;

	return (int)(upper | lower);
}
327
328void
329ia_css_s3a_vmem_decode(
330    struct ia_css_3a_statistics *host_stats,
331    const u16 *isp_stats_hi,
332    const uint16_t *isp_stats_lo)
333{
334	int out_width, out_height, chunk, rest, kmax, y, x, k, elm_start, elm, ofs;
335	const u16 *hi, *lo;
336	struct ia_css_3a_output *output;
337
338	assert(host_stats);
339	assert(host_stats->data);
340	assert(isp_stats_hi);
341	assert(isp_stats_lo);
342
343	output = host_stats->data;
344	out_width  = host_stats->grid.width;
345	out_height = host_stats->grid.height;
346	hi = isp_stats_hi;
347	lo = isp_stats_lo;
348
349	chunk = ISP_VEC_NELEMS >> host_stats->grid.deci_factor_log2;
350	chunk = max(chunk, 1);
351
352	for (y = 0; y < out_height; y++) {
353		elm_start = y * ISP_S3ATBL_HI_LO_STRIDE;
354		rest = out_width;
355		x = 0;
356		while (x < out_width) {
357			kmax = (rest > chunk) ? chunk : rest;
358			ofs = y * out_width + x;
359			elm = elm_start + x * sizeof(*output) / sizeof(int32_t);
360			for (k = 0; k < kmax; k++, elm++) {
361				output[ofs + k].ae_y    = merge_hi_lo_14(
362							      hi[elm + chunk * 0], lo[elm + chunk * 0]);
363				output[ofs + k].awb_cnt = merge_hi_lo_14(
364							      hi[elm + chunk * 1], lo[elm + chunk * 1]);
365				output[ofs + k].awb_gr  = merge_hi_lo_14(
366							      hi[elm + chunk * 2], lo[elm + chunk * 2]);
367				output[ofs + k].awb_r   = merge_hi_lo_14(
368							      hi[elm + chunk * 3], lo[elm + chunk * 3]);
369				output[ofs + k].awb_b   = merge_hi_lo_14(
370							      hi[elm + chunk * 4], lo[elm + chunk * 4]);
371				output[ofs + k].awb_gb  = merge_hi_lo_14(
372							      hi[elm + chunk * 5], lo[elm + chunk * 5]);
373				output[ofs + k].af_hpf1 = merge_hi_lo_14(
374							      hi[elm + chunk * 6], lo[elm + chunk * 6]);
375				output[ofs + k].af_hpf2 = merge_hi_lo_14(
376							      hi[elm + chunk * 7], lo[elm + chunk * 7]);
377			}
378			x += chunk;
379			rest -= chunk;
380		}
381	}
382}
383