1/*
2	Copyright (c) 2002-2004, Thomas Kurschel
3
4	Part of Radeon accelerant
5
6	Hardware access routines for overlays
7*/
8
9#include "GlobalData.h"
10#include "radeon_interface.h"
11#include "mmio.h"
12#include "overlay_regs.h"
13#include "pll_regs.h"
14#include "capture_regs.h"
15#include "utils.h"
16#include "pll_access.h"
17#include <math.h>
18#include <string.h>
19#include "CP.h"
20
21
// forward declaration: Radeon_ShowOverlay calls this function before
// its definition further down in this file
void Radeon_TempHideOverlay( accelerator_info *ai );
23
// standard (linear) gamma
// One entry per overlay gamma register. Radeon_InitOverlay writes
// (slope << 16) | offset into "reg"; entries flagged "r200_or_above"
// are skipped on chips older than rt_r200.
static struct {
    uint16 reg;				// register the entry is written to
    bool r200_or_above;		// true: only written if asic >= rt_r200
    uint32 slope;			// placed into the upper 16 bits of the register value
    uint32 offset;			// placed into the lower bits of the register value
} std_gamma[] = {
    { RADEON_OV0_GAMMA_0_F, false, 0x100, 0x0000 },
    { RADEON_OV0_GAMMA_10_1F, false, 0x100, 0x0020 },
    { RADEON_OV0_GAMMA_20_3F, false, 0x100, 0x0040 },
    { RADEON_OV0_GAMMA_40_7F, false, 0x100, 0x0080 },
    { RADEON_OV0_GAMMA_80_BF, true, 0x100, 0x0100 },
    { RADEON_OV0_GAMMA_C0_FF, true, 0x100, 0x0100 },
    { RADEON_OV0_GAMMA_100_13F, true, 0x100, 0x0200 },
    { RADEON_OV0_GAMMA_140_17F, true, 0x100, 0x0200 },
    { RADEON_OV0_GAMMA_180_1BF, true, 0x100, 0x0300 },
    { RADEON_OV0_GAMMA_1C0_1FF, true, 0x100, 0x0300 },
    { RADEON_OV0_GAMMA_200_23F, true, 0x100, 0x0400 },
    { RADEON_OV0_GAMMA_240_27F, true, 0x100, 0x0400 },
    { RADEON_OV0_GAMMA_280_2BF, true, 0x100, 0x0500 },
    { RADEON_OV0_GAMMA_2C0_2FF, true, 0x100, 0x0500 },
    { RADEON_OV0_GAMMA_300_33F, true, 0x100, 0x0600 },
    { RADEON_OV0_GAMMA_340_37F, true, 0x100, 0x0600 },
    { RADEON_OV0_GAMMA_380_3BF, false, 0x100, 0x0700 },
    { RADEON_OV0_GAMMA_3C0_3FF, false, 0x100, 0x0700 }
};
50
51
52// setup overlay unit before first use
53void Radeon_InitOverlay(
54	accelerator_info *ai, int crtc_idx )
55{
56	vuint8 *regs = ai->regs;
57	shared_info *si = ai->si;
58	uint i;
59	uint32 ecp_div;
60
61	SHOW_FLOW0( 0, "" );
62
63	// make sure we really write this value as the "toggle" bit
64	// contained in it (which is zero initially) is edge-sensitive!
65	// for capturing, we need to select "software" video port
66	si->overlay_mgr.auto_flip_reg = RADEON_OV0_VID_PORT_SELECT_SOFTWARE;
67
68	OUTREG( regs, RADEON_OV0_SCALE_CNTL, RADEON_SCALER_SOFT_RESET );
69	OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
70	OUTREG( regs, RADEON_OV0_FILTER_CNTL, 			// use fixed filter coefficients
71		RADEON_OV0_HC_COEF_ON_HORZ_Y |
72		RADEON_OV0_HC_COEF_ON_HORZ_UV |
73		RADEON_OV0_HC_COEF_ON_VERT_Y |
74		RADEON_OV0_HC_COEF_ON_VERT_UV );
75	OUTREG( regs, RADEON_OV0_KEY_CNTL, RADEON_GRAPHIC_KEY_FN_EQ |
76		RADEON_VIDEO_KEY_FN_FALSE |
77		RADEON_CMP_MIX_OR );
78	OUTREG( regs, RADEON_OV0_TEST, 0 );
79//	OUTREG( regs, RADEON_FCP_CNTL, RADEON_FCP_CNTL_GND );	// disable capture clock
80//	OUTREG( regs, RADEON_CAP0_TRIG_CNTL, 0 );				// disable capturing
81	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );
82	// tell deinterlacer to always show recent field
83	OUTREG( regs, RADEON_OV0_DEINTERLACE_PATTERN,
84		0xaaaaa | (9 << RADEON_OV0_DEINT_PAT_LEN_M1_SHIFT) );
85
86	// set gamma
87	for( i = 0; i < sizeof( std_gamma ) / sizeof( std_gamma[0] ); ++i ) {
88		if( !std_gamma[i].r200_or_above || si->asic >= rt_r200 ) {
89			OUTREG( regs, std_gamma[i].reg,
90				(std_gamma[i].slope << 16) | std_gamma[i].offset );
91		}
92	}
93
94	// overlay unit can only handle up to 175 MHz, if pixel clock is higher,
95	// only every second pixel is handled
96	if( si->crtc[crtc_idx].mode.timing.pixel_clock < 175000 )
97		ecp_div = 0;
98	else
99		ecp_div = 1;
100
101	Radeon_OUTPLLP( regs, si->asic, RADEON_VCLK_ECP_CNTL,
102		ecp_div << RADEON_ECP_DIV_SHIFT, ~RADEON_ECP_DIV_MASK );
103
104	// Force the overlay clock on for integrated chips
105	if ((si->asic == rt_rs100) ||
106	(si->asic == rt_rs200) ||
107	(si->asic == rt_rs300)) {
108		Radeon_OUTPLL( regs, si->asic, RADEON_VCLK_ECP_CNTL,
109        	(Radeon_INPLL( regs, si->asic, RADEON_VCLK_ECP_CNTL) | (1<<18)));
110    }
111
112	si->active_overlay.crtc_idx = si->pending_overlay.crtc_idx;
113
114	// invalidate active colour space
115	si->active_overlay.ob.space = -1;
116
117	// invalidate position/scaling
118	si->active_overlay.ob.width = -1;
119}
120
// colour space transformation matrix
// Each output channel (R, G, B) has one coefficient per input channel;
// Radeon_SetTransform combines them with contrast, saturation and hue.
typedef struct space_transform
{
    float   RefLuma;	// scaling of luma to use full RGB range
    float   RefRCb;		// b/u -> r
    float   RefRY;		// g/y -> r
    float   RefRCr;		// r/v -> r
    float   RefGCb;		// b/u -> g
    float   RefGY;		// g/y -> g
    float   RefGCr;		// r/v -> g
    float   RefBCb;		// b/u -> b
    float   RefBY;		// g/y -> b
    float   RefBCr;		// r/v -> b
} space_transform;
135
136
// Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces
// (index 0 = BT.601, index 1 = BT.709; Radeon_SetTransform selects the
//  entry via its "ref" argument)
space_transform trans_yuv[2] =
{
    { 1.1678, 0.0, 1, 1.6007, -0.3929, 1, -0.8154, 2.0232, 1, 0.0 }, /* BT.601 */
    { 1.1678, 0.0, 1, 1.7980, -0.2139, 1, -0.5345, 2.1186, 1, 0.0 }  /* BT.709 */
};
143
144
// RGB is a pass through:
// only RefLuma, RefRCr, RefGY and RefBCb are 1, i.e. the r/v input feeds
// red, the g/y input feeds green and the b/u input feeds blue
space_transform trans_rgb =
	{ 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 };
148
149
150// set overlay colour space transformation matrix
151static void Radeon_SetTransform(
152	accelerator_info *ai,
153	float	    bright,
154	float	    cont,
155	float	    sat,
156	float	    hue,
157	float	    red_intensity,
158	float	    green_intensity,
159	float	    blue_intensity,
160	uint	    ref)
161{
162	vuint8 *regs = ai->regs;
163	shared_info *si = ai->si;
164	float	    OvHueSin, OvHueCos;
165	float	    CAdjOff;
166	float		CAdjRY, CAdjGY, CAdjBY;
167	float	    CAdjRCb, CAdjRCr;
168	float	    CAdjGCb, CAdjGCr;
169	float	    CAdjBCb, CAdjBCr;
170	float	    RedAdj,GreenAdj,BlueAdj;
171	float	    OvROff, OvGOff, OvBOff;
172	float		OvRY, OvGY, OvBY;
173	float	    OvRCb, OvRCr;
174	float	    OvGCb, OvGCr;
175	float	    OvBCb, OvBCr;
176	float	    Loff;
177	float	    Coff;
178
179	uint32	    dwOvROff, dwOvGOff, dwOvBOff;
180	uint32		dwOvRY, dwOvGY, dwOvBY;
181	uint32	    dwOvRCb, dwOvRCr;
182	uint32	    dwOvGCb, dwOvGCr;
183	uint32	    dwOvBCb, dwOvBCr;
184
185	space_transform	*trans;
186
187	SHOW_FLOW0( 0, "" );
188
189	// get proper conversion formula
190	switch( si->pending_overlay.ob.space ) {
191	case B_YCbCr422:
192	case B_YUV12:
193		Loff = 16 * 4;		// internal representation is 10 Bits
194		Coff = 128 * 4;
195
196		if (ref >= 2)
197			ref = 0;
198
199		trans = &trans_yuv[ref];
200		break;
201
202	case B_RGB15:
203	case B_RGB16:
204	case B_RGB32:
205	default:
206		Loff = 0;
207		Coff = 0;
208		trans = &trans_rgb;
209	}
210
211	OvHueSin = sin(hue);
212	OvHueCos = cos(hue);
213
214	// get matrix values to convert overlay colour space to RGB
215	// applying colour adjustment, saturation and luma scaling
216	// (saturation doesn't work with RGB input, perhaps it did with some
217	//  maths; this is left to the reader :)
218	CAdjRY = cont * trans->RefLuma * trans->RefRY;
219	CAdjGY = cont * trans->RefLuma * trans->RefGY;
220	CAdjBY = cont * trans->RefLuma * trans->RefBY;
221
222	CAdjRCb = sat * -OvHueSin * trans->RefRCr;
223	CAdjRCr = sat * OvHueCos * trans->RefRCr;
224	CAdjGCb = sat * (OvHueCos * trans->RefGCb - OvHueSin * trans->RefGCr);
225	CAdjGCr = sat * (OvHueSin * trans->RefGCb + OvHueCos * trans->RefGCr);
226	CAdjBCb = sat * OvHueCos * trans->RefBCb;
227	CAdjBCr = sat * OvHueSin * trans->RefBCb;
228
229	// adjust black level
230	CAdjOff = cont * trans[ref].RefLuma * bright * 1023.0;
231	RedAdj = cont * trans[ref].RefLuma * red_intensity * 1023.0;
232	GreenAdj = cont * trans[ref].RefLuma * green_intensity * 1023.0;
233	BlueAdj = cont * trans[ref].RefLuma * blue_intensity * 1023.0;
234
235	OvRY = CAdjRY;
236	OvGY = CAdjGY;
237	OvBY = CAdjBY;
238	OvRCb = CAdjRCb;
239	OvRCr = CAdjRCr;
240	OvGCb = CAdjGCb;
241	OvGCr = CAdjGCr;
242	OvBCb = CAdjBCb;
243	OvBCr = CAdjBCr;
244	// apply offsets
245	OvROff = RedAdj + CAdjOff -	CAdjRY * Loff - (OvRCb + OvRCr) * Coff;
246	OvGOff = GreenAdj + CAdjOff - CAdjGY * Loff - (OvGCb + OvGCr) * Coff;
247	OvBOff = BlueAdj + CAdjOff - CAdjBY * Loff - (OvBCb + OvBCr) * Coff;
248
249	dwOvROff = ((int32)(OvROff * 2.0)) & 0x1fff;
250	dwOvGOff = ((int32)(OvGOff * 2.0)) & 0x1fff;
251	dwOvBOff = ((int32)(OvBOff * 2.0)) & 0x1fff;
252
253	dwOvRY = (((int32)(OvRY * 2048.0))&0x7fff)<<17;
254	dwOvGY = (((int32)(OvGY * 2048.0))&0x7fff)<<17;
255	dwOvBY = (((int32)(OvBY * 2048.0))&0x7fff)<<17;
256	dwOvRCb = (((int32)(OvRCb * 2048.0))&0x7fff)<<1;
257	dwOvRCr = (((int32)(OvRCr * 2048.0))&0x7fff)<<17;
258	dwOvGCb = (((int32)(OvGCb * 2048.0))&0x7fff)<<1;
259	dwOvGCr = (((int32)(OvGCr * 2048.0))&0x7fff)<<17;
260	dwOvBCb = (((int32)(OvBCb * 2048.0))&0x7fff)<<1;
261	dwOvBCr = (((int32)(OvBCr * 2048.0))&0x7fff)<<17;
262
263	OUTREG( regs, RADEON_OV0_LIN_TRANS_A, dwOvRCb | dwOvRY );
264	OUTREG( regs, RADEON_OV0_LIN_TRANS_B, dwOvROff | dwOvRCr );
265	OUTREG( regs, RADEON_OV0_LIN_TRANS_C, dwOvGCb | dwOvGY );
266	OUTREG( regs, RADEON_OV0_LIN_TRANS_D, dwOvGOff | dwOvGCr );
267	OUTREG( regs, RADEON_OV0_LIN_TRANS_E, dwOvBCb | dwOvBY );
268	OUTREG( regs, RADEON_OV0_LIN_TRANS_F, dwOvBOff | dwOvBCr );
269
270	si->active_overlay.ob.space = si->pending_overlay.ob.space;
271}
272
273
274// convert Be colour key to rgb value
275static uint32 colourKey2RGB32(
276	uint32 space, uint8 red, uint8 green, uint8 blue )
277{
278	uint32 res;
279
280	SHOW_FLOW0( 3, "" );
281
282	// the way Be defines colour keys may be convinient to some driver developers,
283	// but it's not well defined - took me some time to find out the format used
284	// and still I have no idea how alpha is defined; Rudolf told me that alpha is
285	// never used
286	switch( space ) {
287	case B_RGB15:
288		res =
289			((uint32)(red >> 0) << (16+3)) |
290			((uint32)(green >> 0) << (8+3)) |
291			((blue >> 0) << 3);
292		break;
293	case B_RGB16:
294		res =
295			((uint32)(red >> 0) << (16+3)) |
296			((uint32)(green >> 0) << (8+2)) |
297			((blue >> 0) << 3);
298		break;
299	case B_RGB32:
300	case B_CMAP8:
301		res = ((uint32)(red) << 16) | ((uint32)(green) << 8) | blue;
302		break;
303	default:
304		res = 0;
305	}
306
307	SHOW_FLOW( 3, "key=%lx", res );
308	return res;
309}
310
311
312// set colour key of overlay
313static void Radeon_SetColourKey(
314	accelerator_info *ai, const overlay_window *ow )
315{
316	virtual_card *vc = ai->vc;
317	vuint8 *regs = ai->regs;
318	uint32 rgb32, mask32, min32, max32;
319
320	/*SHOW_FLOW( 0, "value=%02x %02x %02x, mask=%02x %02x %02x",
321		ow->red.value, ow->green.value, ow->blue.value,
322		ow->red.mask, ow->green.mask, ow->blue.mask );*/
323
324	// Radeons don't support value and mask as colour key but colour range
325	rgb32 = colourKey2RGB32( vc->mode.space,
326		ow->red.value, ow->green.value, ow->blue.value );
327	mask32 = colourKey2RGB32( vc->mode.space,
328		ow->red.mask, ow->green.mask, ow->blue.mask );
329
330	// ~mask32 are all unimportant (usually low order) bits
331	// oring this to the colour should give us the highest valid colour value
332	// (add would be more precise but may lead to overflows)
333	min32 = rgb32;
334	max32 = rgb32 | ~mask32;
335
336	OUTREG( regs, RADEON_OV0_GRAPHICS_KEY_CLR_LOW, min32 );
337	OUTREG( regs, RADEON_OV0_GRAPHICS_KEY_CLR_HIGH, max32 );
338	OUTREG( regs, RADEON_OV0_KEY_CNTL,
339		RADEON_GRAPHIC_KEY_FN_EQ |
340		RADEON_VIDEO_KEY_FN_FALSE |
341		RADEON_CMP_MIX_OR );
342}
343
// one horizontal scaling/filter mode of the overlay scaler;
// getHScaleFactor picks the first table entry whose max_scale is not
// below the requested horizontal increment
typedef struct {
	uint max_scale;					// maximum src_width/dest_width,
									// i.e. source increment per screen pixel
									// (compared against h_inc, which has
									//  12 fractional bits)
	uint8 group_size; 				// size of one filter group in pixels
	uint8 p1_step_by, p23_step_by;	// > 0: log(source pixel increment)+1, 2-tap filter
									// = 0: source pixel increment = 1, 4-tap filter
} hscale_factor;
351
// number of elements of a real array (must not be used on pointers);
// the argument is parenthesized so any array expression can be passed
#define count_of( a ) (sizeof( a ) / sizeof( (a)[0] ))
353
// scaling/filter tables depending on overlay colour space:
// magnifying pixels is no problem, but minifying can lead to overload,
// so we have to skip pixels and/or use 2-tap filters
// (columns: max_scale, group_size, p1_step_by, p23_step_by)

// 16 bit RGB (used for both RGB15 and RGB16)
static hscale_factor scale_RGB16[] = {
	{ (2 << 12), 		2, 1, 1 },
	{ (4 << 12), 		2, 2, 2 },
	{ (8 << 12), 		2, 3, 3 },
	{ (16 << 12), 		2, 4, 4 },
	{ (32 << 12), 		2, 5, 5 }
};
364
// 32 bit RGB
// (columns: max_scale, group_size, p1_step_by, p23_step_by)
static hscale_factor scale_RGB32[] = {
	{ (2 << 12) / 3,	2, 0, 0 },
	{ (4 << 12) / 3,	4, 1, 1 },
	{ (8 << 12) / 3,	4, 2, 2 },
	{ (4 << 12), 		4, 2, 3 },
	{ (16 << 12) / 3,	4, 3, 3 },
	{ (8 << 12), 		4, 3, 4 },
	{ (32 << 12) / 3,	4, 4, 4 },
	{ (16 << 12),		4, 5, 5 }
};
375
// packed YUV 4:2:2 (used for VYUY422 and YVYU422)
// (columns: max_scale, group_size, p1_step_by, p23_step_by)
static hscale_factor scale_YUV[] = {
	{ (16 << 12) / 16,	2, 0, 0 },
	{ (16 << 12) / 12,	2, 0, 1 },	// mode 4, 1, 0 (as used by YUV12) is impossible
	{ (16 << 12) / 8,	4, 1, 1 },
	{ (16 << 12) / 6,	4, 1, 2 },
	{ (16 << 12) / 4,	4, 2, 2 },
	{ (16 << 12) / 3,	4, 2, 3 },
	{ (16 << 12) / 2,	4, 3, 3 },
	{ (16 << 12) / 1,	4, 4, 4 }
};
386
// planar YUV12 (used for the two-plane and the three-plane variant)
// (columns: max_scale, group_size, p1_step_by, p23_step_by)
// Two entries share the limit (16 << 12) / 12; getHScaleFactor takes the
// first one whose 4-tap requirements are satisfied.
static hscale_factor scale_YUV12[] = {
	{ (16 << 12) / 16,			2, 0, 0 },
	{ (16 << 12) / 12,			4, 1, 0 },
	{ (16 << 12) / 12,			2, 0, 1 },
	{ (16 << 12) / 8,			4, 1, 1 },
	{ (16 << 12) / 6,			4, 1, 2 },
	{ (16 << 12) / 4,			4, 2, 2 },
	{ (16 << 12) / 3,			4, 2, 3 },
	{ (16 << 12) / 2,			4, 3, 3 },
	{ (int)((16 << 12) / 1.5),	4, 3, 4 },
	{ (int)((16 << 12) / 1.0),	4, 4, 4 },
	{ (int)((16 << 12) / 0.75),	4, 4, 5 },
	{ (int)((16 << 12) / 0.5),	4, 5, 5 }
};
401
// smallest of three values (built on min(), so arguments may be
// evaluated more than once - pass side-effect free expressions only)
#define min3( a, b, c ) (min( (a), min( (b), (c) )))
403
// planar YUV9
// (columns: max_scale, group_size, p1_step_by, p23_step_by)
// max_scale is the smallest of three separate limits per mode.
// NOTE(review): what each of the three limits stands for is not visible
// in this file - confirm against the hardware documentation.
static hscale_factor scale_YUV9[] = {
	{ min3( (16 << 12) / 12,	(3 << 12) * 1,	(2 << 12) * 4 * 1 ),	2, 0, 0 },
	{ min3( (16 << 12) / 8, 	(3 << 12) * 1,	(2 << 12) * 4 * 1 ),	4, 1, 0 },
	{ min3( (16 << 12) / 10,	(3 << 12) * 1,	(2 << 12) * 4 * 1 ),	2, 0, 1 },
	{ min3( (16 << 12) / 6, 	(3 << 12) * 1,	(2 << 12) * 4 * 1 ),	4, 1, 1 },
	{ min3( (16 << 12) / 5, 	(3 << 12) * 1,	(2 << 12) * 4 * 2 ),	4, 1, 2 },
	{ min3( (16 << 12) / 3, 	(3 << 12) * 2,	(2 << 12) * 4 * 2 ),	4, 2, 2 },
	{ min3( (int)((16 << 12) / 2.5), 	(3 << 12) * 1,	(2 << 12) * 4 * 4 ),	4, 2, 3 },	// probably, it should be (3 << 12) * 2
	{ min3( (int)((16 << 12) / 1.5), 	(3 << 12) * 4,	(2 << 12) * 4 * 4 ),	4, 3, 3 },
	{ min3( (int)((16 << 12) / 0.75), 	(3 << 12) * 8,	(2 << 12) * 4 * 8 ),	4, 4, 4 },
	{ min3( (int)((16 << 12) / 0.625), 	(3 << 12) * 8,	(2 << 12) * 4 * 16 ),	4, 4, 5 },
	{ min3( (int)((16 << 12) / 0.375), 	(3 << 12) * 16,	(2 << 12) * 4 * 16 ),	4, 5, 5 }
};
417
418
// parameters of an overlay colour space
typedef struct {
	uint8 bpp_shift;				// log2( bytes per pixel (main plane) )
	uint8 bpuv_shift;				// log2( bytes per pixel (uv-plane) );
									// if there is one plane only: bpp=bpuv
	uint8 num_planes;				// number of planes
	uint8 h_uv_sub_sample_shift;	// log2( horizontal pixels per uv pair )
	uint8 v_uv_sub_sample_shift;	// log2( vertical pixels per uv pair )
	hscale_factor *factors;			// scaling/filter table
									// (NULL: getHScaleFactor returns NULL)
	uint8 num_factors;				// number of entries in "factors"
} space_params;
430
// colour space parameters, indexed by overlay_buffer_node::ati_space
// (Radeon_ShowOverlay also shifts that value into the scaler control
//  register as the source format);
// entries without a scaling table cannot be shown
static space_params space_params_table[16] = {
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 1, 1, 1, 0, 0, scale_RGB16, count_of( scale_RGB16 ) },	// RGB15
	{ 1, 1, 1, 0, 0, scale_RGB16, count_of( scale_RGB16 ) },	// RGB16
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 2, 2, 1, 0, 0, scale_RGB32, count_of( scale_RGB32 ) },	// RGB32
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 0, 0, 0, 0, 0, NULL, 0 },	// reserved
	{ 0, 0, 3, 2, 2, scale_YUV9, count_of( scale_YUV9 ) },		// YUV9
	{ 0, 0, 3, 1, 1, scale_YUV12, count_of( scale_YUV12 ) },	// YUV12, three-plane
	{ 1, 1, 1, 1, 0, scale_YUV, count_of( scale_YUV ) },		// VYUY422
	{ 1, 1, 1, 1, 0, scale_YUV, count_of( scale_YUV ) },		// YVYU422
	{ 0, 1, 2, 1, 1, scale_YUV12, count_of( scale_YUV12 ) },	// YUV12, two-plane
	{ 0, 1, 2, 1, 1, NULL, 0 },	// ???
	{ 0, 0, 0, 0, 0, NULL, 0 }	// reserved
};
449
450// get appropriate scaling/filter parameters
451static hscale_factor *getHScaleFactor(
452	accelerator_info *ai,
453	space_params *params,
454	uint32 src_left, uint32 src_right, uint32 *h_inc )
455{
456	uint words_per_p1_line, words_per_p23_line, max_words_per_line;
457	bool p1_4tap_allowed, p23_4tap_allowed;
458	uint i;
459	uint num_factors;
460	hscale_factor *factors;
461
462	SHOW_FLOW0( 3, "" );
463
464	// check whether fifo is large enough to feed vertical 4-tap-filter
465
466	words_per_p1_line =
467		ceilShiftDiv( (src_right - 1) << params->bpp_shift, 4 ) -
468		((src_left << params->bpp_shift) >> 4) + 1;
469	words_per_p23_line =
470		ceilShiftDiv( (src_right - 1) << params->bpuv_shift, 4 ) -
471		((src_left << params->bpuv_shift) >> 4) + 1;
472
473	// overlay scaler line length differs for different revisions
474	// this needs to be maintained by hand
475	if (ai->si->asic == rt_r200 || ai->si->asic >= rt_r300)
476		max_words_per_line = 1920 / 16;
477	else
478		max_words_per_line = 1536 / 16;
479
480	switch (params->num_planes) {
481		case 3:
482			p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2;
483			p23_4tap_allowed = words_per_p23_line < max_words_per_line / 4;
484			break;
485		case 2:
486			p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2;
487			p23_4tap_allowed = words_per_p23_line < max_words_per_line / 2;
488			break;
489		case 1:
490		default:
491			p1_4tap_allowed = p23_4tap_allowed = words_per_p1_line < max_words_per_line;
492			break;
493	}
494
495	SHOW_FLOW( 3, "p1_4tap_allowed=%d, p23_4t_allowed=%d",
496		(int)p1_4tap_allowed, (int)p23_4tap_allowed );
497
498	// search for proper scaling/filter entry
499	factors = params->factors;
500	num_factors = params->num_factors;
501
502	if (factors == NULL || num_factors == 0)
503		return NULL;
504
505	for (i = 0; i < num_factors; ++i, ++factors) {
506		if (*h_inc <= factors->max_scale
507			&& (factors->p1_step_by > 0 || p1_4tap_allowed)
508			&& (factors->p23_step_by > 0 || p23_4tap_allowed))
509			break;
510	}
511
512	if (i == num_factors) {
513		// overlay is asked to be scaled down more than allowed,
514		// so use least scaling factor supported
515		--factors;
516		*h_inc = factors->max_scale;
517	}
518
519	SHOW_FLOW( 3, "group_size=%d, p1_step_by=%d, p23_step_by=%d",
520		factors->group_size, factors->p1_step_by, factors->p23_step_by );
521
522	return factors;
523}
524
525
// convert a number to fixed point with "shift" fractional bits,
// e.g. I2FF( 2.5, 12 ) == 0x2800; the result is truncated to uint32
#define I2FF( a, shift ) ((uint32)((a) * (1 << (shift))))
527
528
529// show overlay on screen
530static status_t Radeon_ShowOverlay(
531	accelerator_info *ai, int crtc_idx )
532{
533	virtual_card *vc = ai->vc;
534	shared_info *si = ai->si;
535	vuint8 *regs = ai->regs;
536	overlay_info *overlay = &si->pending_overlay;
537	overlay_buffer_node *node = overlay->on;
538	crtc_info *crtc = &si->crtc[crtc_idx];
539
540	uint32 ecp_div;
541	uint32 v_inc, h_inc;
542	uint32 src_v_inc, src_h_inc;
543	uint32 src_left, src_top, src_right, src_bottom;
544	int32 dest_left, dest_top, dest_right, dest_bottom;
545	uint32 offset;
546	uint32 tmp;
547	uint32 p1_h_accum_init, p23_h_accum_init, p1_v_accum_init, p23_v_accum_init;
548	uint32 p1_active_lines, p23_active_lines;
549	hscale_factor *factors;
550	space_params *params;
551
552	uint32 p1_h_inc, p23_h_inc;
553	uint32 p1_x_start, p1_x_end;
554	uint32 p23_x_start, p23_x_end;
555
556	uint scale_ctrl;
557
558	/*uint32 buffer[20*2];
559	uint idx = 0;*/
560
561	SHOW_FLOW0( 0, "" );
562
563	Radeon_SetColourKey( ai, &overlay->ow );
564
565	// overlay unit can only handle up to 175 MHz; if pixel clock is higher,
566	// only every second pixel is handled
567	// (this devider is gets written into PLL by InitOverlay,
568	//  so we don't need to do it ourself)
569	if( crtc->mode.timing.pixel_clock < 175000 )
570		ecp_div = 0;
571	else
572		ecp_div = 1;
573
574
575	// scaling is independant of clipping, get this first
576	{
577		uint32 src_width, src_height;
578
579		src_width = overlay->ov.width;
580		src_height = overlay->ov.height;
581
582		// this is for graphics card
583		v_inc = (src_height << 20) / overlay->ow.height;
584		h_inc = (src_width << (12 + ecp_div)) / overlay->ow.width;
585
586
587		// this is for us
588		src_v_inc = (src_height << 16) / overlay->ow.height;
589		src_h_inc = (src_width << 16) / overlay->ow.width;
590	}
591
592	// calculate unclipped position/size
593	// TBD: I assume that overlay_window.offset_xyz is only a hint where
594	//      no overlay is visible; another interpretation were to zoom
595	//      the overlay so it fits into remaining space
596	src_left = (overlay->ov.h_start << 16) + overlay->ow.offset_left * src_h_inc;
597	src_top = (overlay->ov.v_start << 16) + overlay->ow.offset_top * src_v_inc;
598	src_right = ((overlay->ov.h_start + overlay->ov.width) << 16) -
599		overlay->ow.offset_right * src_h_inc;
600	src_bottom = ((overlay->ov.v_start + overlay->ov.height) << 16) -
601		overlay->ow.offset_top * src_v_inc;
602	dest_left = overlay->ow.h_start + overlay->ow.offset_left;
603	dest_top = overlay->ow.v_start + overlay->ow.offset_top;
604	dest_right = overlay->ow.h_start + overlay->ow.width - overlay->ow.offset_right;
605	dest_bottom = overlay->ow.v_start + overlay->ow.height - overlay->ow.offset_bottom;
606
607	SHOW_FLOW( 3, "ow: h=%d, v=%d, width=%d, height=%d",
608		overlay->ow.h_start, overlay->ow.v_start,
609		overlay->ow.width, overlay->ow.height );
610
611	SHOW_FLOW( 3, "offset_left=%d, offset_right=%d, offset_top=%d, offset_bottom=%d",
612		overlay->ow.offset_left, overlay->ow.offset_right,
613		overlay->ow.offset_top, overlay->ow.offset_bottom );
614
615
616	// apply virtual screen
617	dest_left -= vc->mode.h_display_start + crtc->rel_x;
618	dest_top -= vc->mode.v_display_start + crtc->rel_y;
619	dest_right -= vc->mode.h_display_start + crtc->rel_x;
620	dest_bottom -= vc->mode.v_display_start + crtc->rel_y;
621
622	// clip to visible area
623	if( dest_left < 0 ) {
624		src_left += -dest_left * src_h_inc;
625		dest_left = 0;
626	}
627	if( dest_top < 0 ) {
628		src_top += -dest_top * src_v_inc;
629		dest_top = 0;
630	}
631
632	SHOW_FLOW( 3, "mode: w=%d, h=%d",
633		crtc->mode.timing.h_display, crtc->mode.timing.v_display );
634
635	if( dest_right > crtc->mode.timing.h_display )
636		dest_right = crtc->mode.timing.h_display;
637	if( dest_bottom > crtc->mode.timing.v_display )
638		dest_bottom = crtc->mode.timing.v_display;
639
640	SHOW_FLOW( 3, "src=(%d, %d, %d, %d)",
641		src_left, src_top, src_right, src_bottom );
642	SHOW_FLOW( 3, "dest=(%d, %d, %d, %d)",
643		dest_left, dest_top, dest_right, dest_bottom );
644
645
646	// especially with multi-screen modes the overlay may not be on screen at all
647	if( dest_left >= dest_right || dest_top >= dest_bottom ||
648		src_left >= src_right || src_top >= src_bottom )
649	{
650		Radeon_TempHideOverlay( ai );
651		goto done;
652	}
653
654
655	// let's calculate all those nice register values
656	SHOW_FLOW( 3, "ati_space=%d", node->ati_space );
657	params = &space_params_table[node->ati_space];
658
659	// choose proper scaler
660	{
661		factors = getHScaleFactor( ai, params, src_left >> 16, src_right >> 16, &h_inc );
662		if( factors == NULL )
663			return B_ERROR;
664
665		p1_h_inc = factors->p1_step_by > 0 ?
666			h_inc >> (factors->p1_step_by - 1) : h_inc;
667		p23_h_inc =
668			(factors->p23_step_by > 0 ? h_inc >> (factors->p23_step_by - 1) : h_inc)
669			>> params->h_uv_sub_sample_shift;
670
671		SHOW_FLOW( 3, "p1_h_inc=%x, p23_h_inc=%x", p1_h_inc, p23_h_inc );
672	}
673
674	// get register value for start/end position of overlay image (pixel-precise only)
675	{
676		uint32 p1_step_size, p23_step_size;
677		uint32 p1_left, p1_right, p1_width;
678		uint32 p23_left, p23_right, p23_width;
679
680		p1_left = src_left >> 16;
681		p1_right = src_right >> 16;
682		p1_width = p1_right - p1_left;
683
684		p1_step_size = factors->p1_step_by > 0 ? (1 << (factors->p1_step_by - 1)) : 1;
685		p1_x_start = p1_left % (16 >> params->bpp_shift);
686		p1_x_end = ((p1_x_start + p1_width - 1) / p1_step_size) * p1_step_size;
687
688		SHOW_FLOW( 3, "p1_x_start=%d, p1_x_end=%d", p1_x_start, p1_x_end );
689
690		p23_left = (src_left >> 16) >> params->h_uv_sub_sample_shift;
691		p23_right = (src_right >> 16) >> params->h_uv_sub_sample_shift;
692		p23_width = p23_right - p23_left;
693
694		p23_step_size = factors->p23_step_by > 0 ? (1 << (factors->p23_step_by - 1)) : 1;
695		// if resolution of Y and U/V differs but YUV are stored in one
696		// plane then UV alignment depends on Y data, therefore the hack
697		// (you are welcome to replace this with some cleaner code ;)
698		p23_x_start = p23_left %
699			((16 >> params->bpuv_shift) /
700			 (node->ati_space == 11 || node->ati_space == 12 ? 2 : 1));
701		p23_x_end = (int)((p23_x_start + p23_width - 1) / p23_step_size) * p23_step_size;
702
703		SHOW_FLOW( 3, "p23_x_start=%d, p23_x_end=%d", p23_x_start, p23_x_end );
704
705		// get memory location of first word to be read by scaler
706		// (save relative offset for fast update)
707		si->active_overlay.rel_offset = (src_top >> 16) * node->buffer.bytes_per_row +
708			((p1_left << params->bpp_shift) & ~0xf);
709		offset = node->mem_offset + si->active_overlay.rel_offset;
710
711		SHOW_FLOW( 3, "rel_offset=%x", si->active_overlay.rel_offset );
712	}
713
714	// get active lines for scaler
715	// (we could add additional blank lines for DVD letter box mode,
716	//  but this is not supported by API; additionally, this only makes
717	//  sense if want to put subtitles onto the black border, which is
718	//  supported neither)
719	{
720		uint16 int_top, int_bottom;
721
722		int_top = src_top >> 16;
723		int_bottom = (src_bottom >> 16);
724
725		p1_active_lines = int_bottom - int_top - 1;
726		p23_active_lines =
727			ceilShiftDiv( int_bottom - 1, params->v_uv_sub_sample_shift ) -
728			(int_top >> params->v_uv_sub_sample_shift);
729
730		SHOW_FLOW( 3, "p1_active_lines=%d, p23_active_lines=%d",
731			p1_active_lines, p23_active_lines );
732	}
733
734	// if picture is stretched for flat panel, we need to scale all
735	// vertical values accordingly
736	// TBD: there is no description at all concerning this, so v_accum_init may
737	//      need to be initialized based on original value
738	{
739		if( (crtc->active_displays & (dd_lvds | dd_dvi)) != 0 ) {
740			uint64 v_ratio;
741
742			// convert 32.32 format to 16.16 format; else we
743			// cannot multiply two fixed point values without
744			// overflow
745			v_ratio = si->flatpanels[crtc->flatpanel_port].v_ratio >> (FIX_SHIFT - 16);
746
747			v_inc = (v_inc * v_ratio) >> 16;
748		}
749
750		SHOW_FLOW( 3, "v_inc=%x", v_inc );
751	}
752
753	// get initial horizontal scaler values, taking care of precharge
754	// don't ask questions about formulas - take them as is
755	// (TBD: home-brewed sub-pixel source clipping may be wrong,
756	//       especially for uv-planes)
757	{
758		uint32 p23_group_size;
759
760	    tmp = ((src_left & 0xffff) >> 11) + (
761	    	(
762		    	I2FF( p1_x_start % factors->group_size, 12 ) +
763		    	I2FF( 2.5, 12 ) +
764		    	p1_h_inc / 2 +
765		    	I2FF( 0.5, 12-5 )	// rounding
766	        ) >> (12 - 5));	// scaled by 1 << 5
767
768	    SHOW_FLOW( 3, "p1_h_accum_init=%x", tmp );
769
770		p1_h_accum_init =
771			((tmp << 15) & RADEON_OV0_P1_H_ACCUM_INIT_MASK) |
772			((tmp << 23) & RADEON_OV0_P1_PRESHIFT_MASK);
773
774
775		p23_group_size = 2;
776
777		tmp = ((src_left & 0xffff) >> 11) + (
778			(
779				I2FF( p23_x_start % p23_group_size, 12 ) +
780				I2FF( 2.5, 12 ) +
781				p23_h_inc / 2 +
782				I2FF( 0.5, 12-5 )	// rounding
783			) >> (12 - 5)); // scaled by 1 << 5
784
785		SHOW_FLOW( 3, "p23_h_accum_init=%x", tmp );
786
787		p23_h_accum_init =
788			((tmp << 15) & RADEON_OV0_P23_H_ACCUM_INIT_MASK) |
789			((tmp << 23) & RADEON_OV0_P23_PRESHIFT_MASK);
790	}
791
792	// get initial vertical scaler values, taking care of precharge
793	{
794		uint extra_full_line;
795
796		extra_full_line = factors->p1_step_by == 0 ? 1 : 0;
797
798	    tmp = ((src_top & 0x0000ffff) >> 11) + (
799	    	(min(
800		    	I2FF( 1.5, 20 ) + I2FF( extra_full_line, 20 ) + v_inc / 2,
801	    		I2FF( 2.5, 20 ) + 2 * I2FF( extra_full_line, 20 )
802	    	 ) + I2FF( 0.5, 20-5 )) // rounding
803	    	>> (20 - 5)); // scaled by 1 << 5
804
805	    SHOW_FLOW( 3, "p1_v_accum_init=%x", tmp );
806
807		p1_v_accum_init =
808			((tmp << 15) & RADEON_OV0_P1_V_ACCUM_INIT_MASK) | 0x00000001;
809
810
811		extra_full_line = factors->p23_step_by == 0 ? 1 : 0;
812
813		if( params->v_uv_sub_sample_shift > 0 ) {
814			tmp = ((src_top & 0x0000ffff) >> 11) + (
815				(min(
816					I2FF( 1.5, 20 ) +
817						I2FF( extra_full_line, 20 ) +
818						((v_inc / 2) >> params->v_uv_sub_sample_shift),
819					I2FF( 2.5, 20 ) +
820						2 * I2FF( extra_full_line, 20 )
821				 ) + I2FF( 0.5, 20-5 )) // rounding
822				>> (20 - 5)); // scaled by 1 << 5
823		} else {
824			tmp = ((src_top & 0x0000ffff) >> 11) + (
825				(
826					I2FF( 2.5, 20 ) +
827					2 * I2FF( extra_full_line, 20 ) +
828					I2FF( 0.5, 20-5 )	// rounding
829				) >> (20 - 5)); // scaled by 1 << 5
830		}
831
832		SHOW_FLOW( 3, "p23_v_accum_init=%x", tmp );
833
834		p23_v_accum_init =
835			((tmp << 15) & RADEON_OV0_P23_V_ACCUM_INIT_MASK) | 0x00000001;
836	}
837
838	// show me what you've got!
839	// we could lock double buffering of overlay unit during update
840	// (new values are copied during vertical blank, so if we've updated
841	// only some of them, you get a whole frame of mismatched values)
842	// but during tests I couldn't get the artifacts go away, so
843	// we use the dangerous way which has the pro to not require any
844	// waiting
845
846	// let's try to lock overlay unit
847	// we had to wait now until the lock takes effect, but this is
848	// impossible with CCE; perhaps we have to convert this code to
849	// direct register access; did that - let's see what happens...
850	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, RADEON_REG_LD_CTL_LOCK );
851
852	// wait until register access is locked
853	while( (INREG( regs, RADEON_OV0_REG_LOAD_CNTL)
854		& RADEON_REG_LD_CTL_LOCK_READBACK) == 0 )
855		;
856
857	OUTREG( regs, RADEON_OV0_VID_BUF0_BASE_ADRS, offset );
858	OUTREG( regs, RADEON_OV0_VID_BUF_PITCH0_VALUE, node->buffer.bytes_per_row );
859	OUTREG( regs, RADEON_OV0_H_INC, p1_h_inc | (p23_h_inc << 16) );
860	OUTREG( regs, RADEON_OV0_STEP_BY, factors->p1_step_by | (factors->p23_step_by << 8) );
861	OUTREG( regs, RADEON_OV0_V_INC, v_inc );
862
863	OUTREG( regs,
864		crtc->crtc_idx == 0 ? RADEON_OV0_Y_X_START : RADEON_OV1_Y_X_START,
865		(dest_left) | (dest_top << 16) );
866	OUTREG( regs,
867		crtc->crtc_idx == 0 ? RADEON_OV0_Y_X_END : RADEON_OV1_Y_X_END,
868		(dest_right - 1) | ((dest_bottom - 1) << 16) );
869
870	OUTREG( regs, RADEON_OV0_P1_BLANK_LINES_AT_TOP,
871		RADEON_P1_BLNK_LN_AT_TOP_M1_MASK | (p1_active_lines << 16) );
872	OUTREG( regs, RADEON_OV0_P1_X_START_END, p1_x_end | (p1_x_start << 16) );
873	OUTREG( regs, RADEON_OV0_P1_H_ACCUM_INIT, p1_h_accum_init );
874	OUTREG( regs, RADEON_OV0_P1_V_ACCUM_INIT, p1_v_accum_init );
875
876	OUTREG( regs, RADEON_OV0_P23_BLANK_LINES_AT_TOP,
877		RADEON_P23_BLNK_LN_AT_TOP_M1_MASK | (p23_active_lines << 16) );
878	OUTREG( regs, RADEON_OV0_P2_X_START_END,
879		p23_x_end | (p23_x_start << 16) );
880	OUTREG( regs, RADEON_OV0_P3_X_START_END,
881		p23_x_end | (p23_x_start << 16) );
882	OUTREG( regs, RADEON_OV0_P23_H_ACCUM_INIT, p23_h_accum_init );
883	OUTREG( regs, RADEON_OV0_P23_V_ACCUM_INIT, p23_v_accum_init );
884
885	OUTREG( regs, RADEON_OV0_TEST, node->test_reg );
886
887	scale_ctrl = RADEON_SCALER_ENABLE |
888		RADEON_SCALER_DOUBLE_BUFFER |
889		(node->ati_space << 8) |
890		/* RADEON_SCALER_ADAPTIVE_DEINT | */
891		RADEON_SCALER_BURST_PER_PLANE |
892		(crtc->crtc_idx == 0 ? 0 : RADEON_SCALER_CRTC_SEL );
893
894	switch (node->ati_space << 8) {
895		case RADEON_SCALER_SOURCE_15BPP: // RGB15
896		case RADEON_SCALER_SOURCE_16BPP:
897		case RADEON_SCALER_SOURCE_32BPP:
898			OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl |
899							RADEON_SCALER_LIN_TRANS_BYPASS);
900			break;
901		case RADEON_SCALER_SOURCE_VYUY422: // VYUY422
902		case RADEON_SCALER_SOURCE_YVYU422: // YVYU422
903			OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl);
904			break;
905		default:
906			SHOW_FLOW(4, "What overlay format is this??? %d", node->ati_space);
907			OUTREG( regs, RADEON_OV0_SCALE_CNTL, scale_ctrl |
908			 (( ai->si->asic >= rt_r200) ? R200_SCALER_TEMPORAL_DEINT : 0));
909
910	}
911
912	si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
913
914	OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL,
915		si->overlay_mgr.auto_flip_reg );
916
917	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );
918
919done:
920	ai->si->active_overlay.on = ai->si->pending_overlay.on;
921	ai->si->active_overlay.ow = ai->si->pending_overlay.ow;
922	ai->si->active_overlay.ov = ai->si->pending_overlay.ov;
923	ai->si->active_overlay.ob = ai->si->pending_overlay.ob;
924	ai->si->active_overlay.h_display_start = vc->mode.h_display_start;
925	ai->si->active_overlay.v_display_start = vc->mode.v_display_start;
926
927	return B_OK;
928}
929
930
931// hide overlay, but not permanently
932void Radeon_TempHideOverlay(
933	accelerator_info *ai )
934{
935	SHOW_FLOW0( 3, "" );
936
937	OUTREG( ai->regs, RADEON_OV0_SCALE_CNTL, 0 );
938}
939
940
941// hide overlay (can be called even if there is none visible)
942void Radeon_HideOverlay(
943	accelerator_info *ai )
944{
945	shared_info *si = ai->si;
946
947	Radeon_TempHideOverlay( ai );
948
949	// remember that there is no overlay to be shown
950	si->active_overlay.on = NULL;
951	si->active_overlay.prev_on = NULL;
952	si->pending_overlay.on = NULL;
953
954	// invalidate active head so it will be setup again once
955	// a new overlay is shown
956	si->active_overlay.crtc_idx = -1;
957}
958
959
960// show new overlay buffer with same parameters as last one
961static void Radeon_ReplaceOverlayBuffer(
962	accelerator_info *ai )
963{
964#if 0
965	shared_info *si = ai->si;
966	vuint8 *regs = ai->regs;
967	uint32 offset;
968	int /*old_buf, */new_buf;
969
970	offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;
971
972	/*old_buf = si->overlay_mgr.auto_flip_reg & RADEON_OV0_SOFT_BUF_NUM_MASK;
973	new_buf = old_buf == 0 ? 3 : 0;
974	si->overlay_mgr.auto_flip_reg &= ~RADEON_OV0_SOFT_BUF_NUM_MASK;
975	si->overlay_mgr.auto_flip_reg |= new_buf;*/
976	new_buf = 0;
977
978	// lock overlay registers
979/*	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, RADEON_REG_LD_CTL_LOCK );
980
981	// wait until register access is locked
982	while( (INREG( regs, RADEON_OV0_REG_LOAD_CNTL)
983		& RADEON_REG_LD_CTL_LOCK_READBACK) == 0 )
984		;*/
985
986	// setup new buffer
987	/*OUTREG( regs,
988		new_buf == 0 ? RADEON_OV0_VID_BUF_PITCH0_VALUE : RADEON_OV0_VID_BUF_PITCH1_VALUE,
989		si->pending_overlay.on->buffer.bytes_per_row );*/
990	OUTREG( regs,
991		new_buf == 0 ? RADEON_OV0_VID_BUF0_BASE_ADRS : RADEON_OV0_VID_BUF3_BASE_ADRS,
992		offset | (new_buf == 0 ? 0 : RADEON_VIF_BUF0_PITCH_SEL));
993
994	// make changes visible
995	si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
996
997	OUTREG( regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
998
999	// unlock overlay registers
1000//	OUTREG( regs, RADEON_OV0_REG_LOAD_CNTL, 0 );
1001
1002	ai->si->active_overlay.on = ai->si->pending_overlay.on;
1003#else
1004	shared_info *si = ai->si;
1005	uint32 offset;
1006
1007	if ( ai->si->acc_dma )
1008	{
1009		START_IB();
1010
1011		offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;
1012
1013		WRITE_IB_REG( RADEON_OV0_VID_BUF0_BASE_ADRS, offset);
1014
1015		si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
1016		WRITE_IB_REG( RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
1017
1018		SUBMIT_IB();
1019	} else {
1020		Radeon_WaitForFifo( ai, 2 );
1021		offset = si->pending_overlay.on->mem_offset + si->active_overlay.rel_offset;
1022
1023		OUTREG( ai->regs, RADEON_OV0_VID_BUF0_BASE_ADRS, offset);
1024
1025		si->overlay_mgr.auto_flip_reg ^= RADEON_OV0_SOFT_EOF_TOGGLE;
1026		OUTREG( ai->regs, RADEON_OV0_AUTO_FLIP_CNTRL, si->overlay_mgr.auto_flip_reg );
1027	}
1028	ai->si->active_overlay.on = ai->si->pending_overlay.on;
1029#endif
1030}
1031
1032
1033// get number of pixels of overlay shown on virtual port
1034static int getIntersectArea(
1035	accelerator_info *ai, overlay_window *ow, crtc_info *crtc )
1036{
1037	virtual_card *vc = ai->vc;
1038	int left, top, right, bottom;
1039
1040	left = ow->h_start - (vc->mode.h_display_start + crtc->rel_x);
1041	top = ow->v_start - (vc->mode.v_display_start + crtc->rel_y);
1042	right = left + ow->width;
1043	bottom = top + ow->height;
1044
1045	if( left < 0 )
1046		left = 0;
1047	if( top < 0 )
1048		top = 0;
1049	if( right > crtc->mode.timing.h_display )
1050		right = crtc->mode.timing.h_display;
1051	if( bottom > crtc->mode.timing.v_display )
1052		bottom = crtc->mode.timing.v_display;
1053
1054	if( right < left || bottom < top )
1055		return 0;
1056
1057	return (right - left) * (bottom - top);
1058}
1059
1060
1061// update overlay, to be called whenever something in terms of
1062// overlay have or can have been changed
1063status_t Radeon_UpdateOverlay(
1064	accelerator_info *ai )
1065{
1066	virtual_card *vc = ai->vc;
1067	shared_info *si = ai->si;
1068	int crtc_idx;
1069
1070	float brightness = 0.0f;
1071	float contrast = 1.0f;
1072	float saturation = 1.0f;
1073	float hue = 0.0f;
1074    int32 ref = 0;
1075
1076    SHOW_FLOW0( 3, "" );
1077
1078	// don't mess around with overlay of someone else
1079    if( !vc->uses_overlay )
1080    	return B_OK;
1081
1082	// make sure there really is an overlay
1083	if( si->pending_overlay.on == NULL )
1084		return B_OK;
1085
1086	// verify that the overlay is still valid
1087	if( (uint32)si->pending_overlay.ot != si->overlay_mgr.token )
1088		return B_BAD_VALUE;
1089
1090	if( vc->different_heads > 1 ) {
1091		int area0, area1;
1092
1093		// determine on which port most of the overlay is shown
1094		area0 = getIntersectArea( ai, &si->pending_overlay.ow, &si->crtc[0] );
1095		area1 = getIntersectArea( ai, &si->pending_overlay.ow, &si->crtc[0] );
1096
1097		SHOW_FLOW( 3, "area0=%d, area1=%d", area0, area1 );
1098
1099		if( area0 >= area1 )
1100			crtc_idx = 0;
1101		else
1102			crtc_idx = 1;
1103
1104	} else if( vc->independant_heads > 1 ) {
1105		// both ports show the same, use "swap displays" to decide
1106		// where to show the overlay (to be improved as this flag isn't
1107		// really designed for that)
1108		if( vc->swap_displays )
1109			crtc_idx = 1;
1110		else
1111			crtc_idx = 0;
1112
1113	} else {
1114
1115		// one crtc used only - pick the one that we use
1116		crtc_idx = vc->used_crtc[0] ? 0 : 1;
1117	}
1118
1119	si->pending_overlay.crtc_idx = crtc_idx;
1120
1121	// only update registers that have been changed to minimize work
1122	if( si->active_overlay.crtc_idx != si->pending_overlay.crtc_idx ) {
1123		Radeon_InitOverlay( ai, crtc_idx );
1124	}
1125
1126	if( si->active_overlay.ob.space != si->pending_overlay.ob.space ) {
1127		Radeon_SetTransform( ai, brightness, contrast, saturation, hue, 0, 0, 0, ref );
1128	}
1129
1130	if( memcmp( &si->active_overlay.ow, &si->pending_overlay.ow, sizeof( si->active_overlay.ow )) != 0 ||
1131		memcmp( &si->active_overlay.ov, &si->pending_overlay.ov, sizeof( si->active_overlay.ov )) != 0 ||
1132		si->active_overlay.h_display_start != vc->mode.h_display_start ||
1133		si->active_overlay.v_display_start != vc->mode.v_display_start ||
1134		si->active_overlay.ob.width != si->pending_overlay.ob.width ||
1135		si->active_overlay.ob.height != si->pending_overlay.ob.height ||
1136		si->active_overlay.ob.bytes_per_row != si->pending_overlay.ob.bytes_per_row )
1137		Radeon_ShowOverlay( ai, crtc_idx );
1138
1139	else if( si->active_overlay.on != si->pending_overlay.on )
1140		Radeon_ReplaceOverlayBuffer( ai );
1141
1142	SHOW_FLOW0( 3, "success" );
1143
1144	return B_OK;
1145}
1146