1/* VIA Unichrome Back End Scaler functions */
2/* Written by Rudolf Cornelissen 05/2002-2/2016 */
3
4#define MODULE_BIT 0x00000200
5
6#include "std.h"
7
8typedef struct move_overlay_info move_overlay_info;
9
10struct move_overlay_info
11{
12	uint32 hcoordv;		/* left and right edges of video output window */
13	uint32 vcoordv;		/* top and bottom edges of video output window */
14	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15	uint32 hsrcendv;	/* horizontal source end in source buffer (clipping) */
16	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
17	uint32 a1orgv;		/* alternate source clipping via startadress of source buffer */
18};
19
20static void eng_bes_calc_move_overlay(move_overlay_info *moi);
21static void eng_bes_program_move_overlay(move_overlay_info moi);
22
23/* returns true if the current displaymode leaves enough bandwidth for overlay
24 * support, false if not. */
25bool eng_bes_chk_bandwidth()
26{
27	float refresh, bandwidth;
28	uint8 depth;
29
30	switch(si->dm.space)
31	{
32	case B_CMAP8:        depth =  8; break;
33	case B_RGB15_LITTLE: depth = 16; break;
34	case B_RGB16_LITTLE: depth = 16; break;
35	case B_RGB32_LITTLE: depth = 32; break;
36	default:
37		LOG(8,("Overlay: Invalid colour depth 0x%08x\n", si->dm.space));
38		return false;
39	}
40
41	refresh =
42		(si->dm.timing.pixel_clock * 1000) /
43		(si->dm.timing.h_total * si->dm.timing.v_total);
44	bandwidth =
45		si->dm.timing.h_display * si->dm.timing.v_display * refresh * depth;
46	LOG(8,("Overlay: Current mode's refreshrate is %.2fHz, bandwidth is %.0f\n",
47		refresh, bandwidth));
48
49	switch (((CRTCR(MEMCLK)) & 0x70) >> 4)
50	{
51	case 0: /* SDR  66 */
52		LOG(8,("Overlay: System memory is type SDR 66\n"));
53		return false;
54		break;
55	case 1: /* SDR 100 */
56		LOG(8,("Overlay: System memory is type SDR 100\n"));
57		return false;
58		break;
59	case 2: /* SDR 133 */
60		/* memory is too slow, sorry. */
61		LOG(8,("Overlay: System memory is type SDR 133\n"));
62		return false;
63		break;
64	case 3: /* DDR 100 (PC1600) */
65		/* DDR100's basic limit... */
66		LOG(8,("Overlay: System memory is type DDR 100\n"));
67		if (bandwidth > 921600000.0) return false;
68		/* ... but we have constraints at higher than 800x600 */
69		if (si->dm.timing.h_display > 800)
70		{
71			if (depth != 8) return false;
72			if (si->dm.timing.v_display > 768) return false;
73			if (refresh > 60.2) return false;
74		}
75		break;
76	case 4: /* DDR 133 (PC2100) */
77		LOG(8,("Overlay: System memory is type DDR 133\n"));
78		if (bandwidth > 4045440000.0) return false;
79		break;
80	case 5: /* DDR 166 (PC2700) */
81		LOG(8,("Overlay: System memory is type DDR 166\n"));
82		if (bandwidth > 5210000000.0) return false;//fixme: set more correct limit?
83		break;
84	case 6: /* DDR 200 (PC3200) */
85		LOG(8,("Overlay: System memory is type DDR 200\n"));
86		if (bandwidth > 6170000000.0) return false;//fixme: set more correct limit?
87		break;
88	default: /* not (yet?) used */
89		LOG(8,("Overlay: System memory is (yet) unknown type!\n"));
90		return false;
91		break;
92	}
93
94	return true;
95}
96
97/* move the overlay output window in virtualscreens */
98/* Note:
99 * si->dm.h_display_start and si->dm.v_display_start determine where the new
100 * output window is located! */
101void eng_bes_move_overlay()
102{
103	move_overlay_info moi;
104
105	/* abort if overlay is not active */
106	if (!si->overlay.active) return;
107
108	eng_bes_calc_move_overlay(&moi);
109	eng_bes_program_move_overlay(moi);
110}
111
112static void eng_bes_calc_move_overlay(move_overlay_info *moi)
113{
114	/* misc used variables */
115	uint16 temp1, temp2;
116	/* visible screen window in virtual workspaces */
117	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
118
119	/* do 'overlay follow head' in dualhead modes on dualhead cards */
120	if (si->ps.secondary_head)
121	{
122		switch (si->dm.flags & DUALHEAD_BITS)
123		{
124		case DUALHEAD_ON:
125		case DUALHEAD_SWITCH:
126			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
127					(si->dm.h_display_start + si->dm.timing.h_display))
128				eng_bes_to_crtc(si->crtc_switch_mode);
129			else
130				eng_bes_to_crtc(!si->crtc_switch_mode);
131			break;
132		default:
133				eng_bes_to_crtc(si->crtc_switch_mode);
134			break;
135		}
136	}
137
138	/* the BES does not respect virtual_workspaces, but adheres to CRTC
139	 * constraints only */
140	crtc_hstart = si->dm.h_display_start;
141	/* make dualhead stretch and switch mode work while we're at it.. */
142	if (si->overlay.crtc)
143	{
144		crtc_hstart += si->dm.timing.h_display;
145	}
146
147	/* horizontal end is the first position beyond the displayed range on the CRTC */
148	crtc_hend = crtc_hstart + si->dm.timing.h_display;
149	crtc_vstart = si->dm.v_display_start;
150	/* vertical end is the first position beyond the displayed range on the CRTC */
151	crtc_vend = crtc_vstart + si->dm.timing.v_display;
152
153
154	/****************************************
155	 *** setup all edges of output window ***
156	 ****************************************/
157
158	/* setup left and right edges of output window */
159	moi->hcoordv = 0;
160	/* left edge coordinate of output window, must be inside desktop */
161	/* clipping on the left side */
162	if (si->overlay.ow.h_start < crtc_hstart)
163	{
164		temp1 = 0;
165	}
166	else
167	{
168		/* clipping on the right side */
169		if (si->overlay.ow.h_start >= (crtc_hend - 1))
170		{
171			/* width < 2 is not allowed */
172			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
173		}
174		else
175		/* no clipping here */
176		{
177			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
178		}
179	}
180	moi->hcoordv |= temp1 << 16;
181	/* right edge coordinate of output window, must be inside desktop */
182	/* width < 2 is not allowed */
183	if (si->overlay.ow.width < 2)
184	{
185		temp2 = (temp1 + 1) & 0x7ff;
186	}
187	else
188	{
189		/* clipping on the right side */
190		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
191		{
192			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
193		}
194		else
195		{
196			/* clipping on the left side */
197			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
198			{
199				/* width < 2 is not allowed */
200				temp2 = 1;
201			}
202			else
203			/* no clipping here */
204			{
205				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
206			}
207		}
208	}
209	moi->hcoordv |= temp2 << 0;
210	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
211
212	/* setup top and bottom edges of output window */
213	moi->vcoordv = 0;
214	/* top edge coordinate of output window, must be inside desktop */
215	/* clipping on the top side */
216	if (si->overlay.ow.v_start < crtc_vstart)
217	{
218		temp1 = 0;
219	}
220	else
221	{
222		/* clipping on the bottom side */
223		if (si->overlay.ow.v_start >= (crtc_vend - 1))
224		{
225			/* height < 2 is not allowed */
226			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
227		}
228		else
229		/* no clipping here */
230		{
231			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
232		}
233	}
234	moi->vcoordv |= temp1 << 16;
235	/* bottom edge coordinate of output window, must be inside desktop */
236	/* height < 2 is not allowed */
237	if (si->overlay.ow.height < 2)
238	{
239		temp2 = (temp1 + 1) & 0x7ff;
240	}
241	else
242	{
243		/* clipping on the bottom side */
244		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
245		{
246			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
247		}
248		else
249		{
250			/* clipping on the top side */
251			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
252			{
253				/* height < 2 is not allowed */
254				temp2 = 1;
255			}
256			else
257			/* no clipping here */
258			{
259				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
260			}
261		}
262	}
263	moi->vcoordv |= temp2 << 0;
264	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
265
266
267	/*********************************
268	 *** setup horizontal clipping ***
269	 *********************************/
270
271	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
272	/* Note:
273	 * The method is to calculate, based on 1:1 scaling, based on the output window.
274	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
275	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
276	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
277	/* Note also:
278	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
279	moi->hsrcstv = 0;
280	/* check for destination horizontal clipping at left side */
281	if (si->overlay.ow.h_start < crtc_hstart)
282	{
283		/* check if entire destination picture is clipping left:
284		 * (2 pixels will be clamped onscreen at least) */
285		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
286		{
287			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
288			moi->hsrcstv += (si->overlay.ow.width - 2);
289		}
290		else
291		{
292			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
293			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
294		}
295		LOG(4,("Overlay: clipping left...\n"));
296
297		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
298		 * Note that this also already takes care of aligning the value to the BES register! */
299		moi->hsrcstv *= si->overlay.h_ifactor;
300	}
301	/* take zoom into account */
302	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
303	/* AND below required by hardware */
304	moi->hsrcstv &= 0x03fffffc;
305	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
306
307	/* Setup horizontal source end: last (sub)pixel contributing to output picture */
308	/* Note:
309	 * The method is to calculate, based on 1:1 scaling, based on the output window.
310	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
311	 * Then add the right ending position of the bitmap's view (zoom function) to get the final value needed. */
312	/* Note also:
313	 * Even if the scaling factor is clamping we instruct the BES to use the correct source end pos.! */
314
315	moi->hsrcendv = 0;
316	/* check for destination horizontal clipping at right side */
317	if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
318	{
319		/* check if entire destination picture is clipping right:
320		 * (2 pixels will be clamped onscreen at least) */
321		if (si->overlay.ow.h_start > (crtc_hend - 2))
322		{
323			/* increase 'number of clipping pixels' with 'fixed value': (total dest. width - 2) */
324			moi->hsrcendv += (si->overlay.ow.width - 2);
325		}
326		else
327		{
328			/* increase 'number of clipping pixels' with actual number of dest. clipping pixels */
329			moi->hsrcendv += ((si->overlay.ow.h_start + si->overlay.ow.width - 1) - (crtc_hend - 1));
330		}
331		LOG(4,("Overlay: clipping right...\n"));
332
333		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
334		 * Note that this also already takes care of aligning the value to the BES register! */
335		moi->hsrcendv *= si->overlay.h_ifactor;
336		/* now subtract this value from the last used pixel in (zoomed) inputbuffer, aligned to BES */
337		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16) - moi->hsrcendv;
338	}
339	else
340	{
341		/* set last contributing pixel to last used pixel in (zoomed) inputbuffer, aligned to BES */
342		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16);
343	}
344	/* AND below required by hardware */
345	moi->hsrcendv &= 0x03ffffff;
346	LOG(4,("Overlay: last horizontal (sub)pixel of input bitmap contributing %f\n", moi->hsrcendv / (float)65536));
347
348
349	/*******************************
350	 *** setup vertical clipping ***
351	 *******************************/
352
353	/* calculate inputbitmap origin adress */
354	moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer);
355	moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer);
356	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%p\n", moi->a1orgv));
357
358	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
359	/* Note:
360	 * The method is to calculate, based on 1:1 scaling, based on the output window.
361	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
362	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
363	/* Note also:
364	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
365
366	moi->v1srcstv = 0;
367	/* check for destination vertical clipping at top side */
368	if (si->overlay.ow.v_start < crtc_vstart)
369	{
370		/* check if entire destination picture is clipping at top:
371		 * (2 pixels will be clamped onscreen at least) */
372		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
373		{
374			/* increase 'number of clipping pixels' with 'fixed value':
375			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
376			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
377			/* we need to do clipping in the source bitmap because no seperate clipping
378			 * registers exist... */
379			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
380		}
381		else
382		{
383			/* increase 'first contributing pixel' with:
384			 * number of destination picture clipping pixels * inverse scaling factor */
385			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
386			/* we need to do clipping in the source bitmap because no seperate clipping
387			 * registers exist... */
388			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
389		}
390		LOG(4,("Overlay: clipping at top...\n"));
391	}
392	/* take zoom into account */
393	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
394	moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
395	LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
396	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
397
398	/* AND below is probably required by hardware. */
399	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
400	moi->a1orgv &= 0x07fffff0;
401}
402
403static void eng_bes_program_move_overlay(move_overlay_info moi)
404{
405	/*************************************
406	 *** sync to BES (Back End Scaler) ***
407	 *************************************/
408
409	/* Done in card hardware:
410	 * double buffered registers + trigger during 'BES-'VBI feature. */
411
412
413	/**************************************
414	 *** actually program the registers ***
415	 **************************************/
416
417	if (si->ps.card_arch < K8M800)
418	{
419		/* setup clipped(!) buffer startadress in RAM */
420		/* VIA bes doesn't have clipping registers, so no subpixelprecise clipping
421		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
422		/* first include 'pixel precise' left clipping... (top clipping was already included) */
423		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
424		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
425		BESW(VID1Y_ADDR0, (moi.a1orgv & 0x07fffffc));
426
427		/* horizontal source end does not use subpixelprecision: granularity is 8 pixels */
428		/* notes:
429		 * - make absolutely sure the engine can fetch the last pixel needed from
430		 *   the sourcebitmap even if only to generate a tiny subpixel from it!
431		 * - the engine uses byte format instead of pixel format;
432		 * - the engine uses 16 bytes, so 8 pixels granularity. */
433		BESW(VID1_FETCH, (((((moi.hsrcendv >> 16) + 1 + 0x0007) & ~0x0007) * 2) << (20 - 4)));
434
435		/* setup output window position */
436		BESW(VID1_HVSTART, ((moi.hcoordv & 0xffff0000) | ((moi.vcoordv & 0xffff0000) >> 16)));
437
438		/* setup output window size */
439		BESW(VID1_SIZE, (((moi.hcoordv & 0x0000ffff) << 16) | (moi.vcoordv & 0x0000ffff)));
440
441		/* enable colorkeying (b0 = 1), disable chromakeying (b1 = 0), Vid1 on top of Vid3 (b20 = 0),
442		 * all registers are loaded during the next 'BES-'VBI (b28 = 1), Vid1 cmds fire (b31 = 1) */
443		BESW(COMPOSE, 0x90000001);//fixme: >>>!<<< don't touch colorkey enable bit!
444	}
445	else
446	{
447		/* setup clipped(!) buffer startadress in RAM */
448		/* VIA bes doesn't have clipping registers, so no subpixelprecise clipping
449		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
450		/* first include 'pixel precise' left clipping... (top clipping was already included) */
451		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
452		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
453		BESW(VID3_ADDR0, (moi.a1orgv & 0x07fffffc));
454
455		/* horizontal source end does not use subpixelprecision: granularity is 8 pixels */
456		/* notes:
457		 * - make absolutely sure the engine can fetch the last pixel needed from
458		 *   the sourcebitmap even if only to generate a tiny subpixel from it!
459		 * - the engine uses byte format instead of pixel format;
460		 * - the engine uses 16 bytes, so 8 pixels granularity. */
461		BESW(V3A_FETCH, (((((moi.hsrcendv >> 16) + 1 + 0x0007) & ~0x0007) * 2) << (20 - 4)));
462
463		/* setup output window position */
464		BESW(VID3_HVSTART, ((moi.hcoordv & 0xffff0000) | ((moi.vcoordv & 0xffff0000) >> 16)));
465
466		/* setup output window size */
467		BESW(VID3_SIZE, (((moi.hcoordv & 0x0000ffff) << 16) | (moi.vcoordv & 0x0000ffff)));
468
469		/* enable colorkeying (b0 = 1), disable chromakeying (b1 = 0), Vid3 on top of Vid1 (b20 = 1),
470		 * all registers are loaded during the next 'BES-'VBI (b8 = 1), Vid3 cmds fire (b30 = 1) */
471		BESW(COMPOSE, 0x40100101);//fixme: >>>!<<< don't touch colorkey enable bit!
472	}
473}
474
475status_t eng_bes_to_crtc(bool crtc)
476{
477	if (si->ps.secondary_head)
478	{
479		if (crtc)
480		{
481			LOG(4,("Overlay: switching overlay to CRTC2\n"));
482			/* switch overlay engine to CRTC2 */
483//			ENG_REG32(RG32_FUNCSEL) &= ~0x00001000;
484//			ENG_REG32(RG32_2FUNCSEL) |= 0x00001000;
485			si->overlay.crtc = !si->crtc_switch_mode;
486		}
487		else
488		{
489			LOG(4,("Overlay: switching overlay to CRTC1\n"));
490			/* switch overlay engine to CRTC1 */
491//			ENG_REG32(RG32_2FUNCSEL) &= ~0x00001000;
492//			ENG_REG32(RG32_FUNCSEL) |= 0x00001000;
493			si->overlay.crtc = si->crtc_switch_mode;
494		}
495		return B_OK;
496	}
497	else
498	{
499		return B_ERROR;
500	}
501}
502
503status_t eng_bes_init()
504{
505	if (si->ps.card_arch < K8M800)
506	{
507		if (si->ps.chip_rev < 0x10)
508		{
509			/* setup brightness, contrast and saturation to be 'neutral' */
510			BESW(VID1_COLSPAC1, 0x140020f2);
511			BESW(VID1_COLSPAC2, 0x0a0a2c00);
512			/* fifo depth is $20 (b0-5), threshold $10 (b8-13), prethreshold $1d (b24-29) */
513			BESW(VID1_FIFO, 0x1d00101f);
514		}
515		else
516		{
517			/* setup brightness, contrast and saturation to be 'neutral' */
518			BESW(VID1_COLSPAC1, 0x13000ded);
519			BESW(VID1_COLSPAC2, 0x13171000);
520			/* fifo depth is $40 (b0-5), threshold $38 (b8-13), prethreshold $38 (b24-29) */
521			BESW(VID1_FIFO, 0x3800383f);
522		}
523	}
524	else
525	{
526		/* setup brightness, contrast and saturation to be 'neutral' */
527		BESW(VID3_COLSPAC1, 0x13000ded);
528		BESW(VID3_COLSPAC2, 0x13171000);
529
530		/* fifo prethreshold 60 (b0-6) */
531		BESW(V3A_PREFIFO, (60 & 0x7f));
532		/* fifo depth-1 is 63 (b0-7), threshold 60 (b8-15) */
533		BESW(V3A_FIFO, ((63 & 0xff) | ((60 & 0xff) << 8)));
534	}
535
536	return B_OK;
537}
538
539status_t eng_configure_bes
540	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
541{
542	/* yuy2 (4:2:2) colorspace calculations */
543
544	/* Note:
545	 * in BeOS R5.0.3 and DANO:
546	 * 'ow->offset_xxx' is always 0, so not used;
547	 * 'ow->width' and 'ow->height' are the output window size: does not change
548	 * if window is clipping;
549	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
550	 * window. These values can be negative: this means the window is clipping
551	 * at the left or the top of the display, respectively. */
552
553	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
554	 * displayed on screen. This is used for the 'hardware zoom' function. */
555
556	/* output window position and clipping info for source buffer */
557	move_overlay_info moi;
558	/* calculated BES register values */
559	uint32 	hiscalv, viscalv;
560	/* interval representation, used for scaling calculations */
561	uint16 intrep;
562	/* inverse scaling factor, used for source positioning */
563	uint32 ifactor;
564	/* copy of overlay view which has checked valid values */
565	overlay_view my_ov;
566	/* true if scaling needed */
567	bool scale_x, scale_y;
568	/* for computing scaling register value */
569	uint32 scaleval;
570	/* for computing 'pre-scaling' on downscaling */
571	uint32 minictrl;
572
573	/**************************************************************************************
574	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
575	 **************************************************************************************/
576	my_ov = *ov;
577	/* check for valid 'coordinates' */
578	if (my_ov.width == 0) my_ov.width++;
579	if (my_ov.height == 0) my_ov.height++;
580	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
581		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
582	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
583		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
584	if (my_ov.v_start > (ob->height - 1))
585		my_ov.v_start = (ob->height - 1);
586	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
587		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
588
589	LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
590		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
591
592	/* save for eng_bes_calc_move_overlay() */
593	si->overlay.ow = *ow;
594	si->overlay.ob = *ob;
595	si->overlay.my_ov = my_ov;
596
597
598	/********************************
599	 *** setup horizontal scaling ***
600	 ********************************/
601	LOG(4,("Overlay: total input picture width = %d, height = %d\n",
602			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
603	LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
604
605	/* preset X and Y prescaling to be 1x */
606	minictrl = 0x00000000;
607	/* determine interval representation value, taking zoom into account */
608	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
609	{
610		/* horizontal filtering is ON */
611		if ((my_ov.width == ow->width) | (ow->width < 2))
612		{
613			/* no horizontal scaling used, OR destination width < 2 */
614			intrep = 0;
615		}
616		else
617		{
618			intrep = 1;
619		}
620	}
621	else
622	{
623		/* horizontal filtering is OFF */
624		if ((ow->width < my_ov.width) & (ow->width >= 2))
625		{
626			/* horizontal downscaling used AND destination width >= 2 */
627			intrep = 1;
628		}
629		else
630		{
631			intrep = 0;
632		}
633	}
634	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
635
636	/* calculate inverse horizontal scaling factor, taking zoom into account */
637	/* standard scaling formula: */
638	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
639
640	/* correct factor to prevent most-right visible 'line' from distorting */
641	ifactor -= (1 << 5);
642	hiscalv = ifactor;
643	/* save for eng_bes_calc_move_overlay() */
644	si->overlay.h_ifactor = ifactor;
645	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
646
647	/* check scaling factor (and modify if needed) to be within scaling limits */
648	//fixme: checkout...
649	if (hiscalv < 0x00002000)
650	{
651		/* (non-inverse) factor too large, set factor to max. valid value */
652		hiscalv = 0x00002000;
653		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
654	}
655	/* VIA has a 'downscaling' limit of 1.0, but seperate prescaling to 1/16th can be done.
656	 * (X-scaler has 11bit register with 0.11 format value, with special 1.0 scaling factor setting;
657	 *  prescaler has fixed 1x, 1/2x, 1/4x, 1/8x and 1/16x settings.) */
658	if (hiscalv > 0x00100000)
659	{
660		/* (non-inverse) factor too small, set factor to min. valid value */
661		hiscalv = 0x00100000;
662		LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
663	}
664
665	/* setup pre-downscaling if 'requested' */
666	if ((hiscalv > 0x00010000) && (hiscalv <= 0x00020000))
667	{
668		/* instruct BES to horizontal prescale 0.5x */
669		minictrl |= 0x01000000;
670		/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
671		hiscalv >>= 1;
672	}
673	else
674		if ((hiscalv > 0x00020000) && (hiscalv <= 0x00040000))
675		{
676			/* instruct BES to horizontal prescale 0.25x */
677			minictrl |= 0x03000000;
678			/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
679			hiscalv >>= 2;
680		}
681		else
682			if ((hiscalv > 0x00040000) && (hiscalv <= 0x00080000))
683			{
684				/* instruct BES to horizontal prescale 0.125x */
685				minictrl |= 0x05000000;
686				/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
687				hiscalv >>= 3;
688			}
689			else
690				if ((hiscalv > 0x00080000) && (hiscalv <= 0x00100000))
691				{
692					/* instruct BES to horizontal prescale 0.125x */
693					minictrl |= 0x07000000;
694					/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
695					hiscalv >>= 4;
696				}
697
698	/* only instruct normal scaler to scale if it must do so */
699	scale_x = true;
700	if (hiscalv == 0x00010000) scale_x = false;
701
702	/* AND below is required by hardware */
703	hiscalv &= 0x0000ffe0;
704
705
706	/******************************
707	 *** setup vertical scaling ***
708	 ******************************/
709
710	/* determine interval representation value, taking zoom into account */
711	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
712	{
713		/* vertical filtering is ON */
714		if ((my_ov.height == ow->height) | (ow->height < 2))
715		{
716			/* no vertical scaling used, OR destination height < 2 */
717			intrep = 0;
718		}
719		else
720		{
721			intrep = 1;
722		}
723	}
724	else
725	{
726		/* vertical filtering is OFF */
727		if ((ow->height < my_ov.height) & (ow->height >= 2))
728		{
729			/* vertical downscaling used AND destination height >= 2 */
730			intrep = 1;
731		}
732		else
733		{
734			intrep = 0;
735		}
736	}
737	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
738
739	/* calculate inverse vertical scaling factor, taking zoom into account */
740	/* standard scaling formula: */
741	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
742
743	/* correct factor to prevent lowest visible line from distorting */
744	ifactor -= (1 << 6);
745	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
746
747	/* preserve ifactor for source positioning calculations later on */
748	viscalv = ifactor;
749	/* save for eng_bes_calc_move_overlay() */
750	si->overlay.v_ifactor = ifactor;
751
752	/* check scaling factor (and modify if needed) to be within scaling limits */
753	//fixme: checkout...
754	if (viscalv < 0x00002000)
755	{
756		/* (non-inverse) factor too large, set factor to max. valid value */
757		viscalv = 0x00002000;
758		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
759	}
760	/* VIA has a 'downscaling' limit of 1.0, but seperate prescaling to 1/16th can be done.
761	 * (Y-scaler has 10bit register with 0.10 format value, with special 1.0 scaling factor setting;
762	 *  prescaler has fixed 1x, 1/2x, 1/4x, 1/8x and 1/16x settings.) */
763	if (viscalv > 0x00100000)
764	{
765		/* (non-inverse) factor too small, set factor to min. valid value */
766		viscalv = 0x00100000;
767		LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)1024 / (viscalv >> 6)));
768	}
769
770	/* setup pre-downscaling if 'requested' */
771	if ((viscalv > 0x00010000) && (viscalv <= 0x00020000))
772	{
773		/* instruct BES to horizontal prescale 0.5x */
774		minictrl |= 0x00010000;
775		/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
776		viscalv >>= 1;
777	}
778	else
779		if ((viscalv > 0x00020000) && (viscalv <= 0x00040000))
780		{
781			/* instruct BES to horizontal prescale 0.25x */
782			minictrl |= 0x00030000;
783			/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
784			viscalv >>= 2;
785		}
786		else
787			if ((viscalv > 0x00040000) && (viscalv <= 0x00080000))
788			{
789				/* instruct BES to horizontal prescale 0.125x */
790				minictrl |= 0x00050000;
791				/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
792				viscalv >>= 3;
793			}
794			else
795				if ((viscalv > 0x00080000) && (viscalv <= 0x00100000))
796				{
797					/* instruct BES to horizontal prescale 0.125x */
798					minictrl |= 0x00070000;
799					/* correct normal scalingfactor so total scaling is 0.5 <= factor < 1.0x */
800					viscalv >>= 4;
801				}
802
803	/* only instruct normal scaler to scale if it must do so */
804	scale_y = true;
805	if (viscalv == 0x00010000) scale_y = false;
806
807	/* AND below is required by hardware */
808	viscalv &= 0x0000ffc0;
809
810
811	/********************************************************************************
812	 *** setup all edges of output window, setup horizontal and vertical clipping ***
813	 ********************************************************************************/
814	eng_bes_calc_move_overlay(&moi);
815
816
817	/*****************************
818	 *** log color keying info ***
819	 *****************************/
820
821	LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
822		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
823	LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
824		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
825
826
827	/*****************
828	 *** log flags ***
829	 *****************/
830
831	LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
832	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
833
834
835	/*************************************
836	 *** sync to BES (Back End Scaler) ***
837	 *************************************/
838
839	/* Done in card hardware:
840	 * double buffered registers + trigger during 'BES-'VBI feature. */
841
842
843	/**************************************
844	 *** actually program the registers ***
845	 **************************************/
846
847	if (si->ps.card_arch < K8M800)
848	{
849		/* setup clipped(!) buffer startadress in RAM */
850		/* VIA bes doesn't have clipping registers, so no subpixelprecise clipping
851		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
852		/* first include 'pixel precise' left clipping... (top clipping was already included) */
853		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
854		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
855		BESW(VID1Y_ADDR0, (moi.a1orgv & 0x07fffffc));
856
857		/* horizontal source end does not use subpixelprecision: granularity is 8 pixels */
858		/* notes:
859		 * - make absolutely sure the engine can fetch the last pixel needed from
860		 *   the sourcebitmap even if only to generate a tiny subpixel from it!
861		 * - the engine uses byte format instead of pixel format;
862		 * - the engine uses 16 bytes, so 8 pixels granularity. */
863		BESW(VID1_FETCH, (((((moi.hsrcendv >> 16) + 1 + 0x0007) & ~0x0007) * 2) << (20 - 4)));
864
865		/* enable horizontal filtering if asked for */
866		if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
867		{
868			minictrl |= (1 << 1);
869			LOG(4,("Overlay: using horizontal interpolation on scaling\n"));
870		}
871		/* enable vertical filtering if asked for */
872		if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
873		{
874			/* vertical interpolation b0, interpolation on Y, Cb and Cr all (b2) */
875			minictrl |= ((1 << 2) | (1 << 0));
876			LOG(4,("Overlay: using vertical interpolation on scaling\n"));
877		}
878		/* and program horizontal and vertical 'prescaling' for downscaling */
879		BESW(VID1_MINI_CTL, minictrl);
880
881		/* setup buffersize */
882		BESW(V1_SOURCE_WH, ((ob->height << 16) | (ob->width)));
883
884		/* setup buffer source pitch including slopspace (in bytes) */
885		BESW(VID1_STRIDE, (ob->width * 2));
886
887		/* setup output window position */
888		BESW(VID1_HVSTART, ((moi.hcoordv & 0xffff0000) | ((moi.vcoordv & 0xffff0000) >> 16)));
889
890		/* setup output window size */
891		BESW(VID1_SIZE, (((moi.hcoordv & 0x0000ffff) << 16) | (moi.vcoordv & 0x0000ffff)));
892
893		/* setup horizontal and vertical scaling:
894		 * setup horizontal scaling enable (b31), setup vertical scaling enable (b15).
895		 * Note:
896		 * Vertical scaling has a different resolution than horizontal scaling(!).  */
897		scaleval = 0x00000000;
898		if (scale_x) scaleval |= 0x80000000;
899		if (scale_y) scaleval |= 0x00008000;
900		BESW(VID1_ZOOM, (scaleval | ((hiscalv << 16) >> 5) | (viscalv >> 6)));
901
902		if (si->ps.chip_rev < 0x10)
903		{
904			/* enable BES (b0), format yuv422 (b2-4 = %000), set colorspace sign (b7 = 1),
905			 * input is frame (not field) picture (b9 = 0), expire = $5 (b16-19),
906			 * select field (not frame)(!) base (b24 = 0) */
907			BESW(VID1_CTL, 0x00050081);
908		}
909		else
910		{
911			/* enable BES (b0), format yuv422 (b2-4 = %000), set colorspace sign (b7 = 1),
912			 * input is frame (not field) picture (b9 = 0), expire = $f (b16-19),
913			 * select field (not frame)(!) base (b24 = 0) */
914			BESW(VID1_CTL, 0x000f0081);
915		}
916	}
917	else
918	{
919		/* setup clipped(!) buffer startadress in RAM */
920		/* VIA bes doesn't have clipping registers, so no subpixelprecise clipping
921		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
922		/* first include 'pixel precise' left clipping... (top clipping was already included) */
923		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
924		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
925		BESW(VID3_ADDR0, (moi.a1orgv & 0x07fffffc));
926
927		/* horizontal source end does not use subpixelprecision: granularity is 8 pixels */
928		/* notes:
929		 * - make absolutely sure the engine can fetch the last pixel needed from
930		 *   the sourcebitmap even if only to generate a tiny subpixel from it!
931		 * - the engine uses byte format instead of pixel format;
932		 * - the engine uses 16 bytes, so 8 pixels granularity. */
933		BESW(V3A_FETCH, (((((moi.hsrcendv >> 16) + 1 + 0x0007) & ~0x0007) * 2) << (20 - 4)));
934
935		/* enable horizontal filtering if asked for */
936		if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
937		{
938			minictrl |= (1 << 1);
939			LOG(4,("Overlay: using horizontal interpolation on scaling\n"));
940		}
941		/* enable vertical filtering if asked for */
942		if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
943		{
944			/* vertical interpolation b0, interpolation on Y, Cb and Cr all (b2) */
945			minictrl |= ((1 << 2) | (1 << 0));
946			LOG(4,("Overlay: using vertical interpolation on scaling\n"));
947		}
948		/* and program horizontal and vertical 'prescaling' for downscaling */
949		BESW(VID3_MINI_CTL, minictrl);
950
951		/* setup buffersize (V3 does not need ob->height(?)) */
952		BESW(V3_SRC_WIDTH, (ob->width));
953
954		/* setup buffer source pitch including slopspace (in bytes) */
955		BESW(VID3_STRIDE, (ob->width * 2));
956
957		/* setup output window position */
958		BESW(VID3_HVSTART, ((moi.hcoordv & 0xffff0000) | ((moi.vcoordv & 0xffff0000) >> 16)));
959
960		/* setup output window size */
961		BESW(VID3_SIZE, (((moi.hcoordv & 0x0000ffff) << 16) | (moi.vcoordv & 0x0000ffff)));
962
963		/* setup horizontal and vertical scaling:
964		 * setup horizontal scaling enable (b31), setup vertical scaling enable (b15).
965		 * Note:
966		 * Vertical scaling has a different resolution than horizontal scaling(!).  */
967		scaleval = 0x00000000;
968		if (scale_x) scaleval |= 0x80000000;
969		if (scale_y) scaleval |= 0x00008000;
970		BESW(VID3_ZOOM, (scaleval | ((hiscalv << 16) >> 5) | (viscalv >> 6)));
971
972		/* enable BES (b0), format yuv422 (b2-3 = %00), set colorspace sign (b7 = 1),
973		 * expire = $8 (b16-19), select field (not frame)(!) base (b24 = 0),
974		 * enable prefetch (b30 = 1) */
975		BESW(VID3_CTL, 0x40080081);
976	}
977
978
979	/**************************
980	 *** setup color keying ***
981	 **************************/
982
983	/* setup colorkeying */
984	switch(si->dm.space)
985	{
986	case B_CMAP8:
987		{
988			/* do color palette index lookup for current colorkey */
989			/* note:
990			 * since apparantly some hardware works with color indexes instead of colors,
991			 * it might be a good idea(!!) to include the colorindex in the system's
992			 * overlay_window struct. */
993			static uint8 *r,*g,*b;
994			static uint32 idx;
995			r = si->color_data;
996			g = r + 256;
997			b = g + 256;
998			/* if index 1 doesn't help us, we assume 0 will (got to program something anyway) */
999			//fixme, note, tweakalert:
1000			//I'm counting down for a reason:
1001			//BeOS assigns the color white (0x00ffffff) to two indexes in the palette:
1002			//index 0x3f and 0xff. In the framebuffer index 0xff is used (apparantly).
1003			//The hardware compares framebuffer to given key, so the BES must receive 0xff.
1004			for (idx = 255; idx > 0; idx--)
1005			{
1006				if ((r[idx] == ow->red.value) &&
1007					(g[idx] == ow->green.value) &&
1008					(b[idx] == ow->blue.value))
1009						break;
1010			}
1011			LOG(4,("Overlay: colorkey's palette index is $%02x\n", idx));
1012			/* program color palette index into BES engine */
1013			BESW(COLKEY, idx);
1014		}
1015		break;
1016	case B_RGB15_LITTLE:
1017		BESW(COLKEY, (
1018			((ow->blue.value & ow->blue.mask) << 0)   |
1019			((ow->green.value & ow->green.mask) << 5) |
1020			((ow->red.value & ow->red.mask) << 10)
1021			/* alpha keying is not supported here */
1022			));
1023		break;
1024	case B_RGB16_LITTLE:
1025		BESW(COLKEY, (
1026			((ow->blue.value & ow->blue.mask) << 0)   |
1027			((ow->green.value & ow->green.mask) << 5) |
1028			((ow->red.value & ow->red.mask) << 11)
1029			/* this space has no alpha bits */
1030			));
1031		break;
1032	case B_RGB32_LITTLE:
1033	default:
1034		BESW(COLKEY, (
1035			((ow->blue.value & ow->blue.mask) << 0)   |
1036			((ow->green.value & ow->green.mask) << 8) |
1037			((ow->red.value & ow->red.mask) << 16)
1038			/* alpha keying is not supported here */
1039			));
1040		break;
1041	}
1042
1043	if (si->ps.card_arch < K8M800)
1044	{
1045		/* disable chromakeying (b1 = 0), Vid1 on top of Vid3 (b20 = 0),
1046		 * all registers are loaded during the next 'BES-'VBI (b28 = 1), Vid1 cmds fire (b31 = 1) */
1047		if (ow->flags & B_OVERLAY_COLOR_KEY)
1048		{
1049			/* enable colorkeying (b0 = 1) */
1050			BESW(COMPOSE, 0x90000001);
1051		}
1052		else
1053		{
1054			/* disable colorkeying (b0 = 0) */
1055			BESW(COMPOSE, 0x90000000);
1056		}
1057	}
1058	else
1059	{
1060		/* disable chromakeying (b1 = 0), Vid3 on top of Vid1 (b20 = 1),
1061		 * all registers are loaded during the next 'BES-'VBI (b8 = 1), Vid3 cmds fire (b30 = 1) */
1062		if (ow->flags & B_OVERLAY_COLOR_KEY)
1063		{
1064			/* enable colorkeying (b0 = 1) */
1065			BESW(COMPOSE, 0x40100101);
1066		}
1067		else
1068		{
1069			/* disable colorkeying (b0 = 0) */
1070			BESW(COMPOSE, 0x40100100);
1071		}
1072	}
1073
1074	/* note that overlay is in use (for eng_bes_move_overlay()) */
1075	si->overlay.active = true;
1076
1077	return B_OK;
1078}
1079
1080status_t eng_release_bes()
1081{
1082	if (si->ps.card_arch < K8M800)
1083	{
1084		/* setup BES control: disable scaler (b0 = 0) */
1085		BESW(VID1_CTL, 0x00000000);
1086
1087		/* make sure the 'disable' command really gets executed: (no 'VBI' anymore if BES disabled) */
1088		/* all registers are loaded immediately (b29 = 1), Vid1 cmds fire (b31 = 1) */
1089		BESW(COMPOSE, 0xa0000000);
1090	}
1091	else
1092	{
1093		/* setup BES control: disable scaler (b0 = 0) */
1094		BESW(VID3_CTL, 0x00000000);
1095
1096		/* make sure the 'disable' command really gets executed: (no 'VBI' anymore if BES disabled) */
1097		/* all registers are loaded immediately (b29 = 1), Vid3 cmds fire (b30 = 1) */
1098		BESW(COMPOSE, 0x60000000);//fixme >>>!<<< test b29, should that be b27??
1099	}
1100
1101	/* note that overlay is not in use (for eng_bes_move_overlay()) */
1102	si->overlay.active = false;
1103
1104	return B_OK;
1105}
1106