1/* G200-G550 Back End Scaler functions */
2/* Written by Rudolf Cornelissen 05/2002-11/2009 */
3
4#define MODULE_BIT 0x00000200
5
6#include "mga_std.h"
7
8typedef struct move_overlay_info move_overlay_info;
9
10struct move_overlay_info
11{
12	uint32 hcoordv;		/* left and right edges of video output window */
13	uint32 vcoordv;		/* top and bottom edges of video output window */
14	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15	uint32 hsrcendv;	/* horizontal source end in source buffer (clipping) */
16	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
17	uint32 a1orgv;		/* alternate source clipping via startadress of source buffer */
18};
19
20static void gx00_bes_calc_move_overlay(move_overlay_info *moi);
21static void gx00_bes_program_move_overlay(move_overlay_info moi);
22
23/* move the overlay output window in virtualscreens */
24/* Note:
25 * si->dm.h_display_start and si->dm.v_display_start determine where the new
26 * output window is located! */
27void gx00_bes_move_overlay()
28{
29	move_overlay_info moi;
30
31	/* abort if overlay is not active */
32	if (!si->overlay.active) return;
33
34	gx00_bes_calc_move_overlay(&moi);
35	gx00_bes_program_move_overlay(moi);
36}
37
38static void gx00_bes_calc_move_overlay(move_overlay_info *moi)
39{
40	/* misc used variables */
41	uint16 temp1, temp2;
42	/* visible screen window in virtual workspaces */
43	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
44
45	/* the BES does not respect virtual_workspaces, but adheres to CRTC
46	 * constraints only */
47	crtc_hstart = si->dm.h_display_start;
48	/* make dualhead switch mode with TVout enabled work while we're at it.. */
49	if (si->switched_crtcs)
50	{
51		crtc_hstart += si->dm.timing.h_display;
52	}
53	/* horizontal end is the first position beyond the displayed range on the CRTC */
54	crtc_hend = crtc_hstart + si->dm.timing.h_display;
55	crtc_vstart = si->dm.v_display_start;
56	/* vertical end is the first position beyond the displayed range on the CRTC */
57	crtc_vend = crtc_vstart + si->dm.timing.v_display;
58
59
60	/****************************************
61	 *** setup all edges of output window ***
62	 ****************************************/
63
64	/* setup left and right edges of output window */
65	moi->hcoordv = 0;
66	/* left edge coordinate of output window, must be inside desktop */
67	/* clipping on the left side */
68	if (si->overlay.ow.h_start < crtc_hstart)
69	{
70		temp1 = 0;
71	}
72	else
73	{
74		/* clipping on the right side */
75		if (si->overlay.ow.h_start >= (crtc_hend - 1))
76		{
77			/* width < 2 is not allowed */
78			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
79		}
80		else
81		/* no clipping here */
82		{
83			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
84		}
85	}
86	moi->hcoordv |= temp1 << 16;
87	/* right edge coordinate of output window, must be inside desktop */
88	/* width < 2 is not allowed */
89	if (si->overlay.ow.width < 2)
90	{
91		temp2 = (temp1 + 1) & 0x7ff;
92	}
93	else
94	{
95		/* clipping on the right side */
96		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
97		{
98			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
99		}
100		else
101		{
102			/* clipping on the left side */
103			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
104			{
105				/* width < 2 is not allowed */
106				temp2 = 1;
107			}
108			else
109			/* no clipping here */
110			{
111				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
112			}
113		}
114	}
115	moi->hcoordv |= temp2 << 0;
116	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
117
118	/* setup top and bottom edges of output window */
119	moi->vcoordv = 0;
120	/* top edge coordinate of output window, must be inside desktop */
121	/* clipping on the top side */
122	if (si->overlay.ow.v_start < crtc_vstart)
123	{
124		temp1 = 0;
125	}
126	else
127	{
128		/* clipping on the bottom side */
129		if (si->overlay.ow.v_start >= (crtc_vend - 1))
130		{
131			/* height < 2 is not allowed */
132			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
133		}
134		else
135		/* no clipping here */
136		{
137			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
138		}
139	}
140	moi->vcoordv |= temp1 << 16;
141	/* bottom edge coordinate of output window, must be inside desktop */
142	/* height < 2 is not allowed */
143	if (si->overlay.ow.height < 2)
144	{
145		temp2 = (temp1 + 1) & 0x7ff;
146	}
147	else
148	{
149		/* clipping on the bottom side */
150		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
151		{
152			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
153		}
154		else
155		{
156			/* clipping on the top side */
157			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
158			{
159				/* height < 2 is not allowed */
160				temp2 = 1;
161			}
162			else
163			/* no clipping here */
164			{
165				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
166			}
167		}
168	}
169	moi->vcoordv |= temp2 << 0;
170	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
171
172
173	/*********************************
174	 *** setup horizontal clipping ***
175	 *********************************/
176
177	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
178	/* Note:
179	 * The method is to calculate, based on 1:1 scaling, based on the output window.
180	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
181	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
182	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
183	/* Note also:
184	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
185	moi->hsrcstv = 0;
186	/* check for destination horizontal clipping at left side */
187	if (si->overlay.ow.h_start < crtc_hstart)
188	{
189		/* check if entire destination picture is clipping left:
190		 * (2 pixels will be clamped onscreen at least) */
191		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
192		{
193			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
194			moi->hsrcstv += (si->overlay.ow.width - 2);
195		}
196		else
197		{
198			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
199			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
200		}
201		LOG(4,("Overlay: clipping left...\n"));
202
203		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
204		 * Note that this also already takes care of aligning the value to the BES register! */
205		moi->hsrcstv *= si->overlay.h_ifactor;
206	}
207	/* take zoom into account */
208	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
209	/* AND below required by hardware */
210	moi->hsrcstv &= 0x07fffffc;
211	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
212
213	/* Setup horizontal source end: last (sub)pixel contributing to output picture */
214	/* Note:
215	 * The method is to calculate, based on 1:1 scaling, based on the output window.
216	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
217	 * Then add the right ending position of the bitmap's view (zoom function) to get the final value needed. */
218	/* Note also:
219	 * Even if the scaling factor is clamping we instruct the BES to use the correct source end pos.! */
220	moi->hsrcendv = 0;
221	/* check for destination horizontal clipping at right side */
222	if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
223	{
224		/* check if entire destination picture is clipping right:
225		 * (2 pixels will be clamped onscreen at least) */
226		if (si->overlay.ow.h_start > (crtc_hend - 2))
227		{
228			/* increase 'number of clipping pixels' with 'fixed value': (total dest. width - 2) */
229			moi->hsrcendv += (si->overlay.ow.width - 2);
230		}
231		else
232		{
233			/* increase 'number of clipping pixels' with actual number of dest. clipping pixels */
234			moi->hsrcendv += ((si->overlay.ow.h_start + si->overlay.ow.width - 1) - (crtc_hend - 1));
235		}
236		LOG(4,("Overlay: clipping right...\n"));
237
238		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
239		 * Note that this also already takes care of aligning the value to the BES register! */
240		moi->hsrcendv *= si->overlay.h_ifactor;
241		/* now subtract this value from the last used pixel in (zoomed) inputbuffer, aligned to BES */
242		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16) - moi->hsrcendv;
243	}
244	else
245	{
246		/* set last contributing pixel to last used pixel in (zoomed) inputbuffer, aligned to BES */
247		moi->hsrcendv = (((uint32)((si->overlay.my_ov.h_start + si->overlay.my_ov.width) - 1)) << 16);
248	}
249	/* AND below required by hardware (confirmed G200 can do upto 1024 pixels, G450 and G550 can do above.) */
250	moi->hsrcendv &= 0x07fffffc;
251	LOG(4,("Overlay: last horizontal (sub)pixel of input bitmap contributing %f\n", moi->hsrcendv / (float)65536));
252
253
254	/*******************************
255	 *** setup vertical clipping ***
256	 *******************************/
257
258	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
259	/* Note: this exists of two parts:
260	 * 1. setup fractional part (sign is always 'positive');
261	 * 2. setup relative base_adress, taking clipping on top (and zoom) into account.
262	 * Both parts are done intertwined below. */
263	/* Note:
264	 * The method is to calculate, based on 1:1 scaling, based on the output window.
265	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
266	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
267	/* Note also:
268	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
269
270	/* calculate relative base_adress and 'vertical weight fractional part' */
271	moi->v1srcstv = 0;
272	/* calculate origin adress */
273	moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer);
274	moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer);
275	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
276	/* check for destination vertical clipping at top side */
277	if (si->overlay.ow.v_start < crtc_vstart)
278	{
279		/* check if entire destination picture is clipping at top:
280		 * (2 pixels will be clamped onscreen at least) */
281		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
282		{
283			/* increase source buffer origin with 'fixed value':
284			 * (integer part of ('total height - 2' of dest. picture in pixels * inverse scaling factor)) *
285			 * bytes per row source picture */
286			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
287			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
288		}
289		else
290		{
291			/* increase source buffer origin with:
292			 * (integer part of (number of destination picture clipping pixels * inverse scaling factor)) *
293			 * bytes per row source picture */
294			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
295			moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
296		}
297		LOG(4,("Overlay: clipping at top...\n"));
298	}
299	/* take zoom into account */
300	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
301	moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
302	LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
303	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
304
305	/* Note:
306	 * Because all > G200 overlay units will ignore b0-3 of the calculated adress,
307	 * we do not use the above way for horizontal source positioning.
308	 * (G200 cards ignore b0-2.)
309	 * If we did, 8 source-image pixel jumps (in 4:2:2 colorspace) will occur if the picture
310	 * is shifted horizontally during left clipping on all > G200 cards, while G200 cards
311	 * will have 4 source-image pixel jumps occuring. */
312
313	/* AND below is required by G200-G550 hardware. > G200 cards can have max. 32Mb RAM on board
314	 * (16Mb on G200 cards). Compatible setting used (between G200 and the rest), this has no
315	 * downside consequences here. */
316	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
317	moi->a1orgv &= 0x01fffff0;
318
319	/* field 1 weight: AND below required by hardware, also make sure 'sign' is always 'positive' */
320	moi->v1srcstv &= 0x0000fffc;
321}
322
323static void gx00_bes_program_move_overlay(move_overlay_info moi)
324{
325	/*************************************
326	 *** sync to BES (Back End Scaler) ***
327	 *************************************/
328
329	/* Make sure reprogramming the BES completes before the next retrace occurs,
330	 * to prevent register-update glitches (double buffer feature). */
331
332	LOG(3,("Overlay: starting register programming beyond Vcount %d\n", CR1R(VCOUNT)));
333	/* Even at 1600x1200x90Hz, a single line still takes about 9uS to complete:
334	 * this resolution will generate about 180Mhz pixelclock while we can do
335	 * upto 360Mhz. So snooze about 4uS to prevent bus-congestion...
336	 * Appr. 200 lines time will provide enough room even on a 100Mhz CPU if it's
337	 * screen is set to the highest refreshrate/resolution possible. */
338	while ((uint16)CR1R(VCOUNT) > (si->dm.timing.v_total - 200)) snooze(4);
339
340
341	/**************************************
342	 *** actually program the registers ***
343	 **************************************/
344
345	BESW(HCOORD, moi.hcoordv);
346	BESW(VCOORD, moi.vcoordv);
347	BESW(HSRCST, moi.hsrcstv);
348	BESW(HSRCEND, moi.hsrcendv);
349	BESW(A1ORG, moi.a1orgv);
350	BESW(V1WGHT, moi.v1srcstv);
351
352	/* on a 500Mhz P3 CPU just logging a line costs 400uS (18-19 vcounts at 1024x768x60Hz)!
353	 * programming the registers above actually costs 180uS here */
354	LOG(3,("Overlay: completed at Vcount %d\n", CR1R(VCOUNT)));
355}
356
357status_t gx00_configure_bes
358	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
359{
360	/* yuy2 (4:2:2) colorspace calculations */
361	/* Note: Some calculations will have to be modified for other colorspaces if they are incorporated. */
362
363	/* Note:
364	 * in BeOS R5.0.3 and DANO:
365	 * 'ow->offset_xxx' is always 0, so not used;
366	 * 'ow->width' and 'ow->height' are the output window size: does not change
367	 * if window is clipping;
368	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
369	 * window. These values can be negative: this means the window is clipping
370	 * at the left or the top of the display, respectively. */
371
372	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
373	 * displayed on screen. This is used for the 'hardware zoom' function. */
374
375	/* output window position and clipping info for source buffer */
376	move_overlay_info moi;
377	/* calculated BES register values */
378	uint32 	hiscalv, hsrclstv, viscalv, v1srclstv, globctlv, ctlv;
379	/* interval representation, used for scaling calculations */
380	uint16 intrep;
381	/* inverse scaling factor, used for source positioning */
382	uint32 ifactor;
383	/* copy of overlay view which has checked valid values */
384	overlay_view my_ov;
385
386	/* Slowdown the G200-G550 BES if the pixelclock is too high for it to cope.
387	 * This will in fact half the horizontal resolution of the BES with high
388	 * pixelclocks (by setting a BES hardware 'zoom' = 2x).
389	 * If you want optimal output quality better make sure you set the refreshrate/resolution
390	 * of your monitor not too high ... */
391	uint16 acczoom = 1;
392	LOG(4,("Overlay: pixelclock is %dkHz, ", si->dm.timing.pixel_clock));
393	if (si->dm.timing.pixel_clock > BESMAXSPEED)
394	{
395		/* BES running at half speed and resolution */
396		/* This is how it works (BES slowing down):
397		 * - Activate BES internal horizontal hardware scaling = 4x (in GLOBCTL below),
398		 * - This also sets up BES only getting half the amount of pixels per line from
399		 *   the input picture buffer (in effect half-ing the BES pixelclock input speed).
400		 * Now in order to get the picture back to original size, we need to also double
401		 * the inverse horizontal scaling factor here (x4 /2 /2 = 1x again).
402		 * Note that every other pixel is now doubled or interpolated, according to another
403		 * GLOBCTL bit. */
404		acczoom = 2;
405		LOG(4,("slowing down BES!\n"));
406	}
407	else
408	{
409		/* BES running at full speed and resolution */
410		LOG(4,("BES is running at full speed\n"));
411	}
412
413
414	/**************************************************************************************
415	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
416	 **************************************************************************************/
417	my_ov = *ov;
418	/* check for valid 'coordinates' */
419	if (my_ov.width == 0) my_ov.width++;
420	if (my_ov.height == 0) my_ov.height++;
421	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
422		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
423	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
424		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
425	if (my_ov.v_start > (ob->height - 1))
426		my_ov.v_start = (ob->height - 1);
427	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
428		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
429
430	LOG(6,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
431		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
432
433	/* save for nv_bes_calc_move_overlay() */
434	si->overlay.ow = *ow;
435	si->overlay.ob = *ob;
436	si->overlay.my_ov = my_ov;
437
438
439	/********************************
440	 *** setup horizontal scaling ***
441	 ********************************/
442
443	LOG(6,("Overlay: total input picture width = %d, height = %d\n",
444			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
445	LOG(6,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
446
447	/* determine interval representation value, taking zoom into account */
448	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
449	{
450		/* horizontal filtering is ON */
451		if ((my_ov.width == ow->width) | (ow->width < 2))
452		{
453			/* no horizontal scaling used, OR destination width < 2 */
454			intrep = 0;
455		}
456		else
457		{
458			intrep = 1;
459		}
460	}
461	else
462	{
463		/* horizontal filtering is OFF */
464		if ((ow->width < my_ov.width) & (ow->width >= 2))
465		{
466			/* horizontal downscaling used AND destination width >= 2 */
467			intrep = 1;
468		}
469		else
470		{
471			intrep = 0;
472		}
473	}
474	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
475
476	/* calculate inverse horizontal scaling factor, taking zoom into account */
477	/* standard scaling formula: */
478	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
479
480	/* correct factor to prevent most-right visible 'line' from distorting */
481	ifactor -= (1 << 2);
482	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
483
484	/* compensate for accelerated 2x zoom (slowdown BES if pixelclock is too high) */
485	hiscalv = ifactor * acczoom;
486	/* save for gx00_bes_calc_move_overlay() */
487	si->overlay.h_ifactor = ifactor;
488	LOG(4,("Overlay: horizontal speed compensated factor is %f\n", (float)65536 / hiscalv));
489
490	/* check scaling factor (and modify if needed) to be within scaling limits */
491	if (((((uint32)my_ov.width) << 16) / 16384) > hiscalv)
492	{
493		/* (non-inverse) factor too large, set factor to max. valid value */
494		hiscalv = ((((uint32)my_ov.width) << 16) / 16384);
495		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
496	}
497	if (hiscalv >= (32 << 16))
498	{
499		/* (non-inverse) factor too small, set factor to min. valid value */
500		hiscalv = 0x1ffffc;
501		LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
502	}
503	/* AND below is required by hardware */
504	hiscalv &= 0x001ffffc;
505
506
507	/******************************
508	 *** setup vertical scaling ***
509	 ******************************/
510
511	/* determine interval representation value, taking zoom into account */
512	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
513	{
514		/* vertical filtering is ON */
515		if ((my_ov.height == ow->height) | (ow->height < 2))
516		{
517			/* no vertical scaling used, OR destination height < 2 */
518			intrep = 0;
519		}
520		else
521		{
522			intrep = 1;
523		}
524	}
525	else
526	{
527		/* vertical filtering is OFF */
528		if ((ow->height < my_ov.height) & (ow->height >= 2))
529		{
530			/* vertical downscaling used AND destination height >= 2 */
531			intrep = 1;
532		}
533		else
534		{
535			intrep = 0;
536		}
537	}
538	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
539
540	/* calculate inverse vertical scaling factor, taking zoom into account */
541	/* standard scaling formula: */
542	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
543
544	/* correct factor to prevent lowest visible line from distorting */
545	ifactor -= (1 << 2);
546	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
547
548	/* preserve ifactor for source positioning calculations later on */
549	viscalv = ifactor;
550	/* save for gx00_bes_calc_move_overlay() */
551	si->overlay.v_ifactor = ifactor;
552
553	/* check scaling factor (and modify if needed) to be within scaling limits */
554	if (((((uint32)my_ov.height) << 16) / 16384) > viscalv)
555	{
556		/* (non-inverse) factor too large, set factor to max. valid value */
557		viscalv = ((((uint32)my_ov.height) << 16) / 16384);
558		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
559	}
560	if (viscalv >= (32 << 16))
561	{
562		/* (non-inverse) factor too small, set factor to min. valid value */
563		viscalv = 0x1ffffc;
564		LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
565	}
566	/* AND below is required by hardware */
567	viscalv &= 0x001ffffc;
568
569
570	/********************************************************************************
571	 *** setup all edges of output window, setup horizontal and vertical clipping ***
572	 ********************************************************************************/
573	gx00_bes_calc_move_overlay(&moi);
574
575
576	/***************************************
577	 *** setup misc. source bitmap stuff ***
578	 ***************************************/
579
580	/* setup horizontal source last position excluding slopspace:
581	 * this is the last pixel that will be used for calculating interpolated pixels */
582	hsrclstv = ((ob->width - 1) - si->overlay.myBufInfo[offset].slopspace) << 16;
583	/* AND below required by hardware */
584	hsrclstv &= 0x07ff0000;
585
586	/* setup field 1 (is our complete frame) vertical source last position.
587	 * this is the last pixel that will be used for calculating interpolated pixels */
588	v1srclstv = (ob->height - 1);
589	/* AND below required by hardware */
590	v1srclstv &= 0x000007ff;
591
592
593	/*****************************
594	 *** log color keying info ***
595	 *****************************/
596
597	LOG(6,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
598		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
599	LOG(6,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
600		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
601
602
603	/*************************
604	 *** setup BES control ***
605	 *************************/
606
607	/* BES global control: setup functions */
608	globctlv = 0;
609
610	/* slowdown BES if nessesary */
611	if (acczoom == 1)
612	{
613		/* run at full speed and resolution */
614		globctlv |= 0 << 0;
615		/* disable filtering for half speed interpolation */
616		globctlv |= 0 << 1;
617	}
618	else
619	{
620		/* run at half speed and resolution */
621		globctlv |= 1 << 0;
622		/* enable filtering for half speed interpolation */
623		globctlv |= 1 << 1;
624	}
625
626	/* 4:2:0 specific setup: not needed here */
627	globctlv |= 0 << 3;
628	/* BES testregister: keep zero */
629	globctlv |= 0 << 4;
630	/* the following bits marked (> G200) *must* be zero on G200: */
631	/* 4:2:0 specific setup: not needed here (> G200) */
632	globctlv |= 0 << 5;
633	/* select yuy2 byte-order to B_YCbCr422 (> G200) */
634	globctlv |= 0 << 6;
635	/* BES internal contrast and brighness controls are not used, disabled (> G200) */
636	globctlv |= 0 << 7;
637	/* RGB specific setup: not needed here, so disabled (> G200) */
638	globctlv |= 0 << 8;
639	globctlv |= 0 << 9;
640	/* 4:2:0 specific setup: not needed here (> G200) */
641	globctlv |= 0 << 10;
642	/* Tell BES when to copy the new register values to the actual active registers.
643	 * bits 16-27 (12 bits) are the CRTC vert. count value at which copying takes
644	 * place.
645	 * (This is the double buffering feature: programming must be completed *before*
646	 *  the CRTC vert count value set here!) */
647	/* CRTC vert count for copying = $000, so during retrace, line 0. */
648	globctlv |= 0x000 << 16;
649
650	/* BES control: enable scaler and setup functions */
651	/* pre-reset all bits */
652	ctlv = 0;
653	/* enable BES */
654	ctlv |= 1 << 0;
655	/* we start displaying at an even startline (zero) in 'field 1' (no hardware de-interlacing is used) */
656	ctlv |= 0 << 6;
657	/* we don't use field 2, so its startline is not important */
658	ctlv |= 0 << 7;
659
660	LOG(6,("Overlay: ow->flags is $%08x\n",ow->flags));
661	/* enable horizontal filtering on scaling if asked for: if we *are* actually scaling */
662	if ((ow->flags & B_OVERLAY_HORIZONTAL_FILTERING) && (hiscalv != (0x01 << 16)))
663	{
664		ctlv |= 1 << 10;
665		LOG(6,("Overlay: using horizontal interpolation on scaling\n"));
666	}
667	else
668	{
669		ctlv |= 0 << 10;
670		LOG(6,("Overlay: using horizontal dropping or replication on scaling\n"));
671	}
672	/* enable vertical filtering on scaling if asked for: if we are *upscaling* only */
673	if ((ow->flags & B_OVERLAY_VERTICAL_FILTERING) && (viscalv < (0x01 << 16)) && (ob->width <= 1024))	{
674		ctlv |= 1 << 11;
675		LOG(6,("Overlay: using vertical interpolation on scaling\n"));
676	} else {
677		ctlv |= 0 << 11;
678		LOG(6,("Overlay: using vertical dropping or replication on scaling\n"));
679	}
680
681	/* use actual calculated weight for horizontal interpolation */
682	ctlv |= 0 << 12;
683	/* use horizontal chroma interpolation upsampling on BES input picture */
684	ctlv |= 1 << 16;
685	/* select 4:2:2 BES input format */
686	ctlv |= 0 << 17;
687	/* dithering is enabled */
688	ctlv |= 1 << 18;
689	/* horizontal mirroring is not used */
690	ctlv |= 0 << 19;
691	/* BES output should be in color */
692	ctlv |= 0 << 20;
693	/* BES output blanking is disabled: we want a picture, no 'black box'! */
694	ctlv |= 0 << 21;
695	/* we do software field select (field select is not used) */
696	ctlv |= 0 << 24;
697	/* we always display field 1 in buffer A, this contains our full frames */
698	/* select field 1 */
699	ctlv |= 0 << 25;
700	/* select buffer A */
701	ctlv |= 0 << 26;
702
703
704	/*************************************
705	 *** sync to BES (Back End Scaler) ***
706	 *************************************/
707
708	/* Make sure reprogramming the BES completes before the next retrace occurs,
709	 * to prevent register-update glitches (double buffer feature). */
710
711	LOG(3,("Overlay: starting register programming beyond Vcount %d\n", CR1R(VCOUNT)));
712	/* Even at 1600x1200x90Hz, a single line still takes about 9uS to complete:
713	 * this resolution will generate about 180Mhz pixelclock while we can do
714	 * upto 360Mhz. So snooze about 4uS to prevent bus-congestion...
715	 * Appr. 200 lines time will provide enough room even on a 100Mhz CPU if it's
716	 * screen is set to the highest refreshrate/resolution possible. */
717	while ((uint16)CR1R(VCOUNT) > (si->dm.timing.v_total - 200)) snooze(4);
718
719
720	/**************************************
721	 *** actually program the registers ***
722	 **************************************/
723
724	BESW(HCOORD, moi.hcoordv);
725	BESW(VCOORD, moi.vcoordv);
726	BESW(HISCAL, hiscalv);
727	BESW(HSRCST, moi.hsrcstv);
728	BESW(HSRCEND, moi.hsrcendv);
729	BESW(HSRCLST, hsrclstv);
730	BESW(VISCAL, viscalv);
731	BESW(A1ORG, moi.a1orgv);
732	BESW(V1WGHT, moi.v1srcstv);
733	BESW(V1SRCLST, v1srclstv);
734	BESW(GLOBCTL, globctlv);
735	BESW(CTL, ctlv);
736
737
738	/**************************
739	 *** setup color keying ***
740	 **************************/
741
742	/* setup colorkeying */
743	DXIW(COLKEY, (ow->alpha.value & ow->alpha.mask));
744
745	DXIW(COLKEY0RED, (ow->red.value & ow->red.mask));
746	DXIW(COLKEY0GREEN, (ow->green.value & ow->green.mask));
747	DXIW(COLKEY0BLUE, (ow->blue.value & ow->blue.mask));
748
749	DXIW(COLMSK, ow->alpha.mask);
750
751	DXIW(COLMSK0RED, ow->red.mask);
752	DXIW(COLMSK0GREEN, ow->green.mask);
753	DXIW(COLMSK0BLUE, ow->blue.mask);
754
755	/* setup colorkeying */
756	if (ow->flags & B_OVERLAY_COLOR_KEY)
757		DXIW(KEYOPMODE,0x01);
758	else
759		DXIW(KEYOPMODE,0x00);
760
761
762	/*************************
763	 *** setup misc. stuff ***
764	 *************************/
765
766	/* setup brightness and contrast to be 'neutral' (this is not implemented on G200) */
767	BESW(LUMACTL, 0x00000080);
768
769	/* setup source pitch including slopspace (in pixels); AND is required by hardware */
770	BESW(PITCH, (ob->width & 0x00000fff));
771
772	/* on a 500Mhz P3 CPU just logging a line costs 400uS (18-19 vcounts at 1024x768x60Hz)!
773	 * programming the registers above actually costs 180uS here */
774	LOG(3,("Overlay: completed at Vcount %d\n", CR1R(VCOUNT)));
775
776	/* note that overlay is in use (for gx00_bes_move_overlay()) */
777	si->overlay.active = true;
778
779	return B_OK;
780}
781
782status_t gx00_release_bes()
783{
784	/* setup BES control: disable scaler */
785	BESW(CTL, 0x00000000);
786
787	/* note that overlay is not in use (for gx00_bes_move_overlay()) */
788	si->overlay.active = false;
789
790	return B_OK;
791}
792