1/* Nvidia TNT and GeForce Back End Scaler functions */
2/* Written by Rudolf Cornelissen 05/2002-9/2004 */
3
4#define MODULE_BIT 0x00000200
5
6#include "std.h"
7
8typedef struct move_overlay_info move_overlay_info;
9
10struct move_overlay_info
11{
12	uint32 hcoordv;		/* left and right edges of video output window */
13	uint32 vcoordv;		/* top and bottom edges of video output window */
14	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
16	uint32 a1orgv;		/* alternate source clipping via startadress of source buffer */
17};
18
19static void eng_bes_calc_move_overlay(move_overlay_info *moi);
20static void eng_bes_program_move_overlay(move_overlay_info moi);
21
22/* move the overlay output window in virtualscreens */
23/* Note:
24 * si->dm.h_display_start and si->dm.v_display_start determine where the new
25 * output window is located! */
26void eng_bes_move_overlay()
27{
28	move_overlay_info moi;
29
30	/* abort if overlay is not active */
31	if (!si->overlay.active) return;
32
33	eng_bes_calc_move_overlay(&moi);
34	eng_bes_program_move_overlay(moi);
35}
36
37static void eng_bes_calc_move_overlay(move_overlay_info *moi)
38{
39	/* misc used variables */
40	uint16 temp1, temp2;
41	/* visible screen window in virtual workspaces */
42	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
43
44	/* do 'overlay follow head' in dualhead modes on dualhead cards */
45	if (si->ps.secondary_head)
46	{
47		switch (si->dm.flags & DUALHEAD_BITS)
48		{
49		case DUALHEAD_ON:
50		case DUALHEAD_SWITCH:
51			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
52					(si->dm.h_display_start + si->dm.timing.h_display))
53				eng_bes_to_crtc(si->crtc_switch_mode);
54			else
55				eng_bes_to_crtc(!si->crtc_switch_mode);
56			break;
57		default:
58				eng_bes_to_crtc(si->crtc_switch_mode);
59			break;
60		}
61	}
62
63	/* the BES does not respect virtual_workspaces, but adheres to CRTC
64	 * constraints only */
65	crtc_hstart = si->dm.h_display_start;
66	/* make dualhead stretch and switch mode work while we're at it.. */
67	if (si->overlay.crtc)
68	{
69		crtc_hstart += si->dm.timing.h_display;
70	}
71
72	/* horizontal end is the first position beyond the displayed range on the CRTC */
73	crtc_hend = crtc_hstart + si->dm.timing.h_display;
74	crtc_vstart = si->dm.v_display_start;
75	/* vertical end is the first position beyond the displayed range on the CRTC */
76	crtc_vend = crtc_vstart + si->dm.timing.v_display;
77
78
79	/****************************************
80	 *** setup all edges of output window ***
81	 ****************************************/
82
83	/* setup left and right edges of output window */
84	moi->hcoordv = 0;
85	/* left edge coordinate of output window, must be inside desktop */
86	/* clipping on the left side */
87	if (si->overlay.ow.h_start < crtc_hstart)
88	{
89		temp1 = 0;
90	}
91	else
92	{
93		/* clipping on the right side */
94		if (si->overlay.ow.h_start >= (crtc_hend - 1))
95		{
96			/* width < 2 is not allowed */
97			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
98		}
99		else
100		/* no clipping here */
101		{
102			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
103		}
104	}
105	moi->hcoordv |= temp1 << 16;
106	/* right edge coordinate of output window, must be inside desktop */
107	/* width < 2 is not allowed */
108	if (si->overlay.ow.width < 2)
109	{
110		temp2 = (temp1 + 1) & 0x7ff;
111	}
112	else
113	{
114		/* clipping on the right side */
115		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
116		{
117			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
118		}
119		else
120		{
121			/* clipping on the left side */
122			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
123			{
124				/* width < 2 is not allowed */
125				temp2 = 1;
126			}
127			else
128			/* no clipping here */
129			{
130				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
131			}
132		}
133	}
134	moi->hcoordv |= temp2 << 0;
135	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
136
137	/* setup top and bottom edges of output window */
138	moi->vcoordv = 0;
139	/* top edge coordinate of output window, must be inside desktop */
140	/* clipping on the top side */
141	if (si->overlay.ow.v_start < crtc_vstart)
142	{
143		temp1 = 0;
144	}
145	else
146	{
147		/* clipping on the bottom side */
148		if (si->overlay.ow.v_start >= (crtc_vend - 1))
149		{
150			/* height < 2 is not allowed */
151			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
152		}
153		else
154		/* no clipping here */
155		{
156			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
157		}
158	}
159	moi->vcoordv |= temp1 << 16;
160	/* bottom edge coordinate of output window, must be inside desktop */
161	/* height < 2 is not allowed */
162	if (si->overlay.ow.height < 2)
163	{
164		temp2 = (temp1 + 1) & 0x7ff;
165	}
166	else
167	{
168		/* clipping on the bottom side */
169		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
170		{
171			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
172		}
173		else
174		{
175			/* clipping on the top side */
176			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
177			{
178				/* height < 2 is not allowed */
179				temp2 = 1;
180			}
181			else
182			/* no clipping here */
183			{
184				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
185			}
186		}
187	}
188	moi->vcoordv |= temp2 << 0;
189	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
190
191
192	/*********************************
193	 *** setup horizontal clipping ***
194	 *********************************/
195
196	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
197	/* Note:
198	 * The method is to calculate, based on 1:1 scaling, based on the output window.
199	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
200	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
201	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
202	/* Note also:
203	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
204	moi->hsrcstv = 0;
205	/* check for destination horizontal clipping at left side */
206	if (si->overlay.ow.h_start < crtc_hstart)
207	{
208		/* check if entire destination picture is clipping left:
209		 * (2 pixels will be clamped onscreen at least) */
210		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
211		{
212			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
213			moi->hsrcstv += (si->overlay.ow.width - 2);
214		}
215		else
216		{
217			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
218			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
219		}
220		LOG(4,("Overlay: clipping left...\n"));
221
222		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
223		 * Note that this also already takes care of aligning the value to the BES register! */
224		moi->hsrcstv *= si->overlay.h_ifactor;
225	}
226	/* take zoom into account */
227	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
228	/* AND below required by hardware */
229	moi->hsrcstv &= 0x03fffffc;
230	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
231
232
233	/*******************************
234	 *** setup vertical clipping ***
235	 *******************************/
236
237	/* calculate inputbitmap origin adress */
238	moi->a1orgv = (uint32)((vuint32 *)si->overlay.ob.buffer);
239	moi->a1orgv -= (uint32)((vuint32 *)si->framebuffer);
240	LOG(4,("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
241
242	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
243	/* Note:
244	 * The method is to calculate, based on 1:1 scaling, based on the output window.
245	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
246	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
247	/* Note also:
248	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
249
250	moi->v1srcstv = 0;
251	/* check for destination vertical clipping at top side */
252	if (si->overlay.ow.v_start < crtc_vstart)
253	{
254		/* check if entire destination picture is clipping at top:
255		 * (2 pixels will be clamped onscreen at least) */
256		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
257		{
258			/* increase 'number of clipping pixels' with 'fixed value':
259			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
260			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
261			/* on pre-NV10 we need to do clipping in the source
262			 * bitmap because no seperate clipping registers exist... */
263			if (si->ps.card_arch < NV10A)
264				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
265		}
266		else
267		{
268			/* increase 'first contributing pixel' with:
269			 * number of destination picture clipping pixels * inverse scaling factor */
270			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
271			/* on pre-NV10 we need to do clipping in the source
272			 * bitmap because no seperate clipping registers exist... */
273			if (si->ps.card_arch < NV10A)
274				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
275		}
276		LOG(4,("Overlay: clipping at top...\n"));
277	}
278	/* take zoom into account */
279	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
280	if (si->ps.card_arch < NV10A)
281	{
282		moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
283		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
284	}
285	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
286
287	/* AND below is probably required by hardware. */
288	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
289	moi->a1orgv &= 0xfffffff0;
290}
291
292static void eng_bes_program_move_overlay(move_overlay_info moi)
293{
294	/*************************************
295	 *** sync to BES (Back End Scaler) ***
296	 *************************************/
297
298	/* Done in card hardware:
299	 * double buffered registers + trigger if programming complete feature. */
300
301
302	/**************************************
303	 *** actually program the registers ***
304	 **************************************/
305
306	if (si->ps.card_arch < NV10A)
307	{
308		/* unknown, but needed (otherwise high-res distortions and only half the frames */
309		BESW(NV04_OE_STATE, 0x00000000);
310		/* select buffer 0 as active (b16) */
311		BESW(NV04_SU_STATE, 0x00000000);
312		/* unknown (no effect?) */
313		BESW(NV04_RM_STATE, 0x00000000);
314		/* setup clipped(!) buffer startadress in RAM */
315		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
316		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
317		/* (program both buffers to prevent sync distortions) */
318		/* first include 'pixel precise' left clipping... (top clipping was already included) */
319		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
320		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
321		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
322		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
323		/* setup output window position */
324		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
325		/* setup output window size */
326		BESW(NV04_DSTSIZE, (
327			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
328			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
329			));
330		/* select buffer 1 as active (b16) */
331		BESW(NV04_SU_STATE, 0x00010000);
332	}
333	else
334	{
335		/* >= NV10A */
336
337		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
338		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
339		/* setup output window position */
340		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
341		/* setup output window size */
342		BESW(NV10_0DSTSIZE, (
343			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
344			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
345			));
346		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
347		/* This also triggers activation of programmed values (double buffered registers feature) */
348		BESW(NV10_BUFSEL, 0x00000001);
349	}
350}
351
352status_t eng_bes_to_crtc(bool crtc)
353{
354	if (si->ps.secondary_head)
355	{
356		if (crtc)
357		{
358			LOG(4,("Overlay: switching overlay to CRTC2\n"));
359			/* switch overlay engine to CRTC2 */
360			ENG_RG32(RG32_FUNCSEL) &= ~0x00001000;
361			ENG_RG32(RG32_2FUNCSEL) |= 0x00001000;
362			si->overlay.crtc = !si->crtc_switch_mode;
363		}
364		else
365		{
366			LOG(4,("Overlay: switching overlay to CRTC1\n"));
367			/* switch overlay engine to CRTC1 */
368			ENG_RG32(RG32_2FUNCSEL) &= ~0x00001000;
369			ENG_RG32(RG32_FUNCSEL) |= 0x00001000;
370			si->overlay.crtc = si->crtc_switch_mode;
371		}
372		return B_OK;
373	}
374	else
375	{
376		return B_ERROR;
377	}
378}
379
380status_t eng_bes_init()
381{
382	if (si->ps.card_arch < NV10A)
383	{
384		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385		BESW(NV04_INTE, 0x00000000);
386
387		/* setup saturation to be 'neutral' */
388		BESW(NV04_SAT, 0x00000000);
389		/* setup RGB brightness to be 'neutral' */
390		BESW(NV04_RED_AMP, 0x00000069);
391		BESW(NV04_GRN_AMP, 0x0000003e);
392		BESW(NV04_BLU_AMP, 0x00000089);
393
394		/* setup fifo for fetching data */
395		BESW(NV04_FIFOBURL, 0x00000003);
396		BESW(NV04_FIFOTHRS, 0x00000038);
397
398		/* unknown, but needed (registers only have b0 implemented) */
399		/* (program both buffers to prevent sync distortions) */
400		BESW(NV04_0OFFSET, 0x00000000);
401		BESW(NV04_1OFFSET, 0x00000000);
402	}
403	else
404	{
405		/* >= NV10A */
406
407		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408		BESW(NV10_INTE, 0x00000000);
409		/* shut off GeForce4MX MPEG2 decoder */
410		BESW(DEC_GENCTRL, 0x00000000);
411		/* setup BES memory-range mask */
412		BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413		/* unknown, but needed */
414		BESW(NV10_0OFFSET, 0x00000000);
415
416		/* setup brightness, contrast and saturation to be 'neutral' */
417		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
419	}
420
421	return B_OK;
422}
423
424status_t eng_configure_bes
425	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
426{
427	/* yuy2 (4:2:2) colorspace calculations */
428
429	/* Note:
430	 * in BeOS R5.0.3 and DANO:
431	 * 'ow->offset_xxx' is always 0, so not used;
432	 * 'ow->width' and 'ow->height' are the output window size: does not change
433	 * if window is clipping;
434	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
435	 * window. These values can be negative: this means the window is clipping
436	 * at the left or the top of the display, respectively. */
437
438	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
439	 * displayed on screen. This is used for the 'hardware zoom' function. */
440
441	/* output window position and clipping info for source buffer */
442	move_overlay_info moi;
443	/* calculated BES register values */
444	uint32 	hiscalv, viscalv;
445	/* interval representation, used for scaling calculations */
446	uint16 intrep;
447	/* inverse scaling factor, used for source positioning */
448	uint32 ifactor;
449	/* copy of overlay view which has checked valid values */
450	overlay_view my_ov;
451
452
453	/**************************************************************************************
454	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
455	 **************************************************************************************/
456	my_ov = *ov;
457	/* check for valid 'coordinates' */
458	if (my_ov.width == 0) my_ov.width++;
459	if (my_ov.height == 0) my_ov.height++;
460	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
461		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
462	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
463		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
464	if (my_ov.v_start > (ob->height - 1))
465		my_ov.v_start = (ob->height - 1);
466	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
467		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
468
469	LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
470		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
471
472	/* save for eng_bes_calc_move_overlay() */
473	si->overlay.ow = *ow;
474	si->overlay.ob = *ob;
475	si->overlay.my_ov = my_ov;
476
477
478	/********************************
479	 *** setup horizontal scaling ***
480	 ********************************/
481	LOG(4,("Overlay: total input picture width = %d, height = %d\n",
482			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
483	LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
484
485	/* determine interval representation value, taking zoom into account */
486	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
487	{
488		/* horizontal filtering is ON */
489		if ((my_ov.width == ow->width) | (ow->width < 2))
490		{
491			/* no horizontal scaling used, OR destination width < 2 */
492			intrep = 0;
493		}
494		else
495		{
496			intrep = 1;
497		}
498	}
499	else
500	{
501		/* horizontal filtering is OFF */
502		if ((ow->width < my_ov.width) & (ow->width >= 2))
503		{
504			/* horizontal downscaling used AND destination width >= 2 */
505			intrep = 1;
506		}
507		else
508		{
509			intrep = 0;
510		}
511	}
512	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
513
514	/* calculate inverse horizontal scaling factor, taking zoom into account */
515	/* standard scaling formula: */
516	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
517
518	/* correct factor to prevent most-right visible 'line' from distorting */
519	ifactor -= (1 << 2);
520	hiscalv = ifactor;
521	/* save for eng_bes_calc_move_overlay() */
522	si->overlay.h_ifactor = ifactor;
523	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
524
525	/* check scaling factor (and modify if needed) to be within scaling limits */
526	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
527	if (hiscalv < 0x00002000)
528	{
529		/* (non-inverse) factor too large, set factor to max. valid value */
530		hiscalv = 0x00002000;
531		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
532	}
533	switch (si->ps.card_arch)
534	{
535	case NV04A:
536		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
537		 * (16bit register with 0.11 format value) */
538		if (hiscalv > 0x0000ffff)
539		{
540			/* (non-inverse) factor too small, set factor to min. valid value */
541			hiscalv = 0x0000ffff;
542			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
543		}
544		break;
545	case NV30A:
546	case NV40A:
547		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
548		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
549		{
550			/* (non-inverse) factor too small, set factor to min. valid value */
551			hiscalv = (2 << 16);
552			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
553		}
554		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
555		 * So let it fall through... */
556		if (si->ps.card_type != NV31) break;
557	default:
558		/* the rest has a downscaling limit of 0.125 */
559		if (hiscalv > (8 << 16))
560		{
561			/* (non-inverse) factor too small, set factor to min. valid value */
562			hiscalv = (8 << 16);
563			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
564		}
565		break;
566	}
567	/* AND below is required by hardware */
568	hiscalv &= 0x001ffffc;
569
570
571	/******************************
572	 *** setup vertical scaling ***
573	 ******************************/
574
575	/* determine interval representation value, taking zoom into account */
576	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
577	{
578		/* vertical filtering is ON */
579		if ((my_ov.height == ow->height) | (ow->height < 2))
580		{
581			/* no vertical scaling used, OR destination height < 2 */
582			intrep = 0;
583		}
584		else
585		{
586			intrep = 1;
587		}
588	}
589	else
590	{
591		/* vertical filtering is OFF */
592		if ((ow->height < my_ov.height) & (ow->height >= 2))
593		{
594			/* vertical downscaling used AND destination height >= 2 */
595			intrep = 1;
596		}
597		else
598		{
599			intrep = 0;
600		}
601	}
602	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
603
604	/* calculate inverse vertical scaling factor, taking zoom into account */
605	/* standard scaling formula: */
606	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
607
608	/* correct factor to prevent lowest visible line from distorting */
609	ifactor -= (1 << 2);
610	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
611
612	/* preserve ifactor for source positioning calculations later on */
613	viscalv = ifactor;
614	/* save for eng_bes_calc_move_overlay() */
615	si->overlay.v_ifactor = ifactor;
616
617	/* check scaling factor (and modify if needed) to be within scaling limits */
618	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
619	if (viscalv < 0x00002000)
620	{
621		/* (non-inverse) factor too large, set factor to max. valid value */
622		viscalv = 0x00002000;
623		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
624	}
625	switch (si->ps.card_arch)
626	{
627	case NV04A:
628		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
629		 * (16bit register with 0.11 format value) */
630		if (viscalv > 0x0000ffff)
631		{
632			/* (non-inverse) factor too small, set factor to min. valid value */
633			viscalv = 0x0000ffff;
634			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
635		}
636		break;
637	case NV30A:
638	case NV40A:
639		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
640		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
641		{
642			/* (non-inverse) factor too small, set factor to min. valid value */
643			viscalv = (2 << 16);
644			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
645		}
646		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
647		 * So let it fall through... */
648		if (si->ps.card_type != NV31) break;
649	default:
650		/* the rest has a downscaling limit of 0.125 */
651		if (viscalv > (8 << 16))
652		{
653			/* (non-inverse) factor too small, set factor to min. valid value */
654			viscalv = (8 << 16);
655			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
656		}
657		break;
658	}
659	/* AND below is required by hardware */
660	viscalv &= 0x001ffffc;
661
662
663	/********************************************************************************
664	 *** setup all edges of output window, setup horizontal and vertical clipping ***
665	 ********************************************************************************/
666	eng_bes_calc_move_overlay(&moi);
667
668
669	/*****************************
670	 *** log color keying info ***
671	 *****************************/
672
673	LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
674		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
675	LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
676		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
677
678
679	/*****************
680	 *** log flags ***
681	 *****************/
682
683	LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
684	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
685
686
687	/*************************************
688	 *** sync to BES (Back End Scaler) ***
689	 *************************************/
690
691	/* Done in card hardware:
692	 * double buffered registers + trigger if programming complete feature. */
693
694
695	/**************************************
696	 *** actually program the registers ***
697	 **************************************/
698
699	if (si->ps.card_arch < NV10A)
700	{
701		/* unknown, but needed (otherwise high-res distortions and only half the frames */
702		BESW(NV04_OE_STATE, 0x00000000);
703		/* select buffer 0 as active (b16) */
704		BESW(NV04_SU_STATE, 0x00000000);
705		/* unknown (no effect?) */
706		BESW(NV04_RM_STATE, 0x00000000);
707		/* setup clipped(!) buffer startadress in RAM */
708		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
709		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
710		/* (program both buffers to prevent sync distortions) */
711		/* first include 'pixel precise' left clipping... (top clipping was already included) */
712		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
713		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
714		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
715		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
716		/* setup buffer source pitch including slopspace (in bytes).
717		 * Note:
718		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
719		/* (program both buffers to prevent sync distortions) */
720		BESW(NV04_0SRCPTCH, (ob->width * 2));
721		BESW(NV04_1SRCPTCH, (ob->width * 2));
722		/* setup output window position */
723		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
724		/* setup output window size */
725		BESW(NV04_DSTSIZE, (
726			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
727			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
728			));
729		/* setup horizontal and vertical scaling */
730		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
731		/* enable vertical filtering (b0) */
732		BESW(NV04_CTRL_V, 0x00000001);
733		/* enable horizontal filtering (no effect?) */
734		BESW(NV04_CTRL_H, 0x00000111);
735
736		/* enable BES (b0), enable colorkeying (b4), format yuy2 (b8: 0 = ccir) */
737		BESW(NV04_GENCTRL, 0x00000111);
738		/* select buffer 1 as active (b16) */
739		BESW(NV04_SU_STATE, 0x00010000);
740
741		/**************************
742		 *** setup color keying ***
743		 **************************/
744
745		/* setup colorkeying */
746		switch(si->dm.space)
747		{
748		case B_RGB15_LITTLE:
749			BESW(NV04_COLKEY, (
750				((ow->blue.value & ow->blue.mask) << 0)   |
751				((ow->green.value & ow->green.mask) << 5) |
752				((ow->red.value & ow->red.mask) << 10)    |
753				((ow->alpha.value & ow->alpha.mask) << 15)
754				));
755			break;
756		case B_RGB16_LITTLE:
757			BESW(NV04_COLKEY, (
758				((ow->blue.value & ow->blue.mask) << 0)   |
759				((ow->green.value & ow->green.mask) << 5) |
760				((ow->red.value & ow->red.mask) << 11)
761				/* this space has no alpha bits */
762				));
763			break;
764		case B_CMAP8:
765		case B_RGB32_LITTLE:
766		default:
767			BESW(NV04_COLKEY, (
768				((ow->blue.value & ow->blue.mask) << 0)   |
769				((ow->green.value & ow->green.mask) << 8) |
770				((ow->red.value & ow->red.mask) << 16)    |
771				((ow->alpha.value & ow->alpha.mask) << 24)
772				));
773			break;
774		}
775	}
776	else
777	{
778		/* >= NV10A */
779
780		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
781		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
782		/* setup buffersize */
783		//fixme if needed: width must be even officially...
784		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
785		/* setup source pitch including slopspace (in bytes),
786		 * b16: select YUY2 (0 = YV12), b20: use colorkey, b24: no iturbt_709 (do iturbt_601) */
787		/* Note:
788		 * source pitch granularity = 32 pixels on GeForce cards!! */
789		BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
790		/* setup output window position */
791		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
792		/* setup output window size */
793		BESW(NV10_0DSTSIZE, (
794			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
795			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
796			));
797		/* setup horizontal scaling */
798		BESW(NV10_0ISCALH, (hiscalv << 4));
799		/* setup vertical scaling */
800		BESW(NV10_0ISCALV, (viscalv << 4));
801		/* setup (unclipped!) buffer startadress in RAM */
802		BESW(NV10_0BUFADR, moi.a1orgv);
803		/* enable BES (b0 = 0) */
804		BESW(NV10_GENCTRL, 0x00000000);
805		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
806		/* This also triggers activation of programmed values (double buffered registers feature) */
807		BESW(NV10_BUFSEL, 0x00000001);
808
809		/**************************
810		 *** setup color keying ***
811		 **************************/
812
813		/* setup colorkeying */
814		switch(si->dm.space)
815		{
816		case B_RGB15_LITTLE:
817			BESW(NV10_COLKEY, (
818				((ow->blue.value & ow->blue.mask) << 0)   |
819				((ow->green.value & ow->green.mask) << 5) |
820				((ow->red.value & ow->red.mask) << 10)    |
821				((ow->alpha.value & ow->alpha.mask) << 15)
822				));
823			break;
824		case B_RGB16_LITTLE:
825			BESW(NV10_COLKEY, (
826				((ow->blue.value & ow->blue.mask) << 0)   |
827				((ow->green.value & ow->green.mask) << 5) |
828				((ow->red.value & ow->red.mask) << 11)
829				/* this space has no alpha bits */
830				));
831			break;
832		case B_CMAP8:
833		case B_RGB32_LITTLE:
834		default:
835			BESW(NV10_COLKEY, (
836				((ow->blue.value & ow->blue.mask) << 0)   |
837				((ow->green.value & ow->green.mask) << 8) |
838				((ow->red.value & ow->red.mask) << 16)    |
839				((ow->alpha.value & ow->alpha.mask) << 24)
840				));
841			break;
842		}
843	}
844
845	/* note that overlay is in use (for eng_bes_move_overlay()) */
846	si->overlay.active = true;
847
848	return B_OK;
849}
850
851status_t eng_release_bes()
852{
853	if (si->ps.card_arch < NV10A)
854	{
855		/* setup BES control: disable scaler (b0 = 0) */
856		BESW(NV04_GENCTRL, 0x00000000);
857	}
858	else
859	{
860		/* setup BES control: disable scaler (b0 = 1) */
861		BESW(NV10_GENCTRL, 0x00000001);
862	}
863
864	/* note that overlay is not in use (for eng_bes_move_overlay()) */
865	si->overlay.active = false;
866
867	return B_OK;
868}
869