1/* Nvidia TNT and GeForce Back End Scaler functions */
2/* Written by Rudolf Cornelissen 05/2002-5/2009 */
3
4#define MODULE_BIT 0x00000200
5
6#include "nv_std.h"
7
8typedef struct move_overlay_info move_overlay_info;
9
10struct move_overlay_info
11{
12	uint32 hcoordv;		/* left and right edges of video output window */
13	uint32 vcoordv;		/* top and bottom edges of video output window */
14	uint32 hsrcstv;		/* horizontal source start in source buffer (clipping) */
15	uint32 v1srcstv;	/* vertical source start in source buffer (clipping) */
16	uintptr_t a1orgv;		/* alternate source clipping via startadress of source buffer */
17};
18
19static void nv_bes_calc_move_overlay(move_overlay_info *moi);
20static void nv_bes_program_move_overlay(move_overlay_info moi);
21
22/* move the overlay output window in virtualscreens */
23/* Note:
24 * si->dm.h_display_start and si->dm.v_display_start determine where the new
25 * output window is located! */
26void nv_bes_move_overlay()
27{
28	move_overlay_info moi;
29
30	/* abort if overlay is not active */
31	if (!si->overlay.active) return;
32
33	nv_bes_calc_move_overlay(&moi);
34	nv_bes_program_move_overlay(moi);
35}
36
37static void nv_bes_calc_move_overlay(move_overlay_info *moi)
38{
39	/* misc used variables */
40	uint16 temp1, temp2;
41	/* visible screen window in virtual workspaces */
42	uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
43
44	/* do 'overlay follow head' in dualhead modes on dualhead cards */
45	if (si->ps.secondary_head)
46	{
47		switch (si->dm.flags & DUALHEAD_BITS)
48		{
49		case DUALHEAD_ON:
50		case DUALHEAD_SWITCH:
51			if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
52					(si->dm.h_display_start + si->dm.timing.h_display))
53				nv_bes_to_crtc(si->crtc_switch_mode);
54			else
55				nv_bes_to_crtc(!si->crtc_switch_mode);
56			break;
57		default:
58				nv_bes_to_crtc(si->crtc_switch_mode);
59			break;
60		}
61	}
62
63	/* the BES does not respect virtual_workspaces, but adheres to CRTC
64	 * constraints only */
65	crtc_hstart = si->dm.h_display_start;
66	/* make dualhead stretch and switch mode work while we're at it.. */
67	if (si->overlay.crtc)
68	{
69		crtc_hstart += si->dm.timing.h_display;
70	}
71
72	/* horizontal end is the first position beyond the displayed range on the CRTC */
73	crtc_hend = crtc_hstart + si->dm.timing.h_display;
74	crtc_vstart = si->dm.v_display_start;
75	/* vertical end is the first position beyond the displayed range on the CRTC */
76	crtc_vend = crtc_vstart + si->dm.timing.v_display;
77
78
79	/****************************************
80	 *** setup all edges of output window ***
81	 ****************************************/
82
83	/* setup left and right edges of output window */
84	moi->hcoordv = 0;
85	/* left edge coordinate of output window, must be inside desktop */
86	/* clipping on the left side */
87	if (si->overlay.ow.h_start < crtc_hstart)
88	{
89		temp1 = 0;
90	}
91	else
92	{
93		/* clipping on the right side */
94		if (si->overlay.ow.h_start >= (crtc_hend - 1))
95		{
96			/* width < 2 is not allowed */
97			temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
98		}
99		else
100		/* no clipping here */
101		{
102			temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
103		}
104	}
105	moi->hcoordv |= temp1 << 16;
106	/* right edge coordinate of output window, must be inside desktop */
107	/* width < 2 is not allowed */
108	if (si->overlay.ow.width < 2)
109	{
110		temp2 = (temp1 + 1) & 0x7ff;
111	}
112	else
113	{
114		/* clipping on the right side */
115		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
116		{
117			temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
118		}
119		else
120		{
121			/* clipping on the left side */
122			if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
123			{
124				/* width < 2 is not allowed */
125				temp2 = 1;
126			}
127			else
128			/* no clipping here */
129			{
130				temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
131			}
132		}
133	}
134	moi->hcoordv |= temp2 << 0;
135	LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
136
137	/* setup top and bottom edges of output window */
138	moi->vcoordv = 0;
139	/* top edge coordinate of output window, must be inside desktop */
140	/* clipping on the top side */
141	if (si->overlay.ow.v_start < crtc_vstart)
142	{
143		temp1 = 0;
144	}
145	else
146	{
147		/* clipping on the bottom side */
148		if (si->overlay.ow.v_start >= (crtc_vend - 1))
149		{
150			/* height < 2 is not allowed */
151			temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
152		}
153		else
154		/* no clipping here */
155		{
156			temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
157		}
158	}
159	moi->vcoordv |= temp1 << 16;
160	/* bottom edge coordinate of output window, must be inside desktop */
161	/* height < 2 is not allowed */
162	if (si->overlay.ow.height < 2)
163	{
164		temp2 = (temp1 + 1) & 0x7ff;
165	}
166	else
167	{
168		/* clipping on the bottom side */
169		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
170		{
171			temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
172		}
173		else
174		{
175			/* clipping on the top side */
176			if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
177			{
178				/* height < 2 is not allowed */
179				temp2 = 1;
180			}
181			else
182			/* no clipping here */
183			{
184				temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
185			}
186		}
187	}
188	moi->vcoordv |= temp2 << 0;
189	LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
190
191
192	/*********************************
193	 *** setup horizontal clipping ***
194	 *********************************/
195
196	/* Setup horizontal source start: first (sub)pixel contributing to output picture */
197	/* Note:
198	 * The method is to calculate, based on 1:1 scaling, based on the output window.
199	 * After this is done, include the scaling factor so you get a value based on the input bitmap.
200	 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
201	 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
202	/* Note also:
203	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
204	moi->hsrcstv = 0;
205	/* check for destination horizontal clipping at left side */
206	if (si->overlay.ow.h_start < crtc_hstart)
207	{
208		/* check if entire destination picture is clipping left:
209		 * (2 pixels will be clamped onscreen at least) */
210		if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
211		{
212			/* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
213			moi->hsrcstv += (si->overlay.ow.width - 2);
214		}
215		else
216		{
217			/* increase 'first contributing pixel' with actual number of dest. clipping pixels */
218			moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
219		}
220		LOG(4,("Overlay: clipping left...\n"));
221
222		/* The calculated value is based on scaling = 1x. So we now compensate for scaling.
223		 * Note that this also already takes care of aligning the value to the BES register! */
224		moi->hsrcstv *= si->overlay.h_ifactor;
225	}
226	/* take zoom into account */
227	moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
228	/* AND below required by hardware (> 1024 support confirmed on all cards) */
229	moi->hsrcstv &= 0x07fffffc;
230	LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
231
232
233	/*******************************
234	 *** setup vertical clipping ***
235	 *******************************/
236
237	/* calculate inputbitmap origin adress */
238	moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer);
239	moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer);
240	LOG(4, ("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
241
242	/* Setup vertical source start: first (sub)pixel contributing to output picture. */
243	/* Note:
244	 * The method is to calculate, based on 1:1 scaling, based on the output window.
245	 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
246	 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
247	/* Note also:
248	 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
249
250	moi->v1srcstv = 0;
251	/* check for destination vertical clipping at top side */
252	if (si->overlay.ow.v_start < crtc_vstart)
253	{
254		/* check if entire destination picture is clipping at top:
255		 * (2 pixels will be clamped onscreen at least) */
256		if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
257		{
258			/* increase 'number of clipping pixels' with 'fixed value':
259			 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
260			moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
261			/* on pre-NV10 we need to do clipping in the source
262			 * bitmap because no seperate clipping registers exist... */
263			if (si->ps.card_arch < NV10A)
264				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
265		}
266		else
267		{
268			/* increase 'first contributing pixel' with:
269			 * number of destination picture clipping pixels * inverse scaling factor */
270			moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
271			/* on pre-NV10 we need to do clipping in the source
272			 * bitmap because no seperate clipping registers exist... */
273			if (si->ps.card_arch < NV10A)
274				moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
275		}
276		LOG(4,("Overlay: clipping at top...\n"));
277	}
278	/* take zoom into account */
279	moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
280	if (si->ps.card_arch < NV10A)
281	{
282		moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
283		LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
284	}
285	LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
286
287	/* AND below is probably required by hardware. */
288	/* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
289	moi->a1orgv &= 0xfffffff0;
290}
291
292static void nv_bes_program_move_overlay(move_overlay_info moi)
293{
294	/*************************************
295	 *** sync to BES (Back End Scaler) ***
296	 *************************************/
297
298	/* Done in card hardware:
299	 * double buffered registers + trigger if programming complete feature. */
300
301
302	/**************************************
303	 *** actually program the registers ***
304	 **************************************/
305
306	if (si->ps.card_arch < NV10A)
307	{
308		/* unknown, but needed (otherwise high-res distortions and only half the frames */
309		BESW(NV04_OE_STATE, 0x00000000);
310		/* select buffer 0 as active (b16) */
311		BESW(NV04_SU_STATE, 0x00000000);
312		/* unknown (no effect?) */
313		BESW(NV04_RM_STATE, 0x00000000);
314		/* setup clipped(!) buffer startadress in RAM */
315		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
316		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
317		/* (program both buffers to prevent sync distortions) */
318		/* first include 'pixel precise' left clipping... (top clipping was already included) */
319		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
320		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
321		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
322		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
323		/* setup output window position */
324		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
325		/* setup output window size */
326		BESW(NV04_DSTSIZE, (
327			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
328			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
329			));
330		/* select buffer 1 as active (b16) */
331		BESW(NV04_SU_STATE, 0x00010000);
332	}
333	else
334	{
335		/* >= NV10A */
336
337		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
338		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
339		/* setup output window position */
340		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
341		/* setup output window size */
342		BESW(NV10_0DSTSIZE, (
343			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
344			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
345			));
346		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
347		/* This also triggers activation of programmed values (double buffered registers feature) */
348		BESW(NV10_BUFSEL, 0x00000001);
349	}
350}
351
352status_t nv_bes_to_crtc(bool crtc)
353{
354	if (si->ps.secondary_head)
355	{
356		if (crtc)
357		{
358			LOG(4,("Overlay: switching overlay to CRTC2\n"));
359			/* switch overlay engine to CRTC2 */
360			NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
361			NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
362			si->overlay.crtc = !si->crtc_switch_mode;
363		}
364		else
365		{
366			LOG(4,("Overlay: switching overlay to CRTC1\n"));
367			/* switch overlay engine to CRTC1 */
368			NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
369			NV_REG32(NV32_FUNCSEL) |= 0x00001000;
370			si->overlay.crtc = si->crtc_switch_mode;
371		}
372		return B_OK;
373	}
374	else
375	{
376		return B_ERROR;
377	}
378}
379
380status_t nv_bes_init()
381{
382	if (si->ps.card_arch < NV10A)
383	{
384		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385		BESW(NV04_INTE, 0x00000000);
386
387		/* setup saturation to be 'neutral' */
388		BESW(NV04_SAT, 0x00000000);
389		/* setup RGB brightness to be 'neutral' */
390		BESW(NV04_RED_AMP, 0x00000069);
391		BESW(NV04_GRN_AMP, 0x0000003e);
392		BESW(NV04_BLU_AMP, 0x00000089);
393
394		/* setup fifo for fetching data */
395		BESW(NV04_FIFOBURL, 0x00000003);
396		BESW(NV04_FIFOTHRS, 0x00000038);
397
398		/* unknown, but needed (registers only have b0 implemented) */
399		/* (program both buffers to prevent sync distortions) */
400		BESW(NV04_0OFFSET, 0x00000000);
401		BESW(NV04_1OFFSET, 0x00000000);
402	}
403	else
404	{
405		/* >= NV10A */
406
407		/* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408		BESW(NV10_INTE, 0x00000000);
409		/* shut off GeForce4MX MPEG2 decoder */
410		BESW(DEC_GENCTRL, 0x00000000);
411		/* setup BES memory-range mask */
412		BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413		/* unknown, but needed */
414		BESW(NV10_0OFFSET, 0x00000000);
415
416		/* setup brightness, contrast and saturation to be 'neutral' */
417		BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418		BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
419	}
420
421	/* make sure the engine is disabled. */
422	nv_release_bes();
423
424	return B_OK;
425}
426
427status_t nv_configure_bes
428	(const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
429{
430	/* yuy2 (4:2:2) colorspace calculations */
431
432	/* Note:
433	 * in BeOS R5.0.3 and DANO:
434	 * 'ow->offset_xxx' is always 0, so not used;
435	 * 'ow->width' and 'ow->height' are the output window size: does not change
436	 * if window is clipping;
437	 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
438	 * window. These values can be negative: this means the window is clipping
439	 * at the left or the top of the display, respectively. */
440
441	/* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
442	 * displayed on screen. This is used for the 'hardware zoom' function. */
443
444	/* output window position and clipping info for source buffer */
445	move_overlay_info moi;
446	/* calculated BES register values */
447	uint32 	hiscalv, viscalv;
448	/* interval representation, used for scaling calculations */
449	uint16 intrep;
450	/* inverse scaling factor, used for source positioning */
451	uint32 ifactor;
452	/* copy of overlay view which has checked valid values */
453	overlay_view my_ov;
454
455
456	/**************************************************************************************
457	 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
458	 **************************************************************************************/
459	my_ov = *ov;
460	/* check for valid 'coordinates' */
461	if (my_ov.width == 0) my_ov.width++;
462	if (my_ov.height == 0) my_ov.height++;
463	if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
464		my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
465	if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
466		my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
467	if (my_ov.v_start > (ob->height - 1))
468		my_ov.v_start = (ob->height - 1);
469	if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
470		my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
471
472	LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
473		my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
474
475	/* save for nv_bes_calc_move_overlay() */
476	si->overlay.ow = *ow;
477	si->overlay.ob = *ob;
478	si->overlay.my_ov = my_ov;
479
480
481	/********************************
482	 *** setup horizontal scaling ***
483	 ********************************/
484	LOG(4,("Overlay: total input picture width = %d, height = %d\n",
485			(ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
486	LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
487
488	/* determine interval representation value, taking zoom into account */
489	if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
490	{
491		/* horizontal filtering is ON */
492		if ((my_ov.width == ow->width) | (ow->width < 2))
493		{
494			/* no horizontal scaling used, OR destination width < 2 */
495			intrep = 0;
496		}
497		else
498		{
499			intrep = 1;
500		}
501	}
502	else
503	{
504		/* horizontal filtering is OFF */
505		if ((ow->width < my_ov.width) & (ow->width >= 2))
506		{
507			/* horizontal downscaling used AND destination width >= 2 */
508			intrep = 1;
509		}
510		else
511		{
512			intrep = 0;
513		}
514	}
515	LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
516
517	/* calculate inverse horizontal scaling factor, taking zoom into account */
518	/* standard scaling formula: */
519	ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
520
521	/* correct factor to prevent most-right visible 'line' from distorting */
522	ifactor -= (1 << 2);
523	hiscalv = ifactor;
524	/* save for nv_bes_calc_move_overlay() */
525	si->overlay.h_ifactor = ifactor;
526	LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
527
528	/* check scaling factor (and modify if needed) to be within scaling limits */
529	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
530	if (hiscalv < 0x00002000)
531	{
532		/* (non-inverse) factor too large, set factor to max. valid value */
533		hiscalv = 0x00002000;
534		LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
535	}
536	switch (si->ps.card_arch)
537	{
538	case NV04A:
539		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
540		 * (16bit register with 0.11 format value) */
541		if (hiscalv > 0x0000ffff)
542		{
543			/* (non-inverse) factor too small, set factor to min. valid value */
544			hiscalv = 0x0000ffff;
545			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
546		}
547		break;
548	case NV30A:
549	case NV40A:
550		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
551		if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
552		{
553			/* (non-inverse) factor too small, set factor to min. valid value */
554			hiscalv = (2 << 16);
555			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
556		}
557		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
558		 * So let it fall through... */
559		if (si->ps.card_type != NV31) break;
560	default:
561		/* the rest has a downscaling limit of 0.125 */
562		if (hiscalv > (8 << 16))
563		{
564			/* (non-inverse) factor too small, set factor to min. valid value */
565			hiscalv = (8 << 16);
566			LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
567		}
568		break;
569	}
570	/* AND below is required by hardware */
571	hiscalv &= 0x001ffffc;
572
573
574	/******************************
575	 *** setup vertical scaling ***
576	 ******************************/
577
578	/* determine interval representation value, taking zoom into account */
579	if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
580	{
581		/* vertical filtering is ON */
582		if ((my_ov.height == ow->height) | (ow->height < 2))
583		{
584			/* no vertical scaling used, OR destination height < 2 */
585			intrep = 0;
586		}
587		else
588		{
589			intrep = 1;
590		}
591	}
592	else
593	{
594		/* vertical filtering is OFF */
595		if ((ow->height < my_ov.height) & (ow->height >= 2))
596		{
597			/* vertical downscaling used AND destination height >= 2 */
598			intrep = 1;
599		}
600		else
601		{
602			intrep = 0;
603		}
604	}
605	LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
606
607	/* calculate inverse vertical scaling factor, taking zoom into account */
608	/* standard scaling formula: */
609	ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
610
611	/* correct factor to prevent lowest visible line from distorting */
612	ifactor -= (1 << 2);
613	LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
614
615	/* preserve ifactor for source positioning calculations later on */
616	viscalv = ifactor;
617	/* save for nv_bes_calc_move_overlay() */
618	si->overlay.v_ifactor = ifactor;
619
620	/* check scaling factor (and modify if needed) to be within scaling limits */
621	/* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
622	if (viscalv < 0x00002000)
623	{
624		/* (non-inverse) factor too large, set factor to max. valid value */
625		viscalv = 0x00002000;
626		LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
627	}
628	switch (si->ps.card_arch)
629	{
630	case NV04A:
631		/* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
632		 * (16bit register with 0.11 format value) */
633		if (viscalv > 0x0000ffff)
634		{
635			/* (non-inverse) factor too small, set factor to min. valid value */
636			viscalv = 0x0000ffff;
637			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
638		}
639		break;
640	case NV30A:
641	case NV40A:
642		/* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
643		if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
644		{
645			/* (non-inverse) factor too small, set factor to min. valid value */
646			viscalv = (2 << 16);
647			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
648		}
649		/* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
650		 * So let it fall through... */
651		if (si->ps.card_type != NV31) break;
652	default:
653		/* the rest has a downscaling limit of 0.125 */
654		if (viscalv > (8 << 16))
655		{
656			/* (non-inverse) factor too small, set factor to min. valid value */
657			viscalv = (8 << 16);
658			LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
659		}
660		break;
661	}
662	/* AND below is required by hardware */
663	viscalv &= 0x001ffffc;
664
665
666	/********************************************************************************
667	 *** setup all edges of output window, setup horizontal and vertical clipping ***
668	 ********************************************************************************/
669	nv_bes_calc_move_overlay(&moi);
670
671
672	/*****************************
673	 *** log color keying info ***
674	 *****************************/
675
676	LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
677		ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
678	LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
679		ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
680
681
682	/*****************
683	 *** log flags ***
684	 *****************/
685
686	LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
687	/* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
688
689
690	/*************************************
691	 *** sync to BES (Back End Scaler) ***
692	 *************************************/
693
694	/* Done in card hardware:
695	 * double buffered registers + trigger if programming complete feature. */
696
697
698	/**************************************
699	 *** actually program the registers ***
700	 **************************************/
701
702	if (si->ps.card_arch < NV10A)
703	{
704		/* unknown, but needed (otherwise high-res distortions and only half the frames */
705		BESW(NV04_OE_STATE, 0x00000000);
706		/* select buffer 0 as active (b16) */
707		BESW(NV04_SU_STATE, 0x00000000);
708		/* unknown (no effect?) */
709		BESW(NV04_RM_STATE, 0x00000000);
710		/* setup clipped(!) buffer startadress in RAM */
711		/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
712		 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
713		/* (program both buffers to prevent sync distortions) */
714		/* first include 'pixel precise' left clipping... (top clipping was already included) */
715		moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
716		/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
717		BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
718		BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
719		/* setup buffer source pitch including slopspace (in bytes).
720		 * Note:
721		 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
722		/* (program both buffers to prevent sync distortions) */
723		BESW(NV04_0SRCPTCH, (ob->width * 2));
724		BESW(NV04_1SRCPTCH, (ob->width * 2));
725		/* setup output window position */
726		BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
727		/* setup output window size */
728		BESW(NV04_DSTSIZE, (
729			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
730			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
731			));
732		/* setup horizontal and vertical scaling */
733		BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
734		/* enable vertical filtering (b0) */
735		BESW(NV04_CTRL_V, 0x00000001);
736		/* enable horizontal filtering (no effect?) */
737		BESW(NV04_CTRL_H, 0x00000111);
738		/* enable BES (b0), set colorkeying (b4), format yuy2 (b8: 0 = ccir) */
739		if (ow->flags & B_OVERLAY_COLOR_KEY)
740			BESW(NV04_GENCTRL, 0x00000111);
741		else
742			BESW(NV04_GENCTRL, 0x00000101);
743		/* select buffer 1 as active (b16) */
744		BESW(NV04_SU_STATE, 0x00010000);
745
746		/**************************
747		 *** setup color keying ***
748		 **************************/
749
750		/* setup colorkeying */
751		switch(si->dm.space)
752		{
753		case B_RGB15_LITTLE:
754			BESW(NV04_COLKEY, (
755				((ow->blue.value & ow->blue.mask) << 0)   |
756				((ow->green.value & ow->green.mask) << 5) |
757				((ow->red.value & ow->red.mask) << 10)    |
758				((ow->alpha.value & ow->alpha.mask) << 15)
759				));
760			break;
761		case B_RGB16_LITTLE:
762			BESW(NV04_COLKEY, (
763				((ow->blue.value & ow->blue.mask) << 0)   |
764				((ow->green.value & ow->green.mask) << 5) |
765				((ow->red.value & ow->red.mask) << 11)
766				/* this space has no alpha bits */
767				));
768			break;
769		case B_CMAP8:
770		case B_RGB32_LITTLE:
771		default:
772			BESW(NV04_COLKEY, (
773				((ow->blue.value & ow->blue.mask) << 0)   |
774				((ow->green.value & ow->green.mask) << 8) |
775				((ow->red.value & ow->red.mask) << 16)    |
776				((ow->alpha.value & ow->alpha.mask) << 24)
777				));
778			break;
779		}
780	}
781	else
782	{
783		/* >= NV10A */
784
785		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
786		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
787		/* setup buffersize */
788		//fixme if needed: width must be even officially...
789		BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
790		/* setup source pitch including slopspace (in bytes),
791		 * b16: select YUY2 (0 = YV12), b20: set colorkeying, b24: no iturbt_709 (do iturbt_601) */
792		/* Note:
793		 * source pitch granularity = 32 pixels on GeForce cards!! */
794		if (ow->flags & B_OVERLAY_COLOR_KEY)
795			BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
796		else
797			BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (0 << 20) | (0 << 24)));
798		/* setup output window position */
799		BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
800		/* setup output window size */
801		BESW(NV10_0DSTSIZE, (
802			(((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
803			((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
804			));
805		/* setup horizontal scaling */
806		BESW(NV10_0ISCALH, (hiscalv << 4));
807		/* setup vertical scaling */
808		BESW(NV10_0ISCALV, (viscalv << 4));
809		/* setup (unclipped!) buffer startadress in RAM */
810		BESW(NV10_0BUFADR, moi.a1orgv);
811		/* enable BES (b0 = 0) */
812		BESW(NV10_GENCTRL, 0x00000000);
813		/* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
814		/* This also triggers activation of programmed values (double buffered registers feature) */
815		BESW(NV10_BUFSEL, 0x00000001);
816
817		/**************************
818		 *** setup color keying ***
819		 **************************/
820
821		/* setup colorkeying */
822		switch(si->dm.space)
823		{
824		case B_RGB15_LITTLE:
825			BESW(NV10_COLKEY, (
826				((ow->blue.value & ow->blue.mask) << 0)   |
827				((ow->green.value & ow->green.mask) << 5) |
828				((ow->red.value & ow->red.mask) << 10)    |
829				((ow->alpha.value & ow->alpha.mask) << 15)
830				));
831			break;
832		case B_RGB16_LITTLE:
833			BESW(NV10_COLKEY, (
834				((ow->blue.value & ow->blue.mask) << 0)   |
835				((ow->green.value & ow->green.mask) << 5) |
836				((ow->red.value & ow->red.mask) << 11)
837				/* this space has no alpha bits */
838				));
839			break;
840		case B_CMAP8:
841		case B_RGB32_LITTLE:
842		default:
843			BESW(NV10_COLKEY, (
844				((ow->blue.value & ow->blue.mask) << 0)   |
845				((ow->green.value & ow->green.mask) << 8) |
846				((ow->red.value & ow->red.mask) << 16)    |
847				((ow->alpha.value & ow->alpha.mask) << 24)
848				));
849			break;
850		}
851	}
852
853	/* note that overlay is in use (for nv_bes_move_overlay()) */
854	si->overlay.active = true;
855
856	return B_OK;
857}
858
859status_t nv_release_bes()
860{
861	if (si->ps.card_arch < NV10A)
862	{
863		/* setup BES control: disable scaler (b0 = 0) */
864		BESW(NV04_GENCTRL, 0x00000000);
865	}
866	else
867	{
868		/* setup BES control: disable scaler (b0 = 1) */
869		BESW(NV10_GENCTRL, 0x00000001);
870	}
871
872	/* note that overlay is not in use (for nv_bes_move_overlay()) */
873	si->overlay.active = false;
874
875	return B_OK;
876}
877