1/*
2	Copyright (c) 2002, Thomas Kurschel
3
4
5	Part of Radeon accelerant
6
7	CP initialization/sync/cleanup.
8
9	It also handles command buffer synchronization.
10
	non-local memory is used as follows:
	- 2048 dwords for ring buffer
	- 253 indirect buffers of 4 KB each (1024 dwords)
	- 8 dwords for returned data (i.e. current read ptr)
	  & 6 dwords for "scratch registers"
16
17	usage of scratch registers:
18	- reg 0 = reached engine.count
19
20	with a granularity of 4 KByte, we need 2+253+1=256 blocks, which is exactly 1 MB
21*/
22
23#include "radeon_driver.h"
24#include "CPMicroCode.h"
25#include "mmio.h"
26#include "cp_regs.h"
27#include "pll_regs.h"
28#include "rbbm_regs.h"
29#include "buscntrl_regs.h"
30#include "config_regs.h"
31#include "memcntrl_regs.h"
32#include "utils.h"
33#include "pll_access.h"
34
35#include "log_coll.h"
36#include "log_enum.h"
37
38#include <string.h>
39
40#if 0
41
42// macros for user-space
43
// allocate (asize) dwords of graphics memory via ioctl;
// status is stored in local "res", result in *(handle) / *(offset);
// NOTE(review): am.memory_type is hard-coded to mt_nonlocal - the mem_type
// argument is ignored here; confirm this is intentional for user-space
#define ALLOC_MEM( asize, mem_type, aglobal, handle, offset ) \
	do { \
		radeon_alloc_mem am; \
\
		am.magic = RADEON_PRIVATE_DATA_MAGIC; \
		am.size = (asize) * 4; \
		am.memory_type = (mt_nonlocal); \
		am.global = (aglobal); \
\
		res = ioctl( ai->fd, RADEON_ALLOC_MEM, &am ); \
		if( res == B_OK ) { \
			/* braces added: previously *(offset) was set even on failure */ \
			*(handle) = am.handle; \
			*(offset) = am.offset; \
		} \
	} while( 0 )
58
// get a CPU-accessible pointer to an allocated memory block
// (uses the accelerant's mapped-memory table, so user-space only)
#define MEM2CPU( mem ) \
	((uint32 *)(ai->mapped_memory[(mem).memory_type].data + (mem).offset))

// get the graphics card's virtual address of an allocated memory block
#define MEM2GC( mem ) ((mem).offset + si->memory[(mem).memory_type].virtual_addr_start)
63
// free a previously allocated memory block via ioctl;
// errors are ignored (nothing we could do about them anyway)
#define FREE_MEM( mem_type, handle ) \
	do { \
		radeon_free_mem fm; \
\
		fm.magic = RADEON_PRIVATE_DATA_MAGIC; \
		fm.memory_type = mem_type; \
		fm.handle = handle; /* was "offset", which is not a macro parameter */ \
\
		ioctl( ai->fd, RADEON_FREE_MEM, &fm ); \
	} while( 0 )
74
75#else
76
77// macros for kernel-space
78
// allocate memory
// if memory_type is non-local, it is replaced with default non-local type;
// status is stored in local "res";
// wrapped in do/while(0) so the multi-statement body behaves as a single
// statement (safe after an unbraced "if")
#define ALLOC_MEM( asize, mem_type, aglobal, handle, offset ) \
	do { \
		if( mem_type == mt_nonlocal ) \
			mem_type = di->si->nonlocal_type; \
		res = mem_alloc( di->memmgr[mem_type], asize, NULL, handle, offset ); \
	} while( 0 )
85
// get address as seen by program to access allocated memory
// (memory_type must _not_ be non-local, see ALLOC_MEM);
// local memory is mapped directly, non-local memory goes through
// the PCI or AGP GART buffer
#define MEM2CPU( memory_type, offset ) \
	((uint8 *)(memory_type == mt_local ? di->si->local_mem : \
	(memory_type == mt_PCI ? di->pci_gart.buffer.ptr : di->agp_gart.buffer.ptr)) \
	+ (offset))

// get graphics card's virtual address of allocated memory
// (memory_type must _not_ be non-local, see ALLOC_MEM)
#define MEM2GC( memory_type, offset ) \
	(di->si->memory[(memory_type)].virtual_addr_start + (offset))
97
// free memory
// if memory_type is non-local, it is replaced with default non-local type;
// wrapped in do/while(0) so the expansion plus the caller's ";" stays a
// single statement (safe in "if/else" constructs)
#define FREE_MEM( mem_type, handle ) \
	do { \
		mem_free( \
			di->memmgr[ mem_type == mt_nonlocal ? di->si->nonlocal_type : mem_type], \
			handle, NULL ); \
	} while( 0 )
104
105#endif
106
107
108void Radeon_DiscardAllIndirectBuffers( device_info *di );
109
110#define RADEON_SCRATCH_REG_OFFSET	32
111
112
113void Radeon_FlushPixelCache( device_info *di );
114
// wait until engine is idle;
// if the engine stays busy for more than a second, it is assumed to be
// hung and a soft-reset is issued, then we wait again
// acquire_lock - 	true, if lock must be acquired by this function
//					false, if lock is already held by the caller
// keep_lock -		true, keep lock on exit (only valid if acquire_lock is true)
void Radeon_WaitForIdle( device_info *di, bool acquire_lock, bool keep_lock )
{
	if( acquire_lock )
		ACQUIRE_BEN( di->si->cp.lock );

	// first make sure the command FIFO has drained
	Radeon_WaitForFifo( di, 64 );

	while( 1 ) {
		bigtime_t start_time = system_time();

		// poll the engine-active bit for up to one second
		do {
			if( (INREG( di->regs, RADEON_RBBM_STATUS ) & RADEON_RBBM_ACTIVE) == 0 ) {
				// engine went idle - flush caches so results are visible
				Radeon_FlushPixelCache( di );

				if( acquire_lock && !keep_lock)
					RELEASE_BEN( di->si->cp.lock );

				return;
			}

			snooze( 1 );
		} while( system_time() - start_time < 1000000 );

		// engine seems to hang - dump state, reset it and retry
		SHOW_ERROR( 3,
			"Engine didn't become idle (rbbm_status=%" B_PRIx32 ", "
			"cp_stat=%" B_PRIx32 ", "
			"tlb_address=%" B_PRIx32 ", "
			"tlb_data=%" B_PRIx32 ")",
			INREG( di->regs, RADEON_RBBM_STATUS ),
			INREG( di->regs, RADEON_CP_STAT ),
			INREG( di->regs, RADEON_AIC_TLB_ADDR ),
			INREG( di->regs, RADEON_AIC_TLB_DATA ));

		LOG( di->si->log, _Radeon_WaitForIdle );

		Radeon_ResetEngine( di );
	}
}
157
158
159// wait until "entries" FIFO entries are empty
160// lock must be hold
161void Radeon_WaitForFifo( device_info *di, int entries )
162{
163	while( 1 ) {
164		bigtime_t start_time = system_time();
165
166		do {
167			int slots = INREG( di->regs, RADEON_RBBM_STATUS ) & RADEON_RBBM_FIFOCNT_MASK;
168
169			if ( slots >= entries )
170				return;
171
172			snooze( 1 );
173		} while( system_time() - start_time < 1000000 );
174
175		LOG( di->si->log, _Radeon_WaitForFifo );
176
177		Radeon_ResetEngine( di );
178	}
179}
180
181// flush pixel cache of graphics card
182void Radeon_FlushPixelCache( device_info *di )
183{
184	bigtime_t start_time;
185
186	OUTREGP( di->regs, RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL,
187		~RADEON_RB2D_DC_FLUSH_ALL );
188
189	start_time = system_time();
190
191	do {
192		if( (INREG( di->regs, RADEON_RB2D_DSTCACHE_CTLSTAT )
193			 & RADEON_RB2D_DC_BUSY) == 0 )
194			return;
195
196		snooze( 1 );
197	} while( system_time() - start_time < 1000000 );
198
199	LOG( di->si->log, _Radeon_FlushPixelCache );
200
201	SHOW_ERROR0( 0, "pixel cache didn't become empty" );
202}
203
// reset graphics card's engine
// (soft-resets CP, 2D/3D units and the host data path; the memory
// controller clocks are forced on during the reset, as the units must be
// clocked to accept it)
// lock must be held
void Radeon_ResetEngine( device_info *di )
{
	vuint8 *regs = di->regs;
	shared_info *si = di->si;
	uint32 clock_cntl_index, mclk_cntl, rbbm_soft_reset, host_path_cntl;
	uint32 cur_read_ptr;

	SHOW_FLOW0( 3, "" );

	Radeon_FlushPixelCache( di );

	// save clock state so it can be restored after the reset
	clock_cntl_index = INREG( regs, RADEON_CLOCK_CNTL_INDEX );
	RADEONPllErrataAfterIndex( regs, di->asic );	// drm has no errata here!
	mclk_cntl = Radeon_INPLL( regs, di->asic, RADEON_MCLK_CNTL );

	// enable clock of units to be reset
	Radeon_OUTPLL( regs, di->asic, RADEON_MCLK_CNTL, mclk_cntl |
      RADEON_FORCEON_MCLKA |
      RADEON_FORCEON_MCLKB |
      RADEON_FORCEON_YCLKA |
      RADEON_FORCEON_YCLKB |
      RADEON_FORCEON_MC |
      RADEON_FORCEON_AIC );

	// do the reset: assert the soft-reset bits, read back to make the
	// write reach the chip, then deassert them again
    host_path_cntl = INREG( regs, RADEON_HOST_PATH_CNTL );
	rbbm_soft_reset = INREG( regs, RADEON_RBBM_SOFT_RESET );

	OUTREG( regs, RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
			RADEON_SOFT_RESET_CP |
			RADEON_SOFT_RESET_HI |
			RADEON_SOFT_RESET_SE |
			RADEON_SOFT_RESET_RE |
			RADEON_SOFT_RESET_PP |
			RADEON_SOFT_RESET_E2 |
			RADEON_SOFT_RESET_RB ) );
	INREG( regs, RADEON_RBBM_SOFT_RESET);
	OUTREG( regs, RADEON_RBBM_SOFT_RESET, rbbm_soft_reset &
		~( RADEON_SOFT_RESET_CP |
		   RADEON_SOFT_RESET_HI |
		   RADEON_SOFT_RESET_SE |
		   RADEON_SOFT_RESET_RE |
		   RADEON_SOFT_RESET_PP |
		   RADEON_SOFT_RESET_E2 |
		   RADEON_SOFT_RESET_RB ) );
	INREG( regs, RADEON_RBBM_SOFT_RESET);

	// same pulse for the host data path
    OUTREG( regs, RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET );
    INREG( regs, RADEON_HOST_PATH_CNTL );
    OUTREG( regs, RADEON_HOST_PATH_CNTL, host_path_cntl );

	// restore the saved clock state
	Radeon_OUTPLL( regs, di->asic, RADEON_MCLK_CNTL, mclk_cntl );
   	OUTREG( regs, RADEON_CLOCK_CNTL_INDEX, clock_cntl_index );
   	//RADEONPllErrataAfterIndex( regs, di->asic ); // drm doesn't do this here!
   	OUTREG( regs, RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);

	if ( di->acc_dma )
	{
		// reset ring buffer: make the write pointer equal the read
		// pointer, i.e. discard all commands not yet fetched by the CP
		cur_read_ptr = INREG( regs, RADEON_CP_RB_RPTR );
		OUTREG( regs, RADEON_CP_RB_WPTR, cur_read_ptr );

		//if( si->cp.ring.head ) {
		// during init, there are no feedback data
		if( si->cp.feedback.mem_handle != 0 ) {
			// sync the CPU-visible head copy and our tail with the chip
			*(uint32 *)MEM2CPU( si->cp.feedback.mem_type, si->cp.feedback.head_mem_offset) =
				cur_read_ptr;
			//	*si->cp.ring.head = cur_read_ptr;
			si->cp.ring.tail = cur_read_ptr;
		}

		// mark all buffers as being finished
		Radeon_DiscardAllIndirectBuffers( di );
	}

	// bump the engine generation so clients notice the reset
	++si->engine.count;
	return;
}
284
285
286// upload Micro-Code of CP
287static void loadMicroEngineRAMData( device_info *di )
288{
289	int i;
290	const uint32 (*microcode)[2];
291
292	SHOW_FLOW0( 3, "" );
293
294	switch( di->asic ) {
295	case rt_r300:
296	case rt_rv350:
297	case rt_r350:
298	case rt_rv380:
299	case rt_r420:
300		microcode = r300_cp_microcode;
301		break;
302	case rt_r200:
303		microcode = r200_cp_microcode;
304		break;
305	case rt_rs100:
306	default:
307		microcode = radeon_cp_microcode;
308	}
309
310	Radeon_WaitForIdle( di, false, false );
311
312	OUTREG( di->regs, RADEON_CP_ME_RAM_ADDR, 0 );
313
314	for ( i = 0 ; i < 256 ; i++ ) {
315		OUTREG( di->regs, RADEON_CP_ME_RAM_DATAH, microcode[i][1] );
316		OUTREG( di->regs, RADEON_CP_ME_RAM_DATAL, microcode[i][0] );
317	}
318}
319
// allocate and set up the CP ring buffer
// aring_size - size of ring in dwords (must be a power of two,
//              as tail_mask is derived as aring_size - 1)
static status_t initRingBuffer( device_info *di, int aring_size )
{
	status_t res;
	shared_info *si = di->si;
	CP_info *cp = &si->cp;
	vuint8 *regs = di->regs;
	int32 offset;
	memory_type_e memory_type;

	memset( &cp->ring, 0, sizeof( cp->ring ));

	// ring and indirect buffers can be either in AGP or PCI GART
	// (it seems that they cannot be in graphics memory, at least
	//  I had serious coherency problems when I tried that)
	memory_type = mt_nonlocal;

	ALLOC_MEM( aring_size * 4, memory_type, true,
		&cp->ring.mem_handle, &offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate ring buffer" );
		return res;
	}

	// setup CP buffer
	cp->ring.mem_type = memory_type;
	cp->ring.mem_offset = offset;
	cp->ring.vm_base = MEM2GC( memory_type, offset );
	cp->ring.size = aring_size;
	cp->ring.tail_mask = aring_size - 1;
	OUTREG( regs, RADEON_CP_RB_BASE, cp->ring.vm_base );
	SHOW_INFO( 3, "CP buffer address=%" B_PRIx32, cp->ring.vm_base );

	// set ring buffer size
	// (it's log2 of qwords)
	OUTREG( regs, RADEON_CP_RB_CNTL, radeon_log2( cp->ring.size / 2 ));
	SHOW_INFO( 3, "CP buffer size mask=%d", radeon_log2( cp->ring.size / 2 ) );

	// set write pointer delay to zero;
	// we assume that memory synchronization is done correctly by the MoBo
	// and Radeon_SendCP contains a hack that hopefully fixes such problems
	OUTREG( regs, RADEON_CP_RB_WPTR_DELAY, 0 );

	// clear the ring so the CP doesn't pick up stale commands
	memset( MEM2CPU( cp->ring.mem_type, cp->ring.mem_offset), 0, cp->ring.size * 4 );

	// set CP buffer pointers
	OUTREG( regs, RADEON_CP_RB_RPTR, 0 );
	OUTREG( regs, RADEON_CP_RB_WPTR, 0 );
	//*cp->ring.head = 0;
	cp->ring.tail = 0;

	return B_OK;
}
374
375static void uninitRingBuffer( device_info *di )
376{
377	vuint8 *regs = di->regs;
378
379	// abort any activity
380	Radeon_ResetEngine( di );
381
382	// disable CP BM
383	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
384	// read-back for flushing
385	INREG( regs, RADEON_CP_CSQ_CNTL );
386
387	FREE_MEM( mt_nonlocal, di->si->cp.ring.mem_handle );
388}
389
// allocate and set up the CP feedback area
// (read-pointer copy + scratch registers the CP writes back to)
static status_t initCPFeedback( device_info *di )
{
	CP_info *cp = &di->si->cp;
	vuint8 *regs = di->regs;
	int32 offset;
	memory_type_e memory_type;
	status_t res;

	// status information should be in PCI memory, so CPU can
	// poll it without locking the bus (PCI memory is the only
	// cachable memory available)
	memory_type = mt_PCI;

	// one block: read pointer at offset 0, scratch registers at
	// RADEON_SCRATCH_REG_OFFSET
	ALLOC_MEM( RADEON_SCRATCH_REG_OFFSET + 0x40, memory_type, true,
		&cp->feedback.mem_handle, &offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate buffers for status information" );
		return res;
	}

	// setup CP read pointer buffer
	cp->feedback.mem_type = memory_type;
	cp->feedback.head_mem_offset = offset;
	cp->feedback.head_vm_address = MEM2GC( memory_type, cp->feedback.head_mem_offset );
	OUTREG( regs, RADEON_CP_RB_RPTR_ADDR, cp->feedback.head_vm_address );
	SHOW_INFO( 3, "CP read pointer buffer==%" B_PRIx32,
		cp->feedback.head_vm_address );

	// setup scratch register buffer; unmask all six scratch registers
	// so the CP may write them back
	cp->feedback.scratch_mem_offset = offset + RADEON_SCRATCH_REG_OFFSET;
	cp->feedback.scratch_vm_start = MEM2GC( memory_type, cp->feedback.scratch_mem_offset );
	OUTREG( regs, RADEON_SCRATCH_ADDR, cp->feedback.scratch_vm_start );
	OUTREG( regs, RADEON_SCRATCH_UMSK, 0x3f );

	// start with clean feedback data
	*(uint32 *)MEM2CPU( cp->feedback.mem_type, cp->feedback.head_mem_offset) = 0;
	memset( MEM2CPU( cp->feedback.mem_type, cp->feedback.scratch_mem_offset), 0, 0x40 );
	//*cp->ring.head = 0;

	return B_OK;
}
431
432static void uninitCPFeedback( device_info *di )
433{
434	vuint8 *regs = di->regs;
435
436	// don't allow any scratch buffer update
437	OUTREG( regs, RADEON_SCRATCH_UMSK, 0x0 );
438
439	FREE_MEM( mt_PCI, di->si->cp.feedback.mem_handle );
440}
441
442static status_t initIndirectBuffers( device_info *di )
443{
444	CP_info *cp = &di->si->cp;
445	int32 offset;
446	memory_type_e memory_type;
447	int i;
448	status_t res;
449
450	memory_type = mt_nonlocal;
451
452	ALLOC_MEM( NUM_INDIRECT_BUFFERS * INDIRECT_BUFFER_SIZE * 4, memory_type,
453		true, &cp->buffers.mem_handle, &offset );
454
455	if( res != B_OK ) {
456		SHOW_ERROR0( 0, "Cannot allocate indirect buffers" );
457		return B_ERROR;
458	}
459
460	cp->buffers.mem_type = memory_type;
461	cp->buffers.mem_offset = offset;
462	cp->buffers.vm_start = MEM2GC( memory_type, cp->buffers.mem_offset );
463
464	for( i = 0; i < NUM_INDIRECT_BUFFERS - 1; ++i ) {
465		cp->buffers.buffers[i].next = i + 1;
466	}
467
468	cp->buffers.buffers[i].next = -1;
469
470	cp->buffers.free_list = 0;
471	cp->buffers.oldest = -1;
472	cp->buffers.newest = -1;
473	cp->buffers.active_state = -1;
474	cp->buffers.cur_tag = 0;
475
476	memset( MEM2CPU( cp->buffers.mem_type, cp->buffers.mem_offset), 0,
477		NUM_INDIRECT_BUFFERS * INDIRECT_BUFFER_SIZE * 4 );
478
479	return B_OK;
480}
481
// free the indirect buffer pool
static void uninitIndirectBuffers( device_info *di )
{
	FREE_MEM( mt_nonlocal, di->si->cp.buffers.mem_handle );
}
486
// initialize CP so it's ready for BM (bus mastering);
// returns B_OK on success; on failure, everything allocated so far is
// released again via the goto-cleanup chain below
status_t Radeon_InitCP( device_info *di )
{
	thread_id thid;
    thread_info thinfo;
	status_t res;

	SHOW_FLOW0( 3, "" );

	// this is _really_ necessary so functions like ResetEngine() know
	// that the CP is not set up yet
	memset( &di->si->cp, 0, sizeof( di->si->cp ));

	if( (res = INIT_BEN( di->si->cp.lock, "Radeon CP" )) < 0 )
		return res;

	// HACK: change owner of benaphore semaphore to team of calling thread;
	// reason: user code cannot acquire kernel semaphores, but the accelerant
	// is in user space; interestingly, it's enough to change the semaphore's
	// owner to _any_ non-system team (that's the only security check done by
	// the kernel)
	thid = find_thread( NULL );
    get_thread_info( thid, &thinfo );
    set_sem_owner( di->si->cp.lock.sem, thinfo.team );

	// init raw CP
	if ( di->acc_dma ) loadMicroEngineRAMData( di );

	// do soft-reset
	Radeon_ResetEngine( di );

	// after warm-reset, the CP may still be active and thus react to
	// register writes during initialization unpredictably, so we better
	// stop it first
	OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	INREG( di->regs, RADEON_CP_CSQ_CNTL );

	// reset CP to make disabling active
	Radeon_ResetEngine( di );

	if ( di->acc_dma )
	{
		// set up ring buffer, feedback area and indirect buffers in turn;
		// on error, undo whatever was already set up
		res = initRingBuffer( di, CP_RING_SIZE );
		if( res < 0 )
			goto err4;

		res = initCPFeedback( di );
		if( res < 0 )
			goto err3;

		res = initIndirectBuffers( di );
		if( res < 0 )
			goto err2;

		// tell CP to use BM
		Radeon_WaitForIdle( di, false, false );

		// enable direct and indirect CP bus mastering
		OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM );

		// allow bus mastering in general
		OUTREGP( di->regs, RADEON_BUS_CNTL, 0, ~RADEON_BUS_MASTER_DIS );
	}


	// don't allow mixing of 2D/3D/scratch/wait_until commands
	// (in fact, this doesn't seem to make any difference as we do a
	// manual sync in all these cases anyway)
	OUTREG( di->regs, RADEON_ISYNC_CNTL,
		RADEON_ISYNC_ANY2D_IDLE3D |
		RADEON_ISYNC_ANY3D_IDLE2D |
		RADEON_ISYNC_WAIT_IDLEGUI |
		RADEON_ISYNC_CPSCRATCH_IDLEGUI );

	SHOW_FLOW( 3, "bus_cntl=%" B_PRIx32, INREG( di->regs, RADEON_BUS_CNTL ));

	SHOW_FLOW0( 3, "Done" );

	return B_OK;

	// error cleanup: fall through from the most recent successful step
//err:
//	uninitIndirectBuffers( ai );
err2:
	uninitCPFeedback( di );
err3:
	uninitRingBuffer( di );
err4:
	DELETE_BEN( di->si->cp.lock );
	return res;
}
577
578
579// shutdown CP, freeing any memory
580void Radeon_UninitCP( device_info *di )
581{
582	vuint8 *regs = di->regs;
583
584	// abort any pending commands
585	Radeon_ResetEngine( di );
586
587	// disable CP BM
588	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
589	// read-back for flushing
590	INREG( regs, RADEON_CP_CSQ_CNTL );
591
592	if ( di->acc_dma )
593	{
594		uninitRingBuffer( di );
595		uninitCPFeedback( di );
596		uninitIndirectBuffers( di );
597	}
598
599	DELETE_BEN( di->si->cp.lock );
600}
601
602
603// mark all indirect buffers as being free;
604// this should only be called after a reset;
605// lock must be hold
606void Radeon_DiscardAllIndirectBuffers( device_info *di )
607{
608	CP_info *cp = &di->si->cp;
609
610	// during init, there is no indirect buffer
611	if( cp->buffers.mem_handle == 0 )
612		return;
613
614	// mark all sent indirect buffers as free
615	while( cp->buffers.oldest != -1 ) {
616		indirect_buffer *oldest_buffer =
617			&cp->buffers.buffers[cp->buffers.oldest];
618		int tmp_oldest_buffer;
619
620		SHOW_FLOW( 0, "%d", cp->buffers.oldest );
621
622		// remove buffer from "used" list
623		tmp_oldest_buffer = oldest_buffer->next;
624
625		if( tmp_oldest_buffer == -1 )
626			cp->buffers.newest = -1;
627
628		// put it on free list
629		oldest_buffer->next = cp->buffers.free_list;
630		cp->buffers.free_list = cp->buffers.oldest;
631
632		cp->buffers.oldest = tmp_oldest_buffer;
633	}
634}
635
// lets hide this in here, as it's got lots of lovely register headers already...
// does it go here, or in the accelerant anyway?
// for now i'm assuming you turn on dynamic clocks, and they take care of themselves onwards...
// so doing it at driver init seems sensible after a valid detection of course...
//
// mode 0 - disable dynamic clock scaling (force all engine clocks on)
// mode 1 - enable dynamic clock scaling
// other  - no-op
void Radeon_SetDynamicClock( device_info *di, int mode)
{
    vuint8 *regs = di->regs;
    radeon_type asic = di->asic;
    uint32 tmp;

    switch(mode) {
	case 0: /* Turn everything OFF (ForceON to everything)*/
		// single-CRTC chips: simply force every SCLK consumer on
		if ( di->num_crtc != 2 ) {
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_CP   | RADEON_SCLK_FORCE_HDP |
				RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_TOP |
				RADEON_SCLK_FORCE_E2   | RADEON_SCLK_FORCE_SE  |
				RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_VIP |
				RADEON_SCLK_FORCE_RE   | RADEON_SCLK_FORCE_PB  |
				RADEON_SCLK_FORCE_TAM  | RADEON_SCLK_FORCE_TDM |
				RADEON_SCLK_FORCE_RB);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
		} else if (asic == rt_rv350) {
			/* for RV350/M10, no delays are required. */
			tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
			tmp |= (R300_SCLK_FORCE_TCL |
				R300_SCLK_FORCE_GA  |
				R300_SCLK_FORCE_CBA);
			Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP      |
				RADEON_SCLK_FORCE_HDP   | RADEON_SCLK_FORCE_DISP1   |
				RADEON_SCLK_FORCE_TOP   | RADEON_SCLK_FORCE_E2      |
				R300_SCLK_FORCE_VAP     | RADEON_SCLK_FORCE_IDCT    |
				RADEON_SCLK_FORCE_VIP   | R300_SCLK_FORCE_SR        |
				R300_SCLK_FORCE_PX      | R300_SCLK_FORCE_TX        |
				R300_SCLK_FORCE_US      | RADEON_SCLK_FORCE_TV_SCLK |
				R300_SCLK_FORCE_SU      | RADEON_SCLK_FORCE_OV0);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
			tmp |= RADEON_SCLK_MORE_FORCEON;
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
			tmp |= (RADEON_FORCEON_MCLKA |
				RADEON_FORCEON_MCLKB |
				RADEON_FORCEON_YCLKA |
				RADEON_FORCEON_YCLKB |
				RADEON_FORCEON_MC);
			Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);

			// clear the "always on" bits, i.e. clock them unconditionally
			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb |
			R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF);
			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
				R300_DVOCLK_ALWAYS_ONb            |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				R300_PIXCLK_DVO_ALWAYS_ONb        |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb     |
				R300_PIXCLK_TRANS_ALWAYS_ONb      |
				R300_PIXCLK_TVO_ALWAYS_ONb        |
				R300_P2G2CLK_ALWAYS_ONb            |
				R300_P2G2CLK_DAC_ALWAYS_ONb           |
				R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF);
			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
		}  else {
			// other dual-CRTC chips; these need settle delays between
			// the PLL writes
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_E2);
			tmp |= RADEON_SCLK_FORCE_SE;

			if ( di->num_crtc != 2 ) {
				tmp |= ( RADEON_SCLK_FORCE_RB    |
				RADEON_SCLK_FORCE_TDM   |
				RADEON_SCLK_FORCE_TAM   |
				RADEON_SCLK_FORCE_PB    |
				RADEON_SCLK_FORCE_RE    |
				RADEON_SCLK_FORCE_VIP   |
				RADEON_SCLK_FORCE_IDCT  |
				RADEON_SCLK_FORCE_TOP   |
				RADEON_SCLK_FORCE_DISP1 |
				RADEON_SCLK_FORCE_DISP2 |
				RADEON_SCLK_FORCE_HDP    );
			} else if ((asic == rt_r300) || (asic == rt_r350)) {
				tmp |= ( RADEON_SCLK_FORCE_HDP   |
					RADEON_SCLK_FORCE_DISP1 |
					RADEON_SCLK_FORCE_DISP2 |
					RADEON_SCLK_FORCE_TOP   |
					RADEON_SCLK_FORCE_IDCT  |
					RADEON_SCLK_FORCE_VIP);
			}
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			snooze(16000);

			if ((asic == rt_r300) || (asic == rt_r350)) {
				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp |= ( R300_SCLK_FORCE_TCL |
					R300_SCLK_FORCE_GA  |
					R300_SCLK_FORCE_CBA);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);
				snooze(16000);
			}

			if (di->is_igp) {
				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
				tmp &= ~(RADEON_FORCEON_MCLKA |
					RADEON_FORCEON_YCLKA);
				Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);
				snooze(16000);
			}

			if ((asic == rt_rv200) ||
				(asic == rt_rv250) ||
				(asic == rt_rv280)) {
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp |= RADEON_SCLK_MORE_FORCEON;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);
				snooze(16000);
			}

			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
			snooze(16000);

			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb);
			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);
		}
		SHOW_FLOW0( 3, "Dynamic Clock Scaling Disabled" );
		break;
	case 1:
		// single-CRTC chips: clear the force bits so units may be
		// clock-gated when idle
		if ( di->num_crtc != 2 ) {
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			if ((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) > RADEON_CFG_ATI_REV_A13) {
				tmp &= ~(RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_RB);
			}
			tmp &= ~(RADEON_SCLK_FORCE_HDP  | RADEON_SCLK_FORCE_DISP1 |
				RADEON_SCLK_FORCE_TOP  | RADEON_SCLK_FORCE_SE   |
				RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_RE   |
				RADEON_SCLK_FORCE_PB   | RADEON_SCLK_FORCE_TAM  |
				RADEON_SCLK_FORCE_TDM);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
		} else if ((asic == rt_r300)
				|| (asic == rt_r350)
				|| (asic == rt_rv350)) {
			if (asic == rt_rv350) {
				// RV350: release forces and program maximum dynamic-stop
				// latencies instead
				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp &= ~(R300_SCLK_FORCE_TCL |
					R300_SCLK_FORCE_GA  |
					R300_SCLK_FORCE_CBA);
				tmp |=  (R300_SCLK_TCL_MAX_DYN_STOP_LAT |
					R300_SCLK_GA_MAX_DYN_STOP_LAT  |
					R300_SCLK_CBA_MAX_DYN_STOP_LAT);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
				tmp &= ~(RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP      |
					RADEON_SCLK_FORCE_HDP   | RADEON_SCLK_FORCE_DISP1   |
					RADEON_SCLK_FORCE_TOP   | RADEON_SCLK_FORCE_E2      |
					R300_SCLK_FORCE_VAP     | RADEON_SCLK_FORCE_IDCT    |
					RADEON_SCLK_FORCE_VIP   | R300_SCLK_FORCE_SR        |
					R300_SCLK_FORCE_PX      | R300_SCLK_FORCE_TX        |
					R300_SCLK_FORCE_US      | RADEON_SCLK_FORCE_TV_SCLK |
					R300_SCLK_FORCE_SU      | RADEON_SCLK_FORCE_OV0);
					tmp |=  RADEON_DYN_STOP_LAT_MASK;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp &= ~RADEON_SCLK_MORE_FORCEON;
				tmp |=  RADEON_SCLK_MORE_MAX_DYN_STOP_LAT;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
				tmp |= (RADEON_PIXCLK_ALWAYS_ONb |
					RADEON_PIXCLK_DAC_ALWAYS_ONb);
				Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
				tmp |= (RADEON_PIX2CLK_ALWAYS_ONb         |
					RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
					RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
					R300_DVOCLK_ALWAYS_ONb            |
					RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
					RADEON_PIXCLK_GV_ALWAYS_ONb       |
					R300_PIXCLK_DVO_ALWAYS_ONb        |
					RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
					RADEON_PIXCLK_TMDS_ALWAYS_ONb     |
					R300_PIXCLK_TRANS_ALWAYS_ONb      |
					R300_PIXCLK_TVO_ALWAYS_ONb        |
					R300_P2G2CLK_ALWAYS_ONb           |
					R300_P2G2CLK_DAC_ALWAYS_ONb);
				Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_MISC);
				tmp |= (RADEON_MC_MCLK_DYN_ENABLE |
					RADEON_IO_MCLK_DYN_ENABLE);
				Radeon_OUTPLL(regs, asic, RADEON_MCLK_MISC, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
				tmp |= (RADEON_FORCEON_MCLKA |
					RADEON_FORCEON_MCLKB);

				tmp &= ~(RADEON_FORCEON_YCLKA  |
					RADEON_FORCEON_YCLKB  |
					RADEON_FORCEON_MC);

				/* Some releases of vbios have set DISABLE_MC_MCLKA
				and DISABLE_MC_MCLKB bits in the vbios table.  Setting these
				bits will cause H/W hang when reading video memory with dynamic clocking
				enabled. */
				if ((tmp & R300_DISABLE_MC_MCLKA) &&
				(tmp & R300_DISABLE_MC_MCLKB)) {
					/* If both bits are set, then check the active channels */
					tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
					if (di->ram.width == 64) {
						if (INREG( regs, RADEON_MEM_CNTL) & R300_MEM_USE_CD_CH_ONLY)
						tmp &= ~R300_DISABLE_MC_MCLKB;
						else
						tmp &= ~R300_DISABLE_MC_MCLKA;
					} else {
						tmp &= ~(R300_DISABLE_MC_MCLKA |
						R300_DISABLE_MC_MCLKB);
					}
				}

				Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);
			} else {
				// R300/R350: only release the 3D-pipe forces
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
				tmp &= ~(R300_SCLK_FORCE_VAP);
				tmp |= RADEON_SCLK_FORCE_CP;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
				snooze(15000);

				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp &= ~(R300_SCLK_FORCE_TCL |
				R300_SCLK_FORCE_GA  |
				R300_SCLK_FORCE_CBA);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);
			}
		} else {
			// remaining dual-CRTC chips (pre-R300): enable dynamic engine
			// clock mode with settle delays between the PLL writes
			tmp = Radeon_INPLL(regs, asic, RADEON_CLK_PWRMGT_CNTL);

			tmp &= ~(RADEON_ACTIVE_HILO_LAT_MASK     |
				RADEON_DISP_DYN_STOP_LAT_MASK   |
				RADEON_DYN_STOP_MODE_MASK);

			tmp |= (RADEON_ENGIN_DYNCLK_MODE |
			(0x01 << RADEON_ACTIVE_HILO_LAT_SHIFT));
			Radeon_OUTPLL(regs, asic, RADEON_CLK_PWRMGT_CNTL, tmp);
			snooze(15000);

			tmp = Radeon_INPLL(regs, asic, RADEON_CLK_PIN_CNTL);
			tmp |= RADEON_SCLK_DYN_START_CNTL;
			Radeon_OUTPLL(regs, asic, RADEON_CLK_PIN_CNTL, tmp);
			snooze(15000);

			/* When DRI is enabled, setting DYN_STOP_LAT to zero can cause some R200
			to lockup randomly, leave them as set by BIOS.
			*/
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			/*tmp &= RADEON_SCLK_SRC_SEL_MASK;*/
			tmp &= ~RADEON_SCLK_FORCEON_MASK;

			/*RAGE_6::A11 A12 A12N1 A13, RV250::A11 A12, R300*/
			if (((asic == rt_rv250) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
				  RADEON_CFG_ATI_REV_A13)) ||
				((asic == rt_rv100) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <=
				  RADEON_CFG_ATI_REV_A13)))
			{
				tmp |= RADEON_SCLK_FORCE_CP;
				tmp |= RADEON_SCLK_FORCE_VIP;
			}

			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			if ((asic == rt_rv200) ||
				(asic == rt_rv250) ||
				(asic == rt_rv280)) {
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp &= ~RADEON_SCLK_MORE_FORCEON;

				/* RV200::A11 A12 RV250::A11 A12 */
				if (((asic == rt_rv200) ||
					 (asic == rt_rv250)) &&
					((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
					  RADEON_CFG_ATI_REV_A13))
				{
					tmp |= RADEON_SCLK_MORE_FORCEON;
				}
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);
				snooze(15000);
			}

			/* RV200::A11 A12, RV250::A11 A12 */
			if (((asic == rt_rv200) ||
				 (asic == rt_rv250)) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
				  RADEON_CFG_ATI_REV_A13))
			{
				tmp = Radeon_INPLL(regs, asic, RADEON_PLL_PWRMGT_CNTL);
				tmp |= RADEON_TCL_BYPASS_DISABLE;
				Radeon_OUTPLL(regs, asic, RADEON_PLL_PWRMGT_CNTL, tmp);
			}
			snooze(15000);

			/*enable dynamic mode for display clocks (PIXCLK and PIX2CLK)*/
			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp |=  (RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
			snooze(15000);

			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp |= (RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);
			snooze(15000);
		}
		SHOW_FLOW0( 3, "Dynamic Clock Scaling Enabled" );
		break;
	default:
		break;
	}
}
988