/*
 * Copyright 2007-2008 Analog Devices Inc.
 *              Philippe Gerum <rpm@xenomai.org>
 *
 * Licensed under the GPL-2 or later.
 */

#include <linux/linkage.h>
#include <asm/blackfin.h>
#include <asm/cache.h>
#include <asm/asm-offsets.h>
#include <asm/rwlock.h>
#include <asm/cplb.h>

.text

.macro coreslot_loadaddr reg:req
	\reg\().l = _corelock;
	\reg\().h = _corelock;
.endm

.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
	testset (\addr);
	sti \scratch;
#else
	testset (\addr);
#endif
.endm

/*
 * r0 = address of atomic data to flush and invalidate (32bit).
 *
 * Clear interrupts and return the old mask.
 * We assume that no atomic data can span cachelines.
 *
 * Clobbers: r2:0, p0
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock;
	SSYNC(r2);
	jump .Lretry_corelock
.Ldone_corelock:
	p0 = r1;
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;
ENDPROC(_get_core_lock)

/*
 * r0 = address of atomic data in uncacheable memory region (32bit).
 *
 * Clear interrupts and return the old mask.
 *
 * Clobbers: r0, p0
 */
ENTRY(_get_core_lock_noflush)
	cli r0;
	coreslot_loadaddr p0;
.Lretry_corelock_noflush:
	safe_testset p0, r2;
	if cc jump .Ldone_corelock_noflush;
	SSYNC(r2);
	jump .Lretry_corelock_noflush
.Ldone_corelock_noflush:
	rts;
ENDPROC(_get_core_lock_noflush)

/*
 * r0 = interrupt mask to restore.
 * r1 = address of atomic data to flush and invalidate (32bit).
 *
 * Interrupts are masked on entry (see _get_core_lock).
 * Clobbers: r2:0, p0
 */
ENTRY(_put_core_lock)
	/* Write-through cache assumed, so no flush needed here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;
	SSYNC(r2);
	sti r0;
	rts;
ENDPROC(_put_core_lock)
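
/*
 * Illustrative sketch (editor's note, not part of the build): every
 * atomic helper below brackets its critical section with the pair
 * above.  In C-like pseudocode, assuming only what the routines above
 * document:
 *
 *	unsigned long flags = _get_core_lock(ptr);  // cli + testset corelock + flushinv line
 *	... read-modify-write *ptr ...
 *	_put_core_lock(flags, ptr);                 // clear corelock + ssync + sti
 *
 * The corelock word serializes the two cores, while flushinv/SSYNC keep
 * the cacheline holding *ptr coherent between them.
 */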

#ifdef __ARCH_SYNC_CORE_DCACHE

ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Set bit of other cores in barrier mask. Don't change current core bit.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump 1f;
	r7 = r7 | r6;
1:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)

ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/*
	 * Calculate current core mask
	 */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/*
	 * Clear current core bit in barrier mask if it is set.
	 */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump 1f;
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * Invalidate the entire D-cache of current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;
	jump 2f;
1:
	call _put_core_lock;
2:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
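
/*
 * Worked example of the barrier mask protocol above (editor's sketch,
 * values chosen for illustration): with two cores, core 0 running
 * ___raw_smp_mark_barrier_asm sets the other core's bit (bit 1) in
 * _barrier_mask while leaving its own bit untouched.  When core 1
 * later runs ___raw_smp_check_barrier_asm and finds its own bit set,
 * it clears that bit and calls _resync_core_dcache, so data written by
 * core 0 before the barrier is re-fetched from memory.  If its bit is
 * clear, no other core marked a barrier and the D-cache is left alone.
 */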

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	r7 = r0;
	p1 = r1;

	/*
	 * Determine whether the atomic data was previously
	 * owned by another CPU (=r6).
	 */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	r1 = [p1];
	r1 >>= 28;   /* CPU fingerprints are stored in the high nibble. */
	r6 = r1 & r2;
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/*
	 * Release the core lock now, but keep IRQs disabled while we are
	 * performing the remaining housekeeping chores for the current CPU.
	 */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/*
	 * If another CPU has owned the same atomic section before us,
	 * then our D-cached copy of the shared data protected by the
	 * current spin/write_lock may be obsolete.
	 */
	cc = r6 == 0;
	if cc jump .Lcache_synced

	/*
	 * Invalidate the entire D-cache of the current core.
	 */
	sp += -12;
	call _resync_core_dcache
	sp += 12;

.Lcache_synced:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts

/*
 * r0 = irqflags
 * r1 = address of atomic data
 *
 * Clobbers: r2:0, p1:0
 */
_end_lock_coherent:

	p1 = r1;
	GET_CPUID(p0, r2);
	r2 += 28;
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;
	r1 = p1;
	jump _put_core_lock;

#endif /* __ARCH_SYNC_CORE_DCACHE */
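
/*
 * Editor's sketch of the fingerprint scheme above (bit positions taken
 * from the shifts in the code): bits 31:28 of the lock word record
 * which cores have owned the lock since the current core last resynced
 * its D-cache.  _end_lock_coherent sets bit (28 + cpuid) on unlock;
 * _start_lock_coherent tests the other cores' bits, clears the whole
 * nibble, and invalidates the D-cache only if another core's bit was
 * set.  For example, core 0 unlocking leaves 0x10000000 in the word;
 * core 1 locking next sees bit 28 set and therefore invalidates its
 * D-cache before touching the protected data, while core 0 re-locking
 * would see only its own bit and skip the resync.
 */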

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_is_locked_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_spin_is_locked_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;
	[--sp] = rets;
.Lretry_spinlock:
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	cc = bittst( r2, 0 );
	if cc jump .Lbusy_spinlock
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 ); /* Raise the lock bit. */
	[p1] = r2;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lbusy_spinlock:
	/* We don't touch the atomic area if busy, so that flush
	   will behave like nop in _put_core_lock. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	jump .Lretry_spinlock
ENDPROC(___raw_spin_lock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_spin_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = p1;
	r3 = [p1];
	cc = bittst( r3, 0 );
	if cc jump .Lfailed_trylock
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 ); /* Raise the lock bit. */
	[p1] = r3;
	call _start_lock_coherent
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;
.Lfailed_trylock:
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_trylock_asm)

/*
 * r0 = &spinlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_spin_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)
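
/*
 * Minimal usage sketch (editor's assumption about the C side; the
 * wrappers are expected to live in the arch's asm/spinlock.h):
 *
 *	static inline void arch_spin_lock(arch_spinlock_t *lock)
 *	{
 *		__raw_spin_lock_asm(&lock->lock);	// spins on bit 0
 *	}
 *
 *	static inline void arch_spin_unlock(arch_spinlock_t *lock)
 *	{
 *		__raw_spin_unlock_asm(&lock->lock);	// clears bit 0
 *	}
 *
 * Bit 0 of lock->lock is the lock bit; on __ARCH_SYNC_CORE_DCACHE
 * kernels the high nibble additionally carries the owner fingerprints
 * handled by _start_lock_coherent/_end_lock_coherent.
 */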

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_read_lock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
.Lrdlock_try:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrdlock_failed
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrdlock_failed:
	r1 += 1;
	[p1] = r1;
.Lrdlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrdlock_wait;
	jump .Lrdlock_try
ENDPROC(___raw_read_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lfailed_tryrdlock;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;
.Lfailed_tryrdlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_read_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Note: Processing controlled by a reader lock should not have
 * any side-effect on cache issues with the other core, so we
 * just release the core lock and exit (no _end_lock_coherent).
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_read_unlock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_lock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
.Lwrlock_try:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwrlock_wait
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwrlock_wait:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwrlock_wait;
	jump .Lwrlock_try
ENDPROC(___raw_write_lock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
	cc = r1 == r2;
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)

/*
 * r0 = &rwlock->lock
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_write_unlock_asm)
	p1 = r0;
	r3.l = lo(RW_LOCK_BIAS);
	r3.h = hi(RW_LOCK_BIAS);
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)
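
/*
 * Worked example of the bias scheme above (editor's sketch; assumes the
 * RW_LOCK_BIAS value from asm/rwlock.h, e.g. 0x01000000): an unlocked
 * rwlock holds RW_LOCK_BIAS.  Each read_lock decrements it, so any
 * positive value means "readers only" and read_unlock simply increments
 * it back.  write_lock subtracts the whole bias and succeeds only when
 * the result is zero (no readers, no writer); write_unlock adds the
 * bias back.  On __ARCH_SYNC_CORE_DCACHE kernels the comparisons first
 * mask off bits 31:28, since that nibble stores the owner fingerprints.
 */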

/*
 * r0 = ptr
 * r1 = value
 *
 * Add a signed value to a 32bit word and return the new value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_update_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_update_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Clear the mask bits from a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_clear_asm)
	p1 = r0;
	r3 = ~r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_clear_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * Set the mask bits into a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_set_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_set_asm)

/*
 * r0 = ptr
 * r1 = mask
 *
 * XOR the mask bits with a 32bit word and return the old 32bit value
 * atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_xor_asm)
	p1 = r0;
	r3 = r1;
	[--sp] = rets;
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r3 = r2;
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_atomic_xor_asm)
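
/*
 * Illustrative C view of the four helpers above (editor's sketch, with
 * lock()/unlock() standing for the core-lock pair):
 *
 *	int __raw_atomic_update_asm(volatile int *p, int v)
 *	{ lock(p); int n = *p + v; *p = n; unlock(p); return n; }   // new value
 *
 *	int __raw_atomic_clear_asm(volatile int *p, int m)
 *	{ lock(p); int o = *p; *p = o & ~m; unlock(p); return o; }  // old value
 *
 * _set and _xor follow the _clear pattern with | and ^; note that only
 * _update returns the new value, the mask variants return the old one.
 */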

/*
 * r0 = ptr
 * r1 = mask
 *
 * Perform a logical AND between the mask bits and a 32bit word, and
 * return the masked value. We need this on this architecture in
 * order to invalidate the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_atomic_test_asm)
	p1 = r0;
	r3 = r1;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)

/*
 * r0 = ptr
 * r1 = value
 *
 * Swap *ptr with value and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
#define	__do_xchg(src, dst) 		\
	p1 = r0;			\
	r3 = r1;			\
	[--sp] = rets;			\
	call _get_core_lock;		\
	r2 = src;			\
	dst = r3;			\
	r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
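
/*
 * The __do_xchg expansion above is identical for all three widths; only
 * the load/store operands differ.  Editor's sketch of the 32bit case in
 * C-like form (lock()/unlock() again meaning the core-lock pair):
 *
 *	int __raw_xchg_4_asm(volatile int *p, int v)
 *	{ lock(p); int o = *p; *p = v; unlock(p); return o; }
 *
 * The 1- and 2-byte variants zero-extend the old value via the (z)
 * suffix on the load before returning it.
 */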

/*
 * r0 = ptr
 * r1 = new
 * r2 = old
 *
 * Swap *ptr with new if *ptr == old and return the previous *ptr
 * value atomically.
 *
 * Clobbers: r3:0, p1:0
 */
#define	__do_cmpxchg(src, dst) 		\
	[--sp] = rets;			\
	[--sp] = r4;			\
	p1 = r0;			\
	r3 = r1;			\
	r4 = r2;			\
	call _get_core_lock;		\
	r2 = src;			\
	cc = r2 == r4;			\
	if !cc jump 1f;			\
	dst = r3;			\
     1: r3 = r2;			\
	r1 = p1;			\
	call _put_core_lock;		\
	r0 = r3;			\
	r4 = [sp++];			\
	rets = [sp++];			\
	rts;

ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
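
/*
 * Editor's sketch of the cmpxchg expansion above, 32bit case:
 *
 *	int __raw_cmpxchg_4_asm(volatile int *p, int new, int old)
 *	{
 *		lock(p);
 *		int cur = *p;
 *		if (cur == old)		// store only on match
 *			*p = new;
 *		unlock(p);
 *		return cur;		// caller compares against 'old'
 *	}
 *
 * Note the argument order matches the register comment above
 * (r1 = new, r2 = old), which is the reverse of the generic C-level
 * cmpxchg(ptr, old, new) convention.
 */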

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Set a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_set_asm
ENDPROC(___raw_bit_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Clear a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_clear_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_clear_asm
ENDPROC(___raw_bit_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Toggle a bit in a 32bit word and return the old 32bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_xor_asm
ENDPROC(___raw_bit_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-set a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_set_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_clear_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test-and-toggle a bit in a 32bit word,
 * and return the old bit value atomically.
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;
	call ___raw_bit_toggle_asm
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;
	r0 = r0 & r2;
	cc = r0 == 0;
	if cc jump 1f
	r0 = 1;
1:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)

/*
 * r0 = ptr
 * r1 = bitnr
 *
 * Test a bit in a 32bit word and return its value.
 * We need this on this architecture in order to invalidate
 * the local cache before testing.
 *
 * Clobbers: r3:0, p1:0
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;
	r1 = 1;
	r1 <<= r2;
	jump ___raw_atomic_test_asm
ENDPROC(___raw_bit_test_asm)
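
/*
 * Worked example for the bit helpers above (editor's sketch): for
 * bitnr = 5 the shared prologue builds the mask 1 << 5 = 0x20 and jumps
 * to the matching mask-based atomic routine.  If the word held 0x24
 * beforehand, ___raw_bit_test_set_asm leaves 0x24 | 0x20 = 0x24 in
 * memory (bit 5 was already set) and returns 1, because the old value
 * ANDed with 0x20 is non-zero.
 */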

/*
 * r0 = ptr
 *
 * Fetch and return an uncached 32bit value.
 *
 * Clobbers: r2:0, p1:0
 */
ENTRY(___raw_uncached_fetch_asm)
	p1 = r0;
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)