#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64 /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64 /* They've said -m64 at command line */
#endif

/*
 * FRAME is the stack adjustment handed to every "save" in this file and
 * BIAS is the offset added to %sp/%fp before they are used as addresses.
 * Both depend on whether the 64-bit ABI was selected above.
 */
#ifdef ABI64
	.register	%g2,#scratch
	.register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
#else
# define	FRAME	-96
# define	BIAS	0
#endif

.text
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
! OPENSSL_wipe_cpu: zero the integer and floating-point registers.
!
! Keep in mind that this does not excuse us from wiping the stack!
! This routine wipes registers, but not the backing store [which
! resides on the stack, toward lower addresses]. To facilitate
! stack wiping, the routine returns a pointer to the top of stack
! of the *caller*.
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp
	nop
#ifdef __sun
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins
#endif
	nop
	call	.PIC.zero.up
	mov	.zero-(.-4),%o0		! delay slot: distance from the call to .zero
	ld	[%o0],%f0		! %f0 = 0
	ld	[%o0],%f1		! %f1 = 0

	subcc	%g0,1,%o0
	! Following is V9 "rd %ccr,%o0" instruction. However! V8
	! specification says that it ("rd %asr2,%o0" in V8 terms) does
	! not cause illegal_instruction trap. It therefore can be used
	! to determine if the CPU the code is executing on is V8- or
	! V9-compliant, as V9 returns a distinct value of 0x99,
	! "negative" and "borrow" bits set in both %icc and %xcc.
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.v8
	nop
	! Even though we do not use %fp register bank,
	! we wipe it as memcpy might have used it...
	.word	0xbfa00040	!fmovd	%f0,%f62
	.word	0xbba00040	!...
	.word	0xb7a00040
	.word	0xb3a00040
	.word	0xafa00040
	.word	0xaba00040
	.word	0xa7a00040
	.word	0xa3a00040
	.word	0x9fa00040
	.word	0x9ba00040
	.word	0x97a00040
	.word	0x93a00040
	.word	0x8fa00040
	.word	0x8ba00040
	.word	0x87a00040
	.word	0x83a00040	!fmovd	%f0,%f32
	! Common tail: copy the zeroed %f0/%f1 over %f2-%f31 while
	! clearing the out, local, in and global integer registers.
.v8:	fmovs	%f1,%f31
	clr	%o0
	fmovs	%f0,%f30
	clr	%o1
	fmovs	%f1,%f29
	clr	%o2
	fmovs	%f0,%f28
	clr	%o3
	fmovs	%f1,%f27
	clr	%o4
	fmovs	%f0,%f26
	clr	%o5
	fmovs	%f1,%f25
	clr	%o7
	fmovs	%f0,%f24
	clr	%l0
	fmovs	%f1,%f23
	clr	%l1
	fmovs	%f0,%f22
	clr	%l2
	fmovs	%f1,%f21
	clr	%l3
	fmovs	%f0,%f20
	clr	%l4
	fmovs	%f1,%f19
	clr	%l5
	fmovs	%f0,%f18
	clr	%l6
	fmovs	%f1,%f17
	clr	%l7
	fmovs	%f0,%f16
	clr	%i0
	fmovs	%f1,%f15
	clr	%i1
	fmovs	%f0,%f14
	clr	%i2
	fmovs	%f1,%f13
	clr	%i3
	fmovs	%f0,%f12
	clr	%i4
	fmovs	%f1,%f11
	clr	%i5
	fmovs	%f0,%f10
	clr	%g1
	fmovs	%f1,%f9
	clr	%g2
	fmovs	%f0,%f8
	clr	%g3
	fmovs	%f1,%f7
	clr	%g4
	fmovs	%f0,%f6
	clr	%g5
	fmovs	%f1,%f5
	fmovs	%f0,%f4
	fmovs	%f1,%f3
	fmovs	%f0,%f2

	add	%fp,BIAS,%i0	! return pointer to top of stack of the caller

	ret
	restore

! Two zero words; source of the zeroes loaded into %f0/%f1 above.
.zero:	.long	0x0,0x0
! Position-independent address helper: on entry %o0 holds an offset
! relative to the calling instruction and %o7 holds the address of
! that instruction; returns their sum in %o0.
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0
#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
! Recursive helper: opens a new register window, recurses unless one
! of the two %o7 tests below ends the walk, then clears %o2-%o5, %o7
! and %l0-%l7 of the window on the way back. %o0 and %o1 are cleared
! in the branch delay slots. The value handed back through %i0 is the
! result of the nested call plus one.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7
	be	2f
	clr	%o0
	cmp	%o7,0	! compiler never cleans %o7...
	be	1f	! could have been a leaf function...
	clr	%o1
	call	.walk.reg.wins
	nop
1:	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0	! used for debugging
2:	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu

.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
! OPENSSL_atomic_add: atomically add %o1 to the 32-bit word at [%o0]
! and return the new value, sign-extended, in %o0.
!
! Without the 64-bit ABI the CPU is probed first (see the "rd %ccr"
! comment above): V9 parts take the cas loop at .v9, anything else
! takes the swap-based path, which parks the value -1 in memory while
! the update is in flight and yields the CPU whenever it meets that
! marker. A counter whose real value is -1 can therefore not be
! handled by the swap-based path.
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	.word	0x95408000	!rd	%ccr,%o2, see comment above
	cmp	%o2,0x99
	be	.v9
	nop
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
.spin:	call	YIELD_CPU
	nop
.enter:	ld	[%i0],%i2
	! Test before swapping. The marker has to be the very value
	! that is swapped in and tested below, i.e. -1. This compare
	! used to be against -4096, which never matched the marker and
	! made a counter equal to -4096 spin here forever.
	cmp	%i2,-1
	be	.spin
	mov	-1,%i2		! delay slot: marker to park in memory
	swap	[%i0],%i2	! %i2 = previous contents, memory = -1
	cmp	%i2,-1
	be	.spin		! somebody else is in the middle of an update
	add	%i2,%i1,%i2	! delay slot: new value = old + amount
	stbar
	st	%i2,[%i0]	! publish the new value, dropping the marker
	sra	%i2,%g0,%i0
	ret
	restore
.v9:
#endif
	ld	[%o0],%o2
1:	add	%o1,%o2,%o3
	.word	0xd7e2100a	!cas	[%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	1b
	mov	%o3,%o2		! cas is always fetching to dest. register
	add	%o1,%o2,%o0	! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0	! we return signed int, remember?
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

.global	_sparcv9_rdtick
.align	32
! _sparcv9_rdtick: read the %tick counter.
! On a V9 CPU (detected with the "rd %ccr" probe described above)
! %o0 receives %tick and %o1 receives %o0 shifted right by 32 bits.
! On anything else both %o0 and %o1 are returned as zero.
_sparcv9_rdtick:
	subcc	%g0,1,%o0
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.notick
	xor	%o0,%o0,%o0	! delay slot, always executed: %o0 = 0
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1
.type	_sparcv9_rdtick,#function
.size	_sparcv9_rdtick,.-_sparcv9_rdtick

.global	_sparcv9_vis1_probe
.align	8
! _sparcv9_vis1_probe: execute a pair of VIS1 instructions (a partial
! floating-point load from the stack and an fxor) and return.
! NOTE(review): presumably the caller arranges to catch the trap taken
! on CPUs lacking VIS1 - confirm against the caller.
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.type	_sparcv9_vis1_probe,#function
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe

! Probe and instrument VIS1 instruction. Output is number of cycles it
! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
! is slow (documented to be 6 cycles on T2) and the core is in-order
! single-issue, it should be possible to distinguish Tx reliably...
! Observed return values are:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!
! Numbers for T2 and SPARC64 V-VII are more than welcomed.
!
! It would be possible to detect specifically US-T1 by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite
! a lot of %tick-s, couple of thousand on Linux...
.global	_sparcv9_vis1_instrument
.align	8
_sparcv9_vis1_instrument:
	! five %tick samples with an fxor pair between neighbours
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! calculate intervals
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! find minimum value; each annulled branch targets the
	! instruction right after its own delay slot, so the mov
	! executes only when %o0 is the larger of the two
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.type	_sparcv9_vis1_instrument,#function
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument

.global	_sparcv9_vis2_probe
.align	8
! _sparcv9_vis2_probe: execute one bshuffle instruction and return.
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.type	_sparcv9_vis2_probe,#function
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe

.global	_sparcv9_fmadd_probe
.align	8
! _sparcv9_fmadd_probe: zero %f0 and %f2 with fxor, then execute one
! fmaddd instruction in the return delay slot.
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.type	_sparcv9_fmadd_probe,#function
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe

.global	OPENSSL_cleanse
.align	32
! OPENSSL_cleanse: store zero bytes over %o1 bytes starting at [%o0].
!
! Lengths up to 14 are handled one byte at a time at .Little (a zero
! length returns immediately). Longer buffers are first advanced byte
! by byte to an aligned address, then cleared with 8-byte stores on V9
! or 4-byte stores otherwise; what is left over goes back through the
! byte loop.
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0		! delay slot: sets flags for the test below
	bne	.Little
	nop
	retl
	nop

.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	subcc	%g0,1,%g1
	! see above for explanation
	.word	0x83408000	!rd	%ccr,%g1
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

.v9lot:	andcc	%o0,7,%g0	! byte stores until %o0 is 8-byte aligned
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0
.align	16,0x01000000
.v9aligned:
	! The branch below is hand-encoded with a displacement of
	! minus three instructions, so nothing may be inserted
	! between this label and the branch word.
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0

	cmp	%o1,0
	bne	.Little
	nop
	retl
	nop
#ifndef ABI64
.v8lot:	andcc	%o0,3,%g0	! byte stores until %o0 is 4-byte aligned
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0
	nop
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0
	bnz	.v8aligned
	add	%o0,4,%o0

	cmp	%o1,0
	bne	.Little
	nop
	retl
	nop
#endif
.type	OPENSSL_cleanse,#function
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

! Code placed in the .init section: calls OPENSSL_cpuid_setup.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop