// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

// Pointer-size dependent operations.
//   SWIZZLE turns the incoming env argument into a usable 64-bit
//           address (plain add for LP64, addp4 for 32-bit pointers).
//   STPTR   stores a pointer-sized field (8 or 4 bytes).
#ifdef _LP64
#define SWIZZLE add
#define STPTR st8
#else
#define SWIZZLE addp4
#define STPTR st4
#endif

// Symbolic names for the registers used as temporaries by the
// routines in this file.
rRP	= r14		// return address (b0) / saved ip
rPFS	= r15		// ar.pfs at entry / saved cfm
rUNAT	= r16		// caller's ar.unat
rRNAT	= r17		// ar.rnat
rENV0	= r18		// pointers into the uwx_env save area
rENV1	= r19
rENV2	= r20
rNSLOT	= r21		// pfs.sol, later adjusted by the bias tests
rBSP	= r22		// ar.bsp
rPBSP	= r23		// previous frame's bsp
rRSC	= r24		// ar.rsc as found
rNATP	= r25		// 0x1f8 mask applied to bsp
rBIAS	= r26		// slot bias computed from bsp{8:3}
rRSC0	= r27		// ar.rsc with the mode bits cleared
rTMP1	= r28		// general temporaries
rTMP2	= r29
rTMP3	= r30
rTMP4	= r31
rTMP5	= r8		// note: same register the routines load via ret0 on exit
rMYPFS	= r9		// alloc result in uwx_self_install_context
rPSP	= r10		// sp value to install

// Bits of the valid_regs mask stored at env+0.
VALID_IP	= 1
VALID_SP	= 1 << 1
VALID_BSP	= 1 << 2
VALID_CFM	= 1 << 3
VALID_PREDS	= 1 << 7
VALID_PRIUNAT	= 1 << 8
VALID_RNAT	= 1 << 10
VALID_UNAT	= 1 << 11
VALID_FPSR	= 1 << 12
VALID_LC	= 1 << 13
VALID_GRS	= 0xf << 16	// four bits (r4-r7 are the GRs saved below)
VALID_BRS	= 0x1f << 20	// five bits (b1-b5 are the BRs saved below)
VALID_BASIC4	= VALID_IP | VALID_SP | VALID_BSP | VALID_CFM
VALID_SPEC	= VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC
VALID_REGS	= VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS	// = 0x01ff3d8f
VALID_FRS	= 0xfffff	// twenty bits (f2-f5 and f16-f31 are saved below)

// valid_regs and valid_frs are separate unsigned int fields.
// In order to store them with a single st8, we need to know
// the endianness.
#ifdef __LITTLE_ENDIAN__
VALID_BITS	= (VALID_FRS << 32) | VALID_REGS
#else
VALID_BITS	= (VALID_REGS << 32) | VALID_FRS
#endif

	.text

// int uwx_self_init_context(struct uwx_env *env);
//
// Stores a snapshot of the caller's context in the uwx_env structure.
// Save-area offsets used by the two routines below (bytes from the
// start of struct uwx_env, as established by the stores and loads):
//
//	  0	valid_regs / valid_frs masks (one 8-byte store)
//	  8	ip		 16	sp
//	 24	bsp		 32	cfm
//	 64	preds		 72	priunat
//	 88	ar.rnat		 96	ar.unat
//	104	ar.fpsr		112	ar.lc
//	128	gp (only read, by uwx_self_install_context)
//	136	r4-r7, 8 bytes each
//	168	b1-b5, 8 bytes each
//	208	f2-f5 then f16-f31, 16 bytes each
//	528	rstate (pointer-sized)
//
// uwx_self_init_context
//	In:	r32 = env
//	Out:	ret0 = 0 (UWX_OK)
//	The recorded ip is the caller's return address (b0), cfm is
//	ar.pfs as seen at entry, and bsp is ar.bsp moved back by the
//	caller's locals (pfs.sol) plus the bias correction below.
//	ar.unat and ar.rsc are put back to their entry values before
//	returning.

	.proc	uwx_self_init_context
	.global uwx_self_init_context
uwx_self_init_context:
	.prologue
	alloc	rPFS = ar.pfs, 1, 0, 0, 0
	mov	rUNAT = ar.unat		// keep caller's ar.unat; the spills below modify it
	.body
	SWIZZLE	rENV0 = r0, r32		// rENV0 = &env
	;;
	flushrs
	extr.u	rNSLOT = rPFS, 7, 7	// nslots = pfs.sol
	mov	rRP = b0
	;;
	mov	rRSC = ar.rsc
	add	rENV1 = 136, rENV0	// rENV1 = &env->context.gr[0]
	add	rENV2 = 144, rENV0	// rENV2 = &env->context.gr[1]
	;;
	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
	adds	rNATP = 0x1f8, r0
	mov	rTMP1 = b1
	;;
	st8.spill [rENV1] = r4, 16	// env+136: r4
	st8.spill [rENV2] = r5, 16	// env+144: r5
	mov	rTMP2 = b2
	;;
	st8.spill [rENV1] = r6, 16	// env+152: r6
	st8.spill [rENV2] = r7, 16	// env+160: r7
	mov	rTMP3 = b3
	;;
	st8	[rENV1] = rTMP1, 16	// env+168: b1
	st8	[rENV2] = rTMP2, 16	// env+176: b2
	mov	rTMP1 = b4
	;;
	st8	[rENV1] = rTMP3, 16	// env+184: b3
	st8	[rENV2] = rTMP1, 16	// env+192: b4
	mov	rTMP2 = b5
	;;
	st8	[rENV1] = rTMP2		// env+200: b5
	mov	ar.rsc = rRSC0		// enforced lazy mode
	add	rENV1 = 8, rENV0	// rENV1 = &env->context ip slot (env+8)
	;;
	mov	rRNAT = ar.rnat		// get copy of ar.rnat
	movl	rTMP1 = VALID_BITS	// valid_regs: ip, sp, bsp, cfm,
					// preds, priunat, rnat, unat, fpsr,
					// lc, grs, brs (0x01ff3d8f), packed
					// with valid_frs (0xfffff) in the
					// other 32-bit half
	;;
	mov	ar.rsc = rRSC		// restore ar.rsc
	mov	rBSP = ar.bsp
	add	rTMP3 = 136, rENV0	// spill_loc = &env->context.gr[0]
	;;
	mov	rTMP2 = ar.unat		// NaT bits produced by the four spills above
	nop
	extr.u	rTMP3 = rTMP3, 3, 6	// bitpos = spill_loc{8:3}
	;;
	and	rBIAS = rBSP, rNATP	// bias = (bsp & 0x1f8) ...
	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
	shr	rTMP5 = rTMP2, rTMP3	// (unat >> bitpos)
	;;
	nop
	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
	shl	rTMP2 = rTMP2, rTMP4	// (unat << (64 - bitpos))
	;;
	or	rTMP2 = rTMP2, rTMP5	// priunat = rotate_right(unat, bitpos)
	nop
	mov	rTMP4 = pr
	;;
	st8	[rENV0] = rTMP1, 16	// env+0: valid_regs mask
	st8	[rENV1] = rRP, 24	// env+8: ip (my rp)
	sub	rBIAS = rNSLOT, rBIAS	// bias = nslots - bias
	;;
	// Each test that succeeds adds one extra slot to nslots
	// (presumably for a NaT collection word lying between the
	// previous bsp and the current one -- confirm against the RSE
	// backing-store layout).
	cmp.lt	p6, p0 = 0, rBIAS	// if (0 < bias) ...
	cmp.lt	p7, p0 = 63, rBIAS	// if (63 < bias) ...
	;;
	st8	[rENV0] = r12, 48	// env+16: sp
	st8	[rENV1] = rPFS, 40	// env+32: cfm (my pfs)
(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	st8	[rENV0] = rTMP4, 24	// env+64: preds
	st8	[rENV1] = rTMP2, 24	// env+72: priunat
(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	st8	[rENV0] = rRNAT, -64	// env+88: ar.rnat
	st8	[rENV1] = rUNAT, 8	// env+96: ar.unat
	dep.z	rTMP3 = rNSLOT, 3, 7	// (nslots << 3)
	;;
	sub	rPBSP = rBSP, rTMP3	// prev_bsp = bsp - (nslots << 3)
	mov	rTMP3 = ar.fpsr
	mov	rTMP1 = ar.lc
	;;
	st8	[rENV0] = rPBSP, 184	// env+24: bsp (my prev bsp)
	st8	[rENV1] = rTMP3, 8	// env+104: ar.fpsr
	add	rENV2 = 320, rENV2	// rENV2 = env+528 (was env+208): &rstate
	;;
	st8	[rENV1] = rTMP1, 112	// env+112: ar.lc
	STPTR	[rENV2] = r0		// env+528: rstate = 0
	nop
	;;
	// THIS CODE NEEDS TO BE SCHEDULED!!!
	// rENV0 = env+208 and rENV1 = env+224 here; each pointer
	// advances by 32 so the pair covers consecutive 16-byte slots.
	stf.spill [rENV0] = f2, 32	// env+208: f2
	stf.spill [rENV1] = f3, 32	// env+224: f3
	;;
	stf.spill [rENV0] = f4, 32	// env+240: f4
	stf.spill [rENV1] = f5, 32	// env+256: f5
	;;
	stf.spill [rENV0] = f16, 32	// env+272: f16
	stf.spill [rENV1] = f17, 32	// env+288: f17
	;;
	stf.spill [rENV0] = f18, 32	// env+304: f18
	stf.spill [rENV1] = f19, 32	// env+320: f19
	;;
	stf.spill [rENV0] = f20, 32	// env+336: f20
	stf.spill [rENV1] = f21, 32	// env+352: f21
	;;
	stf.spill [rENV0] = f22, 32	// env+368: f22
	stf.spill [rENV1] = f23, 32	// env+384: f23
	;;
	stf.spill [rENV0] = f24, 32	// env+400: f24
	stf.spill [rENV1] = f25, 32	// env+416: f25
	;;
	stf.spill [rENV0] = f26, 32	// env+432: f26
	stf.spill [rENV1] = f27, 32	// env+448: f27
	;;
	stf.spill [rENV0] = f28, 32	// env+464: f28
	stf.spill [rENV1] = f29, 32	// env+480: f29
	;;
	stf.spill [rENV0] = f30, 32	// env+496: f30
	stf.spill [rENV1] = f31, 32	// env+512: f31
	;;
	mov	ar.unat = rUNAT		// put caller's ar.unat back
	mov	ret0 = r0		// return UWX_OK
	br.ret.sptk b0
	.endp

// uwx_self_install_context(
//		struct uwx_env *env,
//		uint64_t r15,
//		uint64_t r16,
//		uint64_t r17,
//		uint64_t r18,
//		uint64_t ret
//		);
//
// Installs the given context, and sets the landing pad binding
// registers r15-r18 to the values given.
// Returns the value "ret" to the new context (for testing --
// when transferring to a landing pad, the new context won't
// care about the return value).
//
// In:	r32 = env, r33-r36 = values for r15-r18, r37 = ret
// The routine does not return to its caller: it loads b0 and sp
// from the saved ip and sp and leaves through br.ret.
// The gp slot at env+128 is read here but is not written by
// uwx_self_init_context; presumably it is filled in elsewhere --
// confirm against the C side.

	.proc	uwx_self_install_context
	.global uwx_self_install_context
uwx_self_install_context:
	.prologue
	alloc	rMYPFS = ar.pfs, 6, 0, 0, 0	// rMYPFS is not used again below
	.body
	SWIZZLE	rENV0 = r0, r32		// rENV0 = &env
	;;

	// THIS CODE NEEDS TO BE SCHEDULED!!!

	// Restore GR 4-7 and ar.unat
	add	rENV1 = 136, rENV0	// &env->context.gr[0]
	add	rENV2 = 72, rENV0	// &env->context.priunat
	;;
	ld8	rTMP2 = [rENV2], 24	// env+72: priunat
	extr.u	rTMP3 = rENV1, 3, 6	// bitpos = spill_loc{8:3}
	;;
	ld8	rUNAT = [rENV2], 48	// env+96: ar.unat
	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
	shl	rTMP5 = rTMP2, rTMP3	// (priunat << bitpos)
	;;
	shr	rTMP2 = rTMP2, rTMP4	// (priunat >> (64 - bitpos))
	;;
	or	rTMP2 = rTMP2, rTMP5	// rotate_left(priunat, bitpos)
	;;
	mov	ar.unat = rTMP2		// put priunat in place
	;;
	ld8.fill r4 = [rENV1], 16	// env+136: r4
	ld8.fill r5 = [rENV2], 16	// env+144: r5
	;;
	ld8.fill r6 = [rENV1], 16	// env+152: r6
	ld8.fill r7 = [rENV2], 16	// env+160: r7
	;;
	mov	ar.unat = rUNAT		// restore real ar.unat

	// Restore BR 1-5
	ld8	rTMP1 = [rENV1], 16	// env+168: b1
	ld8	rTMP2 = [rENV2], 16	// env+176: b2
	;;
	ld8	rTMP3 = [rENV1], 16	// env+184: b3
	ld8	rTMP4 = [rENV2], -168	// env+192: b4 (rENV2 -> env+24)
	mov	b1 = rTMP1
	;;
	ld8	rTMP1 = [rENV1], -168	// env+200: b5 (rENV1 -> env+32)
	mov	b2 = rTMP2
	mov	b3 = rTMP3
	mov	b4 = rTMP4
	;;
	mov	b5 = rTMP1

	// Restore ar.bsp, ar.pfs, and ar.rnat
	ld8	rPFS = [rENV1], 56	// env+32: cfm (+saved ar.ec)
	mov	rRSC = ar.rsc
	adds	rBIAS = 0x1f8, r0
	;;
	flushrs
	ld8	rRNAT = [rENV1], -24	// env+88: ar.rnat
	ld8	rPBSP = [rENV2], 88	// env+24: prev_bsp
	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
	;;
	mov	ar.rsc = rRSC0		// enforced lazy mode
	extr.u	rNSLOT = rPFS, 7, 7	// nslots = pfs.sol
	;;
	invala
	and	rBIAS = rPBSP, rBIAS	// bias = prev_bsp & 0x1f8 ...
	;;
	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
	;;
	add	rBIAS = rNSLOT, rBIAS	// bias += nslots
	;;
	// Inverse of the adjustment made when the context was saved:
	// each test that succeeds adds one extra slot to nslots.
	cmp.lt	p6, p0 = 63, rBIAS	// if (63 < bias) ...
	cmp.lt	p7, p0 = 126, rBIAS	// if (126 < bias) ...
	;;
(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	dep.z	rTMP3 = rNSLOT, 3, 7	// (nslots << 3)
	;;
	add	rBSP = rPBSP, rTMP3	// bsp = prev_bsp + (nslots << 3)
	;;
	mov	ar.bspstore = rBSP	// restore ar.bsp
	;;
	mov	ar.rnat = rRNAT		// restore ar.rnat
	mov	ar.pfs = rPFS		// restore ar.pfs
	;;
	mov	ar.rsc = rRSC		// restore ar.rsc

	// Restore preds and ar.lc
	ld8	rTMP1 = [rENV1], -56	// env+64: preds
	ld8	rTMP2 = [rENV2], -96	// env+112: ar.lc
	;;
	mov	pr = rTMP1
	mov	ar.lc = rTMP2

	// Get previous sp and ip
	ld8	rRP = [rENV1], 96	// env+8: ip (my rp)
	ld8	rPSP = [rENV2], 112	// env+16: sp
	;;

	// Restore ar.fpsr and gp
	ld8	rTMP1 = [rENV1], 104	// env+104: ar.fpsr
	ld8	r1 = [rENV2], 96	// env+128: gp
	;;
	mov	ar.fpsr = rTMP1		// restore ar.fpsr

	// Restore FR 2-5 and 16-31
	ldf.fill f2 = [rENV1], 32	// env+208: f2
	ldf.fill f3 = [rENV2], 32	// env+224: f3
	;;
	ldf.fill f4 = [rENV1], 32	// env+240: f4
	ldf.fill f5 = [rENV2], 32	// env+256: f5
	;;
	ldf.fill f16 = [rENV1], 32	// env+272: f16
	ldf.fill f17 = [rENV2], 32	// env+288: f17
	;;
	ldf.fill f18 = [rENV1], 32	// env+304: f18
	ldf.fill f19 = [rENV2], 32	// env+320: f19
	;;
	ldf.fill f20 = [rENV1], 32	// env+336: f20
	ldf.fill f21 = [rENV2], 32	// env+352: f21
	;;
	ldf.fill f22 = [rENV1], 32	// env+368: f22
	ldf.fill f23 = [rENV2], 32	// env+384: f23
	;;
	ldf.fill f24 = [rENV1], 32	// env+400: f24
	ldf.fill f25 = [rENV2], 32	// env+416: f25
	;;
	ldf.fill f26 = [rENV1], 32	// env+432: f26
	ldf.fill f27 = [rENV2], 32	// env+448: f27
	;;
	ldf.fill f28 = [rENV1], 32	// env+464: f28
	ldf.fill f29 = [rENV2], 32	// env+480: f29
	;;
	ldf.fill f30 = [rENV1], 32	// env+496: f30
	ldf.fill f31 = [rENV2], 32	// env+512: f31

	// Set landing pad parameter registers.
	// r15-r18 are also rPFS, rUNAT, rRNAT and rENV0; none of
	// those aliases is read after this point.
	mov	r15 = r33
	mov	r16 = r34
	mov	r17 = r35
	mov	r18 = r36

	// Restore previous sp and Return
	// NOTE(review): b0 is written immediately before the br.ret
	// with no explicit stop in between; confirm the assembler's
	// automatic stop insertion is relied on here.
	mov	ret0 = r37
	mov	sp = rPSP
	mov	b0 = rRP
	br.ret.sptk b0

	.endp