// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// Pointer-size dependent instruction selection.
//   SWIZZLE forms the env pointer from the incoming argument register
//           (plain add for LP64, addp4 otherwise).
//   STPTR   is the store used to clear the rstate field.
#ifdef _LP64
#define SWIZZLE	add
#define STPTR	st8
#else
#define SWIZZLE	addp4
#define STPTR	st4
#endif

// Symbolic names for the registers used by the routines in this file.
rRP	= r14		// return pointer (b0) / saved ip
rPFS	= r15		// ar.pfs of the context being saved / restored
rUNAT	= r16		// ar.unat
rRNAT	= r17		// ar.rnat
rENV0	= r18		// walking pointer into *env
rENV1	= r19		// walking pointer into *env
rENV2	= r20		// walking pointer into *env
rNSLOT	= r21		// number of backing-store slots (pfs.sol + NaT slots)
rBSP	= r22		// ar.bsp
rPBSP	= r23		// previous frame's bsp
rRSC	= r24		// ar.rsc as found on entry
rNATP	= r25		// 0x1f8 mask used on bsp
rBIAS	= r26		// slot bias for the NaT-collection adjustment
rRSC0	= r27		// ar.rsc with the mode bits cleared
rTMP1	= r28		// scratch
rTMP2	= r29		// scratch
rTMP3	= r30		// scratch
rTMP4	= r31		// scratch
rTMP5	= r8		// scratch
rMYPFS	= r9		// ar.pfs returned by alloc in uwx_self_install_context
rPSP	= r10		// previous sp loaded from the context

// Bits of the valid_regs mask, one per saved context element.
VALID_IP      = 1
VALID_SP      = 1 << 1
VALID_BSP     = 1 << 2
VALID_CFM     = 1 << 3
VALID_PREDS   = 1 << 7
VALID_PRIUNAT = 1 << 8
VALID_RNAT    = 1 << 10
VALID_UNAT    = 1 << 11
VALID_FPSR    = 1 << 12
VALID_LC      = 1 << 13
VALID_GRS     = 0xf << 16
VALID_BRS     = 0x1f << 20
VALID_BASIC4  = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM
VALID_SPEC    = VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC
VALID_REGS    = VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS	// = 0x01ff3d8f
VALID_FRS     = 0xfffff
// valid_regs and valid_frs are separate unsigned int fields.
// In order to store them with a single st8, we need to know
// the endianness.
#ifdef __LITTLE_ENDIAN__
VALID_BITS    = (VALID_FRS << 32) | VALID_REGS
#else
VALID_BITS    = (VALID_REGS << 32) | VALID_FRS
#endif

	.text

// int uwx_self_init_context(struct uwx_env *env);
//
// Stores a snapshot of the caller's context in the uwx_env structure.
//
// In:	r32  = env
// Out:	ret0 = 0 (UWX_OK)
//
// Byte offsets from env written by this routine:
//	  0	valid_regs + valid_frs (one 8-byte store of VALID_BITS)
//	  8	ip (caller's rp)	 16	sp
//	 24	bsp (previous bsp)	 32	cfm (ar.pfs)
//	 64	preds			 72	priunat
//	 88	ar.rnat			 96	ar.unat
//	104	ar.fpsr			112	ar.lc
//	136-160	r4-r7			168-200	b1-b5
//	208-512	f2-f5, f16-f31 (16 bytes each)
//	528	rstate (cleared)
//
// ar.unat is modified by the st8.spill instructions and is put back
// to its entry value before returning.

	.proc	uwx_self_init_context
	.global uwx_self_init_context
uwx_self_init_context:
	.prologue
	alloc	rPFS = ar.pfs, 1, 0, 0, 0
	mov	rUNAT = ar.unat
	.body
	SWIZZLE	rENV0 = r0, r32		// rENV0 = &env
	;;
	flushrs
	extr.u	rNSLOT = rPFS, 7, 7	// nslots = pfs.sol
	mov	rRP = b0
	;;
	mov	rRSC = ar.rsc
	add	rENV1 = 136, rENV0	// rENV1 = &env->context.gr[0]
	add	rENV2 = 144, rENV0	// rENV2 = &env->context.gr[1]
	;;
	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
	adds	rNATP = 0x1f8, r0
	mov	rTMP1 = b1
	;;
	st8.spill [rENV1] = r4, 16	// env+136: r4
	st8.spill [rENV2] = r5, 16	// env+144: r5
	mov	rTMP2 = b2
	;;
	st8.spill [rENV1] = r6, 16	// env+152: r6
	st8.spill [rENV2] = r7, 16	// env+160: r7
	mov	rTMP3 = b3
	;;
	st8	[rENV1] = rTMP1, 16	// env+168: b1
	st8	[rENV2] = rTMP2, 16	// env+176: b2
	mov	rTMP1 = b4
	;;
	st8	[rENV1] = rTMP3, 16	// env+184: b3
	st8	[rENV2] = rTMP1, 16	// env+192: b4
	mov	rTMP2 = b5
	;;
	st8	[rENV1] = rTMP2		// env+200: b5
	mov	ar.rsc = rRSC0		// enforced lazy mode
	add	rENV1 = 8, rENV0
	;;
	mov	rRNAT = ar.rnat		// get copy of ar.rnat
	movl	rTMP1 = VALID_BITS	// valid_regs: ip, sp, bsp, cfm,
					// preds, priunat, rnat, unat, fpsr,
					// lc, grs, brs; valid_frs: 0xfffff
					// = 0x01ff3d8f000fffff (big-endian)
					// = 0x000fffff01ff3d8f (little-endian)
	;;
	mov	ar.rsc = rRSC		// restore ar.rsc
	mov	rBSP = ar.bsp
	add	rTMP3 = 136, rENV0	// spill_loc = &env->context.gr[0]
	;;
	mov	rTMP2 = ar.unat		// unat as left by the spills above
	nop
	extr.u	rTMP3 = rTMP3, 3, 6	// bitpos = spill_loc{8:3}
	;;
	and	rBIAS = rBSP, rNATP	// bias = (bsp & 0x1f8) ...
	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
	shr	rTMP5 = rTMP2, rTMP3	// (unat >> bitpos)
	;;
	nop
	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
	shl	rTMP2 = rTMP2, rTMP4	// (unat << (64 - bitpos))
	;;
	or	rTMP2 = rTMP2, rTMP5	// rotate_right(unat, bitpos)
	nop
	mov	rTMP4 = pr
	;;
	st8	[rENV0] = rTMP1, 16	// env+0: valid_regs + valid_frs masks
	st8	[rENV1] = rRP, 24	// env+8: ip (my rp)
	sub	rBIAS = rNSLOT, rBIAS	// bias = nslots - bias
	;;
	cmp.lt	p6, p0 = 0, rBIAS	// if (0 < bias) ...
	cmp.lt	p7, p0 = 63, rBIAS	// if (63 < bias) ...
	;;
	st8	[rENV0] = r12, 48	// env+16: sp
	st8	[rENV1] = rPFS, 40	// env+32: cfm (my pfs)
(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	st8	[rENV0] = rTMP4, 24	// env+64: preds
	st8	[rENV1] = rTMP2, 24	// env+72: priunat
(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	st8	[rENV0] = rRNAT, -64	// env+88: ar.rnat
	st8	[rENV1] = rUNAT, 8	// env+96: ar.unat
	dep.z	rTMP3 = rNSLOT, 3, 7	// (nslots << 3)
	;;
	sub	rPBSP = rBSP, rTMP3	// prev_bsp = bsp - (nslots << 3)
	mov	rTMP3 = ar.fpsr
	mov	rTMP1 = ar.lc
	;;
	st8	[rENV0] = rPBSP, 184	// env+24: bsp (my prev bsp)
	st8	[rENV1] = rTMP3, 8	// env+104: ar.fpsr
	add	rENV2 = 320, rENV2	// rENV2 = &env->context.rstate
	;;
	st8	[rENV1] = rTMP1, 112	// env+112: ar.lc
	STPTR	[rENV2] = r0		// env+528: env->rstate = 0
	nop
	;;
	// THIS CODE NEEDS TO BE SCHEDULED!!!
	stf.spill [rENV0] = f2, 32	// env+208: f2
	stf.spill [rENV1] = f3, 32	// env+224: f3
	;;
	stf.spill [rENV0] = f4, 32	// env+240: f4
	stf.spill [rENV1] = f5, 32	// env+256: f5
	;;
	stf.spill [rENV0] = f16, 32	// env+272: f16
	stf.spill [rENV1] = f17, 32	// env+288: f17
	;;
	stf.spill [rENV0] = f18, 32	// env+304: f18
	stf.spill [rENV1] = f19, 32	// env+320: f19
	;;
	stf.spill [rENV0] = f20, 32	// env+336: f20
	stf.spill [rENV1] = f21, 32	// env+352: f21
	;;
	stf.spill [rENV0] = f22, 32	// env+368: f22
	stf.spill [rENV1] = f23, 32	// env+384: f23
	;;
	stf.spill [rENV0] = f24, 32	// env+400: f24
	stf.spill [rENV1] = f25, 32	// env+416: f25
	;;
	stf.spill [rENV0] = f26, 32	// env+432: f26
	stf.spill [rENV1] = f27, 32	// env+448: f27
	;;
	stf.spill [rENV0] = f28, 32	// env+464: f28
	stf.spill [rENV1] = f29, 32	// env+480: f29
	;;
	stf.spill [rENV0] = f30, 32	// env+496: f30
	stf.spill [rENV1] = f31, 32	// env+512: f31
	;;
	mov	ar.unat = rUNAT		// undo the effect of the spills on ar.unat
	mov	ret0 = r0		// return UWX_OK
	br.ret.sptk b0
	.endp

// uwx_self_install_context(
//		struct uwx_env *env,
//		uint64_t r15,
//		uint64_t r16,
//		uint64_t r17,
//		uint64_t r18,
//		uint64_t ret
//		);
//
// Installs the given context, and sets the landing pad binding
// registers r15-r18 to the values given.
// Returns the value "ret" to the new context (for testing --
// when transferring to a landing pad, the new context won't
// care about the return value).
//
// In:	r32 = env, r33-r36 = values for r15-r18, r37 = ret
//
// The context is read from the same env offsets that
// uwx_self_init_context writes, plus env+128 (gp).
// NOTE(review): env+128 is not written by uwx_self_init_context in
// this file; presumably it is filled in elsewhere -- confirm.
//
// r15-r18 double as rPFS/rUNAT/rRNAT/rENV0 in this file, so they are
// only loaded with the landing pad values at the very end, after the
// last use of those aliases.

	.proc	uwx_self_install_context
	.global uwx_self_install_context
uwx_self_install_context:
	.prologue
	alloc	rMYPFS = ar.pfs, 6, 0, 0, 0
	.body
	SWIZZLE	rENV0 = r0, r32		// rENV0 = &env
	;;

	// THIS CODE NEEDS TO BE SCHEDULED!!!

	// Restore GR 4-7 and ar.unat
	add	rENV1 = 136, rENV0	// &env->context.gr[0]
	add	rENV2 = 72, rENV0	// &env->context.priunat
	;;
	ld8	rTMP2 = [rENV2], 24	// env+72: priunat
	extr.u	rTMP3 = rENV1, 3, 6	// bitpos = spill_loc{8:3}
	;;
	ld8	rUNAT = [rENV2], 48	// env+96: ar.unat
	sub	rTMP4 = 64, rTMP3	// (64 - bitpos)
	shl	rTMP5 = rTMP2, rTMP3	// (unat << bitpos)
	;;
	shr	rTMP2 = rTMP2, rTMP4	// (unat >> (64 - bitpos))
	;;
	or	rTMP2 = rTMP2, rTMP5	// rotate_left(unat, bitpos)
	;;
	mov	ar.unat = rTMP2		// put priunat in place
	;;
	ld8.fill r4 = [rENV1], 16	// env+136: r4
	ld8.fill r5 = [rENV2], 16	// env+144: r5
	;;
	ld8.fill r6 = [rENV1], 16	// env+152: r6
	ld8.fill r7 = [rENV2], 16	// env+160: r7
	;;
	mov	ar.unat = rUNAT		// restore real ar.unat

	// Restore BR 1-5
	ld8	rTMP1 = [rENV1], 16	// env+168: b1
	ld8	rTMP2 = [rENV2], 16	// env+176: b2
	;;
	ld8	rTMP3 = [rENV1], 16	// env+184: b3
	ld8	rTMP4 = [rENV2], -168	// env+192: b4
	mov	b1 = rTMP1
	;;
	ld8	rTMP1 = [rENV1], -168	// env+200: b5
	mov	b2 = rTMP2
	mov	b3 = rTMP3
	mov	b4 = rTMP4
	;;
	mov	b5 = rTMP1

	// Restore ar.bsp, ar.pfs, and ar.rnat
	ld8	rPFS = [rENV1], 56	// env+32: cfm (+saved ar.ec)
	mov	rRSC = ar.rsc
	adds	rBIAS = 0x1f8, r0
	;;
	flushrs
	ld8	rRNAT = [rENV1], -24	// env+88: ar.rnat
	ld8	rPBSP = [rENV2], 88	// env+24: prev_bsp
	and	rRSC0 = -4, rRSC	// clear ar.rsc.mode
	;;
	mov	ar.rsc = rRSC0		// enforced lazy mode
	extr.u	rNSLOT = rPFS, 7, 7	// nslots = pfs.sol
	;;
	invala
	and	rBIAS = rPBSP, rBIAS	// bias = prev_bsp & 0x1f8 ...
	;;
	extr.u	rBIAS = rBIAS, 3, 6	// ... div 8
	;;
	add	rBIAS = rNSLOT, rBIAS	// bias += nslots
	;;
	cmp.lt	p6, p0 = 63, rBIAS	// if (63 < bias) ...
	cmp.lt	p7, p0 = 126, rBIAS	// if (126 < bias) ...
	;;
(p6)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
(p7)	add	rNSLOT = 1, rNSLOT	//   ... nslots++
	;;
	dep.z	rTMP3 = rNSLOT, 3, 7	// (nslots << 3)
	;;
	add	rBSP = rPBSP, rTMP3	// bsp = prev_bsp + (nslots << 3)
	;;
	mov	ar.bspstore = rBSP	// restore ar.bsp
	;;
	mov	ar.rnat = rRNAT		// restore ar.rnat
	mov	ar.pfs = rPFS		// restore ar.pfs
	;;
	mov	ar.rsc = rRSC		// restore ar.rsc

	// Restore preds and ar.lc
	ld8	rTMP1 = [rENV1], -56	// env+64: preds
	ld8	rTMP2 = [rENV2], -96	// env+112: ar.lc
	;;
	mov	pr = rTMP1
	mov	ar.lc = rTMP2

	// Get previous sp and ip
	ld8	rRP = [rENV1], 96	// env+8: ip (my rp)
	ld8	rPSP = [rENV2], 112	// env+16: sp
	;;

	// Restore ar.fpsr and gp
	ld8	rTMP1 = [rENV1], 104	// env+104: ar.fpsr
	ld8	r1 = [rENV2], 96	// env+128: gp
	;;
	mov	ar.fpsr = rTMP1		// restore ar.fpsr

	// Restore FR 2-5 and 16-31
	ldf.fill f2 = [rENV1], 32	// env+208: f2
	ldf.fill f3 = [rENV2], 32	// env+224: f3
	;;
	ldf.fill f4 = [rENV1], 32	// env+240: f4
	ldf.fill f5 = [rENV2], 32	// env+256: f5
	;;
	ldf.fill f16 = [rENV1], 32	// env+272: f16
	ldf.fill f17 = [rENV2], 32	// env+288: f17
	;;
	ldf.fill f18 = [rENV1], 32	// env+304: f18
	ldf.fill f19 = [rENV2], 32	// env+320: f19
	;;
	ldf.fill f20 = [rENV1], 32	// env+336: f20
	ldf.fill f21 = [rENV2], 32	// env+352: f21
	;;
	ldf.fill f22 = [rENV1], 32	// env+368: f22
	ldf.fill f23 = [rENV2], 32	// env+384: f23
	;;
	ldf.fill f24 = [rENV1], 32	// env+400: f24
	ldf.fill f25 = [rENV2], 32	// env+416: f25
	;;
	ldf.fill f26 = [rENV1], 32	// env+432: f26
	ldf.fill f27 = [rENV2], 32	// env+448: f27
	;;
	ldf.fill f28 = [rENV1], 32	// env+464: f28
	ldf.fill f29 = [rENV2], 32	// env+480: f29
	;;
	ldf.fill f30 = [rENV1], 32	// env+496: f30
	ldf.fill f31 = [rENV2], 32	// env+512: f31

	// Set landing pad parameter registers
	mov	r15 = r33
	mov	r16 = r34
	mov	r17 = r35
	mov	r18 = r36

	// Restore previous sp and Return
	mov	ret0 = r37
	mov	sp = rPSP
	mov	b0 = rRP
	br.ret.sptk b0

	.endp