1/* 2 * Copyright (c) 2000-2001,2011,2014 Apple Inc. All Rights Reserved. 3 * 4 * The contents of this file constitute Original Code as defined in and are 5 * subject to the Apple Public Source License Version 1.2 (the 'License'). 6 * You may not use this file except in compliance with the License. Please obtain 7 * a copy of the License at http://www.apple.com/publicsource and read it before 8 * using this file. 9 * 10 * This Original Code and all software distributed under the License are 11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS 12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT 13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the 15 * specific language governing rights and limitations under the License. 16 */ 17 18 19/* 20 * vRijndael-alg-ref.c 21 * 22 * Copyright (c) 2001,2011,2014 Apple Inc. All Rights Reserved. 23 * 24 */ 25 26#include "rijndaelApi.h" 27#include "rijndael-alg-ref.h" 28#include "boxes-ref.h" 29#include <string.h> 30 31/* debugger seems to have trouble with this code... 
*/

/*
 * Debug tracing: vdprintf((...)) expands to printf when VAES_DEBUG is
 * nonzero.  NOTE(review): VAES_DEBUG is hard-enabled here, so debug
 * output is compiled into this crypto code — confirm this is intended
 * for release builds.
 */
#define VAES_DEBUG 1
#if VAES_DEBUG
#include <stdio.h>
#define vdprintf(s) printf s
#else
#define vdprintf(s)
#endif

/* Row-shift table selector: maps block length BC (4/6/8 words) to the
 * first index of shifts[] below (BC=4 -> 0, BC=6 -> 1, BC=8 -> 2).
 * Evaluates the local variable BC at each use site. */
#define SC ((BC - 4) >> 1)

#if defined(__ppc__) && defined(ALTIVEC_ENABLE)

/* State/key viewed three ways: as a 4x8 byte matrix (rows 0-1 live in
 * v[0], rows 2-3 in v[1], 8 bytes per row), as 8 longs, or as two
 * 16-byte AltiVec registers. */
typedef union {
	unsigned char s[4][8];
	unsigned long l[8];
	vector unsigned char v[2];
} doubleVec;

/* Four longs overlaid on one AltiVec register (used for per-row shift
 * amounts in vShiftRow). */
typedef union {
	unsigned long s[4];
	vector unsigned long v;
} vecLong;

/* Per-row rotation amounts for ShiftRow: shifts[SC][row][d], where d=0
 * selects the encryption offsets and d=1 the decryption offsets, for
 * block lengths BC = 4, 6, 8 respectively. */
static word8 shifts[3][4][2] = {
	{ { 0, 0 },
	  { 1, 3 },
	  { 2, 2 },
	  { 3, 1 }
	},
	{ { 0, 0 },
	  { 1, 5 },
	  { 2, 4 },
	  { 3, 3 }
	},
	{ { 0, 0 },
	  { 1, 7 },
	  { 3, 5 },
	  { 4, 4 }
	}
};

/*
 * Expand the cipher key (vk, in the two-vector row layout of doubleVec)
 * into the round-key array W.  keyBits/blockBits select KC/BC in 32-bit
 * words and the round count.  Returns 0 on success, -1/-2 for an
 * unsupported key/block length.
 */
int vRijndaelKeySched ( vector unsigned char vk[2], int keyBits, int blockBits,
	unsigned char W[MAXROUNDS+1][4][MAXBC])
{
	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	doubleVec tk;
	register vector unsigned char v1, v2, mask;

	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	/* Round count is driven by the larger of key and block length. */
	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	tk.v[0] = vk[0];
	tk.v[1] = vk[1];

	t = 0;
	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];

	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values: column 0 gets S-box of the rotated
		 * last column, then the round constant on row 0 */
		for(i = 0; i < 4; i++)
			tk.s[i][0] ^= *((word8 *)S + tk.s[(i+1)%4][KC-1]);
		tk.s[0][0] ^= rcon[rconpointer++];

		if (KC != 8) {
			/* xor bytes 1-7 of each row with previous byte.
			 * v1 is each row rotated right by one byte; the mask
			 * zeroes byte 0 of each 8-byte row so column 0 is not
			 * touched again.
			 * NOTE(review): this XORs all columns with their
			 * PRE-update left neighbor in one parallel step,
			 * whereas the scalar reference cascades
			 * tk[i][j] ^= tk[i][j-1] sequentially over j —
			 * verify equivalence against rijndael-alg-ref.c. */
			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
			for ( i = 0; i < 2; i++ ) {
				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
			}
		}
		else {
			/* 256-bit key: xor bytes 1-3 of each row with previous
			 * byte, apply the S-box at column KC/2, then xor bytes
			 * 5-7 (the chain is broken at column 4 per the spec). */
			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
			for ( i = 0; i < 2; i++ ) {
				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
				/* NOTE(review): j is unused in this loop body, so the
				 * same byte is XORed with the same S-box value four
				 * times (a net no-op), and only rows 0-1 (i = 0..1)
				 * are touched; the scalar reference applies
				 * tk[i][KC/2] ^= S[tk[i][KC/2 - 1]] once per row
				 * i = 0..3.  Verify before reusing this path. */
				for(j = 0; j < 4; j++) tk.s[i][KC/2] ^= *((word8 *)S + tk.s[i][KC/2 - 1]);
				/* xor bytes 5-7 of each row with previous byte */
				mask = vec_sld( mask, mask, 4 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
				mask = vec_sld( mask, mask, 4 );
			}
		}
		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
	}
	return 0;
}


/*
 * Load raw key material (possibly unaligned), transpose it into the
 * row-major two-vector layout, and expand it into key->keySched.
 */
void vMakeKey(BYTE *keyMaterial, keyInstance *key)
{
	register vector unsigned char v1, v2, v3, mask;
	vector unsigned char vk[2];

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) keyMaterial );
	v2 = vec_ld( 16, (vector unsigned char *) keyMaterial );
	if ( (long) keyMaterial & 0x0fL )
	{	// this is required if keyMaterial is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) keyMaterial );
		mask = vec_lvsl( 0, keyMaterial );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array: byte k of column j
	 * lands in row k (rows 0-1 in vk[0], rows 2-3 in vk[1]) */
	vk[0] = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	vk[1] = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
	vRijndaelKeySched (vk, key->keyLen, key->blockLen, key->keySched);
	/* NOTE(review): plain memset of key material may be optimized away;
	 * a compiler-barrier zeroization (e.g. memset_s/explicit_bzero
	 * equivalent) would be more robust.  Also confirm 4 * MAXKC equals
	 * sizeof(vk) (32 bytes). */
	memset( (char *) vk, 0, 4 * MAXKC);
}


/* This routine does 16 simultaneous lookups in a 256-byte table.
 * The table is viewed as 16 AltiVec vectors; vec_perm indexes the low
 * 5 bits of each byte of v into a 32-byte window, and the upper three
 * bits (6, 7, 8) are resolved by a tree of vec_sel merges using
 * sign-extended bit masks. */
vector unsigned char rimskyKorsakov ( vector unsigned char v, vector unsigned char * table )
{
	register vector unsigned char upperBits000, upperBits001, upperBits010, upperBits011,
		upperBits100, upperBits101, upperBits110, upperBits111,
		lookupBit00, lookupBit01, lookupBit10, lookupBit11,
		lookupBit0, lookupBit1, lookup,
		maskForBit6, maskForBit7, maskForBit8, seven;
	register vector unsigned char *tabeven, *tabodd;

	seven = vec_splat_u8 ( 7 );
	tabeven = table++;
	tabodd = table;

	// Each variable contains the correct values for the corresponding bits 6, 7 and 8.
	upperBits000 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits001 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits010 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits011 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits100 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits101 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits110 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits111 = vec_perm ( *tabeven, *tabodd, v );

	// Here we extract all the correct values for bit 6.
	// (shift the bit to the sign position, arithmetic-shift to fan it
	// out into an all-0s/all-1s select mask)
	maskForBit6 = vec_sl ( v, vec_splat_u8 ( 2 ) );
	maskForBit6 = vec_sra ( maskForBit6, seven );
	lookupBit00 = vec_sel ( upperBits000, upperBits001, maskForBit6 );
	lookupBit01 = vec_sel ( upperBits010, upperBits011, maskForBit6 );
	lookupBit10 = vec_sel ( upperBits100, upperBits101, maskForBit6 );
	lookupBit11 = vec_sel ( upperBits110, upperBits111, maskForBit6 );

	// Then we get the correct values for bit 7.
	maskForBit7 = vec_sl ( v, vec_splat_u8 ( 1 ) );
	maskForBit7 = vec_sra ( maskForBit7, seven );
	lookupBit0 = vec_sel ( lookupBit00, lookupBit01, maskForBit7 );
	lookupBit1 = vec_sel ( lookupBit10, lookupBit11, maskForBit7 );

	// Finally, the entire correct result vector.
	maskForBit8 = vec_sra ( v, seven );

	lookup = vec_sel ( lookupBit0, lookupBit1, maskForBit8 );

	return lookup;
}

/* GF(2^8) multiply of 16 byte pairs at once via log/antilog tables:
 * Alogtable[(Logtable[a] + Logtable[b]) % 255], with the result forced
 * to 0 wherever a or b is 0 (log of 0 is undefined). */
vector unsigned char vmul(vector unsigned char a, vector unsigned char b)
{
	register vector unsigned char x, y, zero;
	register vector unsigned short xh, yh, zhi, zlo, two54, two55;

	zero = vec_splat_u8( 0 );
	/* build the constants 255 and 254 as unsigned shorts */
	two55 = vec_splat_u16( -1 );
	two55 = (vector unsigned short) vec_mergeh( zero, (vector unsigned char) two55 );
	two54 = vec_sub( two55, vec_splat_u16( 1 ) );

	x = rimskyKorsakov( a, (vector unsigned char *)Logtable );	// Logtable[a]
	y = rimskyKorsakov( b, (vector unsigned char *)Logtable );	// Logtable[b]

	// Convert upper 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergeh( zero, x );
	yh = (vector unsigned short) vec_mergeh( zero, y );
	xh = vec_add( xh, yh );					// xh = Logtable[a] + Logtable[b]
	yh = vec_sub( xh, two55 );
	zhi = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );	// xh%255 (sum is at most 508, so one subtraction suffices)
	
	// Convert lower 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergel( zero, x );
	yh = (vector unsigned short) vec_mergel( zero, y );
	xh = vec_add( xh, yh );
	yh = vec_sub( xh, two55 );
	zlo = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );

	x = vec_pack( zhi, zlo );				// recombine into single byte vector
	x = rimskyKorsakov( x, (vector unsigned char *)Alogtable );	// Alogtable[x]
	x = vec_sel( x, zero, vec_cmpeq( a, zero ) );		// check a = 0
	x = vec_sel( x, zero, vec_cmpeq( b, zero ) );		// check b = 0
	return x;
}

/* AddRoundKey: XOR the round key into the state, two rows per vector. */
void vKeyAddition(vector unsigned char v[2], vector unsigned char rk[2])
{
	v[0] = vec_xor( v[0], rk[0] );		// first vector contains rows 0 and 1
	v[1] = vec_xor( v[1], rk[1] );		// second vector contains rows 2 and 3
}


/* ShiftRow: rotate row i left by shifts[SC][i][d] bytes (d=0 encrypt,
 * d=1 decrypt).  The rotation is done with vec_perm; the permute masks
 * are synthesized by the vec_lvsl trick — lvsl of address n yields the
 * byte sequence n, n+1, ... which, reduced mod BC, is exactly a
 * rotate-by-n index pattern. */
void vShiftRow(vector unsigned char v[2], word8 d, word8 BC)
{
	vecLong sh;
	register vector unsigned char mask, mask1, t;
	register vector bool char c;
	register int i, j;

	sh.s[0] = 0;
	for (i = 1; i < 4; i++)
		sh.s[i] = shifts[SC][i][d] % BC;	// contains the number of elements to shift each row

	// each vector contains two BC-byte long rows
	j = 0;
	for ( i = 0; i < 2; i++ ) {
		mask = vec_lvsl( 0, (int *) sh.s[j++]);		// mask for even row
		mask1 = vec_lvsl( 0, (int *) sh.s[j++]);	// mask for odd row
		if (BC == 4) {
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			mask = vec_and( mask, vec_splat_u8( 3 ) );	// indices mod 4
		} else if (BC == 6) {
			mask = vec_sld( mask, mask, 8 );
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			/* 6 is not a power of two: reduce mod 6 by conditional subtract */
			t = vec_sub( mask, vec_splat_u8( 6 ) );
			c = vec_cmpgt( mask, vec_splat_u8( 5 ) );
			mask = vec_sel( mask, t, c );
		} else {
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			mask = vec_and( mask, vec_splat_u8( 7 ) );	// indices mod 8
		}
		/* add 8 to the odd row's indices so they select bytes 8-15 */
		mask1 = vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
		mask = vec_add( mask, mask1 );
		v[i] = vec_perm( v[i], v[i], mask );	// rotate each row as required
	}
}

/* SubBytes: apply a 256-entry S-box (viewed as 16 vectors) to every
 * state byte. */
void vSubstitution( vector unsigned char v[2], vector unsigned char box[16] )
{
	v[0] = rimskyKorsakov( v[0], box );	// first vector contains rows 0 and 1
	v[1] = rimskyKorsakov( v[1], box );	// second vector contains rows 2 and 3
}

/* MixColumn: b[i] = 2*a[i] ^ 3*a[i+1] ^ a[i+2] ^ a[i+3] (row indices
 * mod 4), with row rotation done by 8-byte vector shifts. */
void vMixColumn(vector unsigned char v[2])
{
	// vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
	// vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f

	register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
	register vector unsigned char two, three;

	two = vec_splat_u8( 2 );
	three = vec_splat_u8( 3 );

	a1 = vec_sld( v[0], v[1], 8 );		// equivalent to a[i+1] % 4
	b1 = vec_sld( v[1], v[0], 8 );
	a2 = vec_sld( a1, b1, 8 );		// equivalent to a[i+2] % 4
	b2 = vec_sld( b1, a1, 8 );
	a3 = vec_sld( a2, b2, 8 );		// equivalent to a[i+3] % 4
	b3 = vec_sld( b2, a2, 8 );

	// Calculations for rows 0 and 1
	a0 = vmul( two, v[0] );				// mul(2,a[i][j])
	a0 = vec_xor( a0, vmul( three, a1 ) );		// ^ mul(3,a[(i + 1) % 4][j])
	a0 = vec_xor( a0, a2 );				// ^ a[(i + 2) % 4][j]
	v[0] = vec_xor( a0, a3 );			// ^ a[(i + 3) % 4][j]

	// Calculations for rows 2 and 3
	b0 = vmul( two, v[1] );
	b0 = vec_xor( b0, vmul( three, b1 ) );
	b0 = vec_xor( b0, b2 );
	v[1] = vec_xor( b0, b3 );
}

/* InvMixColumn: b[i] = 0xe*a[i] ^ 0xb*a[i+1] ^ 0xd*a[i+2] ^ 0x9*a[i+3]
 * (row indices mod 4) — the inverse of vMixColumn. */
void vInvMixColumn(vector unsigned char v[2])
{
	// vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
	// vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f

	register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
	register vector unsigned char nine, eleven, thirteen, fourteen;;

	nine = vec_splat_u8( 0x9 );
	eleven = vec_splat_u8( 0xb );
	thirteen = vec_splat_u8( 0xd );
	fourteen = vec_splat_u8( 0xe );

	a1 = vec_sld( v[0], v[1], 8 );		// equivalent to a[i+1] % 4
	b1 = vec_sld( v[1], v[0], 8 );
	a2 = vec_sld( a1, b1, 8 );		// equivalent to a[i+2] % 4
	b2 = vec_sld( b1, a1, 8 );
	a3 = vec_sld( a2, b2, 8 );		// equivalent to a[i+3] % 4
	b3 = vec_sld( b2, a2, 8 );

	// Calculations for rows 0 and 1
	a0 = vmul( fourteen, v[0] );			// mul(0xe,a[i][j])
	a0 = vec_xor( a0, vmul( eleven, a1 ) );		// ^ mul(0xb,a[(i + 1) % 4][j])
	a0 = vec_xor( a0, vmul( thirteen, a2 ) );	// ^ mul(0xd,a[(i + 2) % 4][j])
	v[0] = vec_xor( a0, vmul( nine, a3 ) );		// ^ mul(0x9,a[(i + 3) % 4][j])

	// Calculations for rows 2 and 3
	b0 = vmul( fourteen, v[1] );
	b0 = vec_xor( b0, vmul( eleven, b1 ) );
	b0 = vec_xor( b0, vmul( thirteen, b2 ) );
	v[1] = vec_xor( b0, vmul( nine, b3 ) );
}

/*
 * Encrypt one block in place (a holds the state in two-rows-per-vector
 * layout; rk is the expanded key schedule).  Returns 0 on success,
 * -2 for an unsupported block length.
 */
int vRijndaelEncrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
{
	/* Encryption of one block.
	 */
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* initial whitening, ROUNDS-1 full rounds, final round (no MixColumn) */
	vKeyAddition( a, rk[0] );
	for(r = 1; r < ROUNDS; r++) {
		vSubstitution( a, (vector unsigned char *)S);
		vShiftRow( a, 0, BC);
		vMixColumn( a );
		vKeyAddition( a, rk[r] );
	}
	vSubstitution( a, (vector unsigned char *)S);
	vShiftRow( a, 0, BC);
	vKeyAddition( a, rk[ROUNDS] );

	return 0;
}

/*
 * Decrypt one block in place — the inverse sequence of
 * vRijndaelEncrypt, using the inverse S-box Si and vInvMixColumn.
 * Returns 0 on success, -2 for an unsupported block length.
 */
int vRijndaelDecrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
{
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	vKeyAddition( a, rk[ROUNDS] );
	vSubstitution( a, (vector unsigned char *)Si);
	vShiftRow( a, 1, BC);
	for(r = ROUNDS-1; r > 0; r--) {
		vKeyAddition( a, rk[r] );
		vInvMixColumn( a );
		vSubstitution( a, (vector unsigned char *)Si);
		vShiftRow( a, 1, BC);
	}
	vKeyAddition( a, rk[0] );

	return 0;
}

#if 0
/* Murley's code, to be deleted */
/* One-block encrypt with optional CBC chaining through
 * cipher->chainBlock; handles unaligned input and output buffers with
 * lvsl/lvsr permutes.  (Compiled out.) */
void vBlockEncrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
{
	register vector unsigned char v1, v2, v3, v4, mask;
	register vector bool char cmp;

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) input );
	v2 = vec_ld( 16, (vector unsigned char *) input );
	if ( (long) input & 0x0fL )
	{	// this is required if input is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) input );
		mask = vec_lvsl( 0, input );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );

	/* store into cipher structure */
	if (cipher->mode == MODE_CBC) {
		v3 = vec_xor( v3, *((vector unsigned char *) cipher->chainBlock ) );
		v4 = vec_xor( v4, *((vector unsigned char *) cipher->chainBlock + 1 ) );
	}
	vec_st( v3, 0, (vector unsigned char *) cipher->chainBlock );
	vec_st( v4, 16, (vector unsigned char *) cipher->chainBlock );

	vRijndaelEncrypt((vector unsigned char *) cipher->chainBlock, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);

	v1 = vec_ld( 0, (vector unsigned char *) cipher->chainBlock );
	v2 = vec_ld( 16, (vector unsigned char *) cipher->chainBlock );

	/* parse rectangular array into output ciphertext bytes */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{
		/* store output data into a non-aligned buffer */
		mask = vec_lvsr( 0, outBuffer );
		cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
		v1 = vec_perm( v3, v3, mask );
		v2 = vec_perm( v4, v4, mask );
		v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
		v4 = vec_sel( v3, v1, cmp );
		vec_st( v4, 0, (vector unsigned char *) outBuffer );
		v1 = vec_sel( v1, v2, cmp );
		vec_st( v1, 16, (vector unsigned char *) outBuffer );
		v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
		v2 = vec_sel( v2, v3, cmp );
		vec_st( v2, 32, (vector unsigned char *) outBuffer );
	} else {
		// store output data into an aligned buffer
		vec_st( v3, 0, (vector unsigned char *) outBuffer );
		vec_st( v4, 16, (vector unsigned char *) outBuffer );
	}
	return;
}

/* One-block decrypt, the CBC inverse of vBlockEncrypt: decrypts, XORs
 * with the previous ciphertext, then updates chainBlock with the saved
 * incoming ciphertext.  (Compiled out.) */
void vBlockDecrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
{
	// for vector machines
	register vector unsigned char v1, v2, v3, v4, mask;
	register vector bool char cmp;
	vector unsigned char block[2], cblock[2];

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) input );
	v2 = vec_ld( 16, (vector unsigned char *) input );
	if ( (long) input & 0x0fL )
	{	// this is required if input is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) input );
		mask = vec_lvsl( 0, input );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
	block[0] = v3;
	block[1] = v4;

	/* save a copy of incoming ciphertext for later chain */
	if (cipher->mode == MODE_CBC) {
		cblock[0] = v3;
		cblock[1] = v4;
	}

	vRijndaelDecrypt ((vector unsigned char *) block, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);

	v1 = block[0];
	v2 = block[1];

	/* exor with last ciphertext */
	if (cipher->mode == MODE_CBC) {
		v1 = vec_xor( v1, *((vector unsigned char *) cipher->chainBlock) );
		v2 = vec_xor( v2, *((vector unsigned char *) cipher->chainBlock + 1) );
		vec_st( cblock[0], 0, (vector unsigned char *) cipher->chainBlock );
		vec_st( cblock[1], 16, (vector unsigned char *) cipher->chainBlock );
	}

	/* parse rectangular array into output ciphertext bytes */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{	/* store output data into a non-aligned buffer */
		mask = vec_lvsr( 0, outBuffer );
		cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
		v1 = vec_perm( v3, v3, mask );
		v2 = vec_perm( v4, v4, mask );
		v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
		v4 = vec_sel( v3, v1, cmp );
		vec_st( v4, 0, (vector unsigned char *) outBuffer );
		v1 = vec_sel( v1, v2, cmp );
		vec_st( v1, 16, (vector unsigned char *) outBuffer );
		v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
		v2 = vec_sel( v2, v3, cmp );
		vec_st( v2, 32, (vector unsigned char *) outBuffer );
	} else {
		// store output data into an aligned buffer
		vec_st( v3, 0, (vector unsigned char *) outBuffer );
		vec_st( v4, 16, (vector unsigned char *) outBuffer );
	}
}
#endif	/* Murley's code, to be deleted */

/*
 * dmitch addenda 4/11/2001: 128-bit only encrypt/decrypt with no CBC
 */
/* Encrypt one 16-byte block (ECB, 128-bit block length only).
 * Unaligned input/output is handled by bouncing through localBuf. */
void vBlockEncrypt128(
	keyInstance *key,
	BYTE *input,
	BYTE *outBuffer)
{
	vector unsigned char block[2];
	register vector unsigned char v1, v2;

	if ( (long) input & 0x0fL ) {
		BYTE localBuf[16];
		vdprintf(("vBlockEncrypt128: unaligned input\n"));
		/* manually re-align - the compiler is supposed to 16-byte align this for us */
		if((unsigned)localBuf & 0xf) {
			vdprintf(("vBlockEncrypt128: unaligned localBuf!\n"));
		}
		memmove(localBuf, input, 16);
		v1 = vec_ld(0, (vector unsigned char *)localBuf);
	}
	else {
		vdprintf(("vBlockEncrypt128: aligned input\n"));
		v1 = vec_ld( 0, (vector unsigned char *) input );
	}

	/* parse input stream into rectangular array */
	/* FIXME - do we need to zero v2 (or something)?
	 * NOTE(review): v2 is read uninitialized here.  Permute indices
	 * >= 16 select from v2, which for a 128-bit block appear to land
	 * only in the unused columns 4-7 of each row — confirm those
	 * columns are truly ignored for blockBits == 128. */
	block[0] = vec_perm(v1, v2,
		(vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
			5, 9, 13, 17, 21, 25, 29 ) );
	block[1] = vec_perm( v1, v2,
		(vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
			7, 11, 15, 19, 23, 27, 31 ) );

	vRijndaelEncrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);

	/* parse rectangular array into output ciphertext bytes; only v1
	 * (the first 16 output bytes) is stored for a 128-bit block */
	v1 = vec_perm(block[0], block[1],
		(vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
			10, 18, 26, 3, 11, 19, 27 ) );
	v2 = vec_perm(block[0], block[1],
		(vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
			14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{
		/* store output data into a non-aligned buffer */
		BYTE localBuf[16];
		vec_st(v1, 0, (vector unsigned char *) localBuf );
		memmove(outBuffer, localBuf, 16);
	} else {
		/* store output data into an aligned buffer */
		vec_st( v1, 0, (vector unsigned char *) outBuffer );
	}
	return;
}

/* Decrypt one 16-byte block (ECB, 128-bit block length only) — mirror
 * of vBlockEncrypt128 using vRijndaelDecrypt. */
void vBlockDecrypt128(
	keyInstance *key,
	BYTE *input,
	BYTE *outBuffer)
{
	vector unsigned char block[2];
	register vector unsigned char v1, v2;

	if ( (long) input & 0x0fL ) {
		/* manually re-align - the compiler is supposed to 16-byte align this for us */
		BYTE localBuf[16];
		vdprintf(("vBlockDecrypt128: unaligned input\n"));
		if((unsigned)localBuf & 0xf) {
			vdprintf(("vBlockDecrypt128: unaligned localBuf!\n"));
		}
		memmove(localBuf, input, 16);
		v1 = vec_ld(0, (vector unsigned char *)localBuf);
	}
	else {
		vdprintf(("vBlockDecrypt128: aligned input\n"));
		v1 = vec_ld( 0, (vector unsigned char *) input );
	}

	/* parse input stream into rectangular array */
	/* FIXME - do we need to zero v2 (or something)?
	 * NOTE(review): same uninitialized-v2 concern as in
	 * vBlockEncrypt128 above. */
	block[0] = vec_perm(v1, v2,
		(vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1,
			5, 9, 13, 17, 21, 25, 29 ) );
	block[1] = vec_perm( v1, v2,
		(vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3,
			7, 11, 15, 19, 23, 27, 31 ) );

	vRijndaelDecrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched);

	/* parse rectangular array into output ciphertext bytes */
	v1 = vec_perm(block[0], block[1],
		(vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2,
			10, 18, 26, 3, 11, 19, 27 ) );
	v2 = vec_perm(block[0], block[1],
		(vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6,
			14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL ) {
		/* store output data into a non-aligned buffer */
		BYTE localBuf[16];
		vec_st(v1, 0, (vector unsigned char *) localBuf );
		memmove(outBuffer, localBuf, 16);
	} else {
		/* store output data into an aligned buffer */
		vec_st( v1, 0, (vector unsigned char *) outBuffer );
	}
	return;
}

#endif	/* defined(__ppc__) && defined(ALTIVEC_ENABLE) */