1/* 2 * Copyright (c) 2000-2001 Apple Computer, Inc. All Rights Reserved. 3 * 4 * The contents of this file constitute Original Code as defined in and are 5 * subject to the Apple Public Source License Version 1.2 (the 'License'). 6 * You may not use this file except in compliance with the License. Please obtain 7 * a copy of the License at http://www.apple.com/publicsource and read it before 8 * using this file. 9 * 10 * This Original Code and all software distributed under the License are 11 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS 12 * OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT 13 * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 14 * PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the 15 * specific language governing rights and limitations under the License. 16 */ 17 18 19/* 20 * vRijndael-alg-ref.c 21 * 22 * Created by Robert A. Murley on Mon Jan 22 2001. 23 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. 24 * 25 */ 26 27#include "rijndaelApi.h" 28#include "rijndael-alg-ref.h" 29#include "boxes-ref.h" 30#include <string.h> 31 32/* debugger seems to have trouble with this code... 
*/
#define VAES_DEBUG 1
#if VAES_DEBUG
#include <stdio.h>
#define vdprintf(s) printf s
#else
#define vdprintf(s)
#endif

/* SC selects a row of the shifts[] table below; it relies on a local
 * variable named BC being in scope at each point of use
 * (BC = 4, 6, 8 maps to SC = 0, 1, 2). */
#define SC ((BC - 4) >> 1)

#if defined(__ppc__) && defined(ALTIVEC_ENABLE)

/* The 4 x MAXBC state/key bytes viewed three ways: as a byte matrix,
 * as longs, or as two AltiVec registers (rows 0-1 in v[0], rows 2-3
 * in v[1], one 8-byte row per register half). */
typedef union {
	unsigned char s[4][8];
	unsigned long l[8];
	vector unsigned char v[2];
} doubleVec;

/* Four longs overlaid on a single AltiVec register. */
typedef union {
	unsigned long s[4];
	vector unsigned long v;
} vecLong;

/* Per-row ShiftRow amounts, indexed [SC][row][d]; d = 0 gives the
 * encrypt offsets, d = 1 the decrypt offsets. */
static word8 shifts[3][4][2] = {
 { { 0, 0 },
   { 1, 3 },
   { 2, 2 },
   { 3, 1 }
 },
 { { 0, 0 },
   { 1, 5 },
   { 2, 4 },
   { 3, 3 }
 },
 { { 0, 0 },
   { 1, 7 },
   { 3, 5 },
   { 4, 4 }
 }
};

/*
 * Expand the cipher key in vk[2] into the round-key array W.
 * Returns 0 on success, -1 for a bad keyBits, -2 for a bad blockBits.
 */
int vRijndaelKeySched ( vector unsigned char vk[2], int keyBits, int blockBits,
	unsigned char W[MAXROUNDS+1][4][MAXBC])
{
	/* Calculate the necessary round keys
	 * The number of calculations depends on keyBits and blockBits
	 */
	int KC, BC, ROUNDS;
	int i, j, t, rconpointer = 0;
	doubleVec tk;	/* working copy of the key, byte- and vector-addressable */
	register vector unsigned char v1, v2, mask;

	/* KC = number of 32-bit key columns */
	switch (keyBits) {
	case 128: KC = 4; break;
	case 192: KC = 6; break;
	case 256: KC = 8; break;
	default : return (-1);
	}

	/* BC = number of 32-bit block columns */
	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	/* round count is driven by the larger of key and block size */
	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	tk.v[0] = vk[0];
	tk.v[1] = vk[1];

	t = 0;
	/* copy values into round key array */
	for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
		for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];

	while (t < (ROUNDS+1)*BC) { /* while not enough round key material calculated */
		/* calculate new values: column 0 absorbs the S-box of the
		 * row-rotated last column, then the round constant */
		for(i = 0; i < 4; i++)
			tk.s[i][0] ^= *((word8 *)S + tk.s[(i+1)%4][KC-1]);
		tk.s[0][0] ^= rcon[rconpointer++];

		/* NOTE(review): the scalar reference key schedule XORs the
		 * columns sequentially (tk[i][j] ^= tk[i][j-1] uses the
		 * already-UPDATED column j-1, i.e. a cascade).  The single
		 * shift-and-XOR below combines each byte with its neighbor's
		 * OLD value only, which is not equivalent for KC > 2 --
		 * confirm against rijndael-alg-ref.c before enabling this
		 * (ALTIVEC_ENABLE-gated) path. */
		if (KC != 8) {
			/* xor bytes 1-7 of each row with previous byte */
			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff );
			for ( i = 0; i < 2; i++ ) {
				v1 = vec_sld( tk.v[i], tk.v[i], 15 );	/* rotate each 16-byte register right one byte */
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
			}
		}
		else {
			/* xor bytes 1-3 of each row with previous byte */
			mask = (vector unsigned char) ( 0, 0xff, 0xff, 0xff, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0, 0, 0, 0 );
			for ( i = 0; i < 2; i++ ) {
				v1 = vec_sld( tk.v[i], tk.v[i], 15 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
				/* NOTE(review): this loop XORs the SAME unchanged
				 * S-box value into tk.s[i][KC/2] four times, which
				 * cancels to a no-op (even XOR count), and it indexes
				 * rows by i (0..1) where the reference applies the
				 * substitution to all four rows via the loop index --
				 * looks like a transcription bug; verify against the
				 * scalar reference. */
				for(j = 0; j < 4; j++) tk.s[i][KC/2] ^= *((word8 *)S + tk.s[i][KC/2 - 1]);
				/* xor bytes 5-7 of each row with previous byte */
				mask = vec_sld( mask, mask, 4 );
				v2 = vec_and( v1, mask );
				tk.v[i] = vec_xor( tk.v[i], v2 );
				mask = vec_sld( mask, mask, 4 );
			}
		}
		/* copy values into round key array */
		for(j = 0; (j < KC) && (t < (ROUNDS+1)*BC); j++, t++)
			for(i = 0; i < 4; i++) W[t / BC][i][t % BC] = tk.s[i][j];
	}
	return 0;
}


/*
 * Load up to 32 bytes of caller-supplied key material (which need not
 * be 16-byte aligned), transpose it into the row-major vector layout,
 * and run the key schedule into key->keySched.
 * NOTE(review): the final memset assumes 4 * MAXKC == sizeof(vk)
 * (32 bytes) -- confirm; also a plain memset of a dead local may be
 * optimized away, so this is not a reliable secret wipe.
 */
void vMakeKey(BYTE *keyMaterial, keyInstance *key)
{
	register vector unsigned char v1, v2, v3, mask;
	vector unsigned char vk[2];

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) keyMaterial );
	v2 = vec_ld( 16, (vector unsigned char *) keyMaterial );
	if ( (long) keyMaterial & 0x0fL )
	{	// this is required if keyMaterial is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) keyMaterial );
		mask = vec_lvsl( 0, keyMaterial );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array */
	vk[0] = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	vk[1] = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
	vRijndaelKeySched (vk, key->keyLen, key->blockLen, key->keySched);
	memset( (char *) vk, 0, 4 * MAXKC);
}


/* This routine does 16 simultaneous lookups in a 256-byte table. */
vector unsigned char rimskyKorsakov ( vector unsigned char v, vector unsigned char * table )
{
	register vector unsigned char upperBits000, upperBits001, upperBits010, upperBits011,
		upperBits100, upperBits101, upperBits110, upperBits111,
		lookupBit00, lookupBit01, lookupBit10, lookupBit11,
		lookupBit0, lookupBit1, lookup,
		maskForBit6, maskForBit7, maskForBit8, seven;
	register vector unsigned char *tabeven, *tabodd;

	seven = vec_splat_u8 ( 7 );
	tabeven = table++;	/* 16-byte vectors 0, 2, 4, ... of the table */
	tabodd = table;		/* 16-byte vectors 1, 3, 5, ... */

// Each variable contains the correct values for the corresponding bits 6, 7 and 8.
	/* vec_perm sees only the low 5 bits of each index byte, so each
	 * perm below looks up one 32-byte (even,odd) slice of the table;
	 * the eight results cover all combinations of the top 3 bits. */
	upperBits000 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits001 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits010 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits011 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits100 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits101 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits110 = vec_perm ( *tabeven, *tabodd, v );
	tabeven += 2; tabodd += 2;
	upperBits111 = vec_perm ( *tabeven, *tabodd, v );

// Here we extract all the correct values for bit 6.
	maskForBit6 = vec_sl ( v, vec_splat_u8 ( 2 ) );		/* move the bit into the sign position */
	maskForBit6 = vec_sra ( maskForBit6, seven );		/* replicate it across each byte: 0x00 or 0xff */
	lookupBit00 = vec_sel ( upperBits000, upperBits001, maskForBit6 );
	lookupBit01 = vec_sel ( upperBits010, upperBits011, maskForBit6 );
	lookupBit10 = vec_sel ( upperBits100, upperBits101, maskForBit6 );
	lookupBit11 = vec_sel ( upperBits110, upperBits111, maskForBit6 );

// Then we get the correct values for bit 7.
	maskForBit7 = vec_sl ( v, vec_splat_u8 ( 1 ) );
	maskForBit7 = vec_sra ( maskForBit7, seven );
	lookupBit0 = vec_sel ( lookupBit00, lookupBit01, maskForBit7 );
	lookupBit1 = vec_sel ( lookupBit10, lookupBit11, maskForBit7 );

// Finally, the entire correct result vector.
	maskForBit8 = vec_sra ( v, seven );	/* top bit of each index byte as a select mask */

	lookup = vec_sel ( lookupBit0, lookupBit1, maskForBit8 );

	return lookup;
}

/*
 * Byte-wise GF(2^8) multiply of two vectors via the log/alog tables:
 * Alogtable[(Logtable[a] + Logtable[b]) % 255], with the result forced
 * to 0 in every lane where a or b is 0.
 */
vector unsigned char vmul(vector unsigned char a, vector unsigned char b)
{
	register vector unsigned char x, y, zero;
	register vector unsigned short xh, yh, zhi, zlo, two54, two55;

	zero = vec_splat_u8( 0 );
	/* build the constants 255 (two55) and 254 (two54) in each 16-bit lane */
	two55 = vec_splat_u16( -1 );
	two55 = (vector unsigned short) vec_mergeh( zero, (vector unsigned char) two55 );
	two54 = vec_sub( two55, vec_splat_u16( 1 ) );

	x = rimskyKorsakov( a, (vector unsigned char *)Logtable );	// Logtable[a]
	y = rimskyKorsakov( b, (vector unsigned char *)Logtable );	// Logtable[b]

	// Convert upper 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergeh( zero, x );
	yh = (vector unsigned short) vec_mergeh( zero, y );
	xh = vec_add( xh, yh );		// xh = Logtable[a] + Logtable[b]
	yh = vec_sub( xh, two55 );
	zhi = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );	// xh%255, via conditional subtract
	
	// Convert lower 8 bytes to shorts for addition and modulo
	xh = (vector unsigned short) vec_mergel( zero, x );
	yh = (vector unsigned short) vec_mergel( zero, y );
	xh = vec_add( xh, yh );
	yh = vec_sub( xh, two55 );
	zlo = vec_sel( xh, yh, vec_cmpgt( xh, two54 ) );

	x = vec_pack( zhi, zlo );	// recombine into single byte vector
	x = rimskyKorsakov( x, (vector unsigned char *)Alogtable );	// Alogtable[x]
	x = vec_sel( x, zero, vec_cmpeq( a, zero ) );	// check a = 0
	x = vec_sel( x, zero, vec_cmpeq( b, zero ) );	// check b = 0
	return x;
}

/* AddRoundKey: XOR the round key into the state. */
void vKeyAddition(vector unsigned char v[2], vector unsigned char rk[2])
{
	v[0] = vec_xor( v[0], rk[0] );	// first vector contains rows 0 and 1
	v[1] = vec_xor( v[1], rk[1] );	// second vector contains rows 2 and 3
}


/*
 * ShiftRow: rotate each state row by the amount shifts[] prescribes
 * for this block size; d = 0 selects the encrypt offsets, d = 1 the
 * decrypt offsets.
 */
void vShiftRow(vector unsigned char v[2], word8 d, word8 BC)
{
	vecLong sh;
	register vector unsigned char mask, mask1, t;
	register vector bool char c;
	register int i, j;

	sh.s[0] = 0;
	for (i = 1; i < 4; i++)
		sh.s[i] = shifts[SC][i][d] % BC;	// contains the number of elements to shift each row

	// each vector contains two BC-byte long rows
	j = 0;
	for ( i = 0; i < 2; i++ ) {
		/* vec_lvsl with the shift count as a fake address yields the
		 * permute vector ( n, n+1, ..., n+15 ), i.e. a rotate-by-n mask */
		mask = vec_lvsl( 0, (int *) sh.s[j++]);		// mask for even row
		mask1 = vec_lvsl( 0, (int *) sh.s[j++]);	// mask for odd row
		if (BC == 4) {
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			mask = vec_and( mask, vec_splat_u8( 3 ) );	/* indices mod 4 */
		} else if (BC == 6) {
			mask = vec_sld( mask, mask, 8 );
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			/* reduce the indices mod 6 by conditional subtract */
			t = vec_sub( mask, vec_splat_u8( 6 ) );
			c = vec_cmpgt( mask, vec_splat_u8( 5 ) );
			mask = vec_sel( mask, t, c );
		} else {
			mask = vec_sld( mask, mask1, 8 );	// combined rotation mask for both rows
			mask = vec_and( mask, vec_splat_u8( 7 ) );	/* indices mod 8 */
		}
		/* add 8 to the odd row's indices so they select bytes 8-15 */
		mask1 = vec_sld( vec_splat_u8( 0 ), vec_splat_u8( 8 ), 8 );
		mask = vec_add( mask, mask1 );
		v[i] = vec_perm( v[i], v[i], mask );	// rotate each row as required
	}
}

/* Apply the given 256-byte S-box (16 vectors) to every state byte. */
void vSubstitution( vector unsigned char v[2], vector unsigned char box[16] )
{
	v[0] = rimskyKorsakov( v[0], box );	// first vector contains rows 0 and 1
	v[1] = rimskyKorsakov( v[1], box );	// second vector contains rows 2 and 3
}

/* MixColumn: new row i = 2*a[i] ^ 3*a[i+1] ^ a[i+2] ^ a[i+3]
 * (row indices mod 4), computed two rows per vector. */
void vMixColumn(vector unsigned char v[2])
{
	// vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
	// vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f

	register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
	register vector unsigned char two, three;

	two = vec_splat_u8( 2 );
	three = vec_splat_u8( 3 );

	/* rotating the row pairs through vec_sld pairs each row with the
	 * next row (mod 4) */
	a1 = vec_sld( v[0], v[1], 8 );	// equivalent to a[i+1] % 4
	b1 = vec_sld( v[1], v[0], 8 );
	a2 = vec_sld( a1, b1, 8 );	// equivalent to a[i+2] % 4
	b2 = vec_sld( b1, a1, 8 );
	a3 = vec_sld( a2, b2,
8 );	// equivalent to a[i+3] % 4
	b3 = vec_sld( b2, a2, 8 );

	// Calculations for rows 0 and 1
	a0 = vmul( two, v[0] );			// mul(2,a[i][j])
	a0 = vec_xor( a0, vmul( three, a1 ) );	// ^ mul(3,a[(i + 1) % 4][j])
	a0 = vec_xor( a0, a2 );			// ^ a[(i + 2) % 4][j]
	v[0] = vec_xor( a0, a3 );		// ^ a[(i + 3) % 4][j]

	// Calculations for rows 2 and 3
	b0 = vmul( two, v[1] );
	b0 = vec_xor( b0, vmul( three, b1 ) );
	b0 = vec_xor( b0, b2 );
	v[1] = vec_xor( b0, b3 );
}

/* InvMixColumn: new row i = 14*a[i] ^ 11*a[i+1] ^ 13*a[i+2] ^ 9*a[i+3]
 * (row indices mod 4, GF(2^8) multiplies), two rows per vector. */
void vInvMixColumn(vector unsigned char v[2])
{
	// vector 0 contains row 0 in bytes 0-7 and row 1 in bytes 8-f
	// vector 1 contains row 2 in bytes 0-7 and row 3 in bytes 8-f

	register vector unsigned char a0, a1, a2, a3, b0, b1, b2, b3;
	register vector unsigned char nine, eleven, thirteen, fourteen;;

	nine = vec_splat_u8( 0x9 );
	eleven = vec_splat_u8( 0xb );
	thirteen = vec_splat_u8( 0xd );
	fourteen = vec_splat_u8( 0xe );

	a1 = vec_sld( v[0], v[1], 8 );	// equivalent to a[i+1] % 4
	b1 = vec_sld( v[1], v[0], 8 );
	a2 = vec_sld( a1, b1, 8 );	// equivalent to a[i+2] % 4
	b2 = vec_sld( b1, a1, 8 );
	a3 = vec_sld( a2, b2, 8 );	// equivalent to a[i+3] % 4
	b3 = vec_sld( b2, a2, 8 );

	// Calculations for rows 0 and 1
	a0 = vmul( fourteen, v[0] );			// mul(0xe,a[i][j])
	a0 = vec_xor( a0, vmul( eleven, a1 ) );		// ^ mul(0xb,a[(i + 1) % 4][j])
	a0 = vec_xor( a0, vmul( thirteen, a2 ) );	// ^ mul(0xd,a[(i + 2) % 4][j])
	v[0] = vec_xor( a0, vmul( nine, a3 ) );		// ^ mul(0x9,a[(i + 3) % 4][j])

	// Calculations for rows 2 and 3
	b0 = vmul( fourteen, v[1] );
	b0 = vec_xor( b0, vmul( eleven, b1 ) );
	b0 = vec_xor( b0, vmul( thirteen, b2 ) );
	v[1] = vec_xor( b0, vmul( nine, b3 ) );
}

/*
 * Encrypt one block in place.  a[] holds the state in the two-vector
 * row layout, rk[] the expanded round keys.  Returns 0 on success or
 * -2 for an unsupported blockBits.
 */
int vRijndaelEncrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
{
	/* Encryption of one block.
	 */
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ? keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* initial key addition, then ROUNDS-1 full rounds */
	vKeyAddition( a, rk[0] );
	for(r = 1; r < ROUNDS; r++) {
		vSubstitution( a, (vector unsigned char *)S);
		vShiftRow( a, 0, BC);
		vMixColumn( a );
		vKeyAddition( a, rk[r] );
	}
	/* last round omits MixColumn */
	vSubstitution( a, (vector unsigned char *)S);
	vShiftRow( a, 0, BC);
	vKeyAddition( a, rk[ROUNDS] );

	return 0;
}

/*
 * Decrypt one block in place (inverse cipher, reference ordering).
 * Returns 0 on success or -2 for an unsupported blockBits.
 */
int vRijndaelDecrypt (vector unsigned char a[2], int keyBits, int blockBits, vector unsigned char rk[MAXROUNDS+1][2])
{
	int r, BC, ROUNDS;

	switch (blockBits) {
	case 128: BC = 4; break;
	case 192: BC = 6; break;
	case 256: BC = 8; break;
	default : return (-2);
	}

	switch (keyBits >= blockBits ?
keyBits : blockBits) {
	case 128: ROUNDS = 10; break;
	case 192: ROUNDS = 12; break;
	case 256: ROUNDS = 14; break;
	default : return (-3); /* this cannot happen */
	}

	/* inverse cipher: undo the final round first, then ROUNDS-1
	 * inverse rounds, then the initial key addition */
	vKeyAddition( a, rk[ROUNDS] );
	vSubstitution( a, (vector unsigned char *)Si);
	vShiftRow( a, 1, BC);
	for(r = ROUNDS-1; r > 0; r--) {
		vKeyAddition( a, rk[r] );
		vInvMixColumn( a );
		vSubstitution( a, (vector unsigned char *)Si);
		vShiftRow( a, 1, BC);
	}
	vKeyAddition( a, rk[0] );

	return 0;
}

#if 0
/* Murley's code, to be deleted */
/*
 * (disabled) CBC/ECB encrypt of one block: load possibly-unaligned
 * input, transpose into the state layout, chain with
 * cipher->chainBlock for CBC, encrypt in place in chainBlock,
 * transpose back, and store to a possibly-unaligned output buffer.
 */
void vBlockEncrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
{
	register vector unsigned char v1, v2, v3, v4, mask;
	register vector bool char cmp;

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) input );
	v2 = vec_ld( 16, (vector unsigned char *) input );
	if ( (long) input & 0x0fL )
	{	// this is required if input is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) input );
		mask = vec_lvsl( 0, input );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );

	/* store into cipher structure */
	if (cipher->mode == MODE_CBC) {
		v3 = vec_xor( v3, *((vector unsigned char *) cipher->chainBlock ) );
		v4 = vec_xor( v4, *((vector unsigned char *) cipher->chainBlock + 1 ) );
	}
	vec_st( v3, 0, (vector unsigned char *) cipher->chainBlock );
	vec_st( v4, 16, (vector unsigned char *) cipher->chainBlock );

	vRijndaelEncrypt((vector unsigned char *) cipher->chainBlock, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);

	v1 = vec_ld( 0, (vector unsigned char *) cipher->chainBlock );
	v2 = vec_ld( 16, (vector unsigned char *) cipher->chainBlock );

	/* parse rectangular array into output ciphertext bytes */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{
		/* store output data into a non-aligned buffer: read-modify-
		 * write the partial first and last vectors via a select mask */
		mask = vec_lvsr( 0, outBuffer );
		cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
		v1 = vec_perm( v3, v3, mask );
		v2 = vec_perm( v4, v4, mask );
		v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
		v4 = vec_sel( v3, v1, cmp );
		vec_st( v4, 0, (vector unsigned char *) outBuffer );
		v1 = vec_sel( v1, v2, cmp );
		vec_st( v1, 16, (vector unsigned char *) outBuffer );
		v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
		v2 = vec_sel( v2, v3, cmp );
		vec_st( v2, 32, (vector unsigned char *) outBuffer );
	} else {
		// store output data into an aligned buffer
		vec_st( v3, 0, (vector unsigned char *) outBuffer );
		vec_st( v4, 16, (vector unsigned char *) outBuffer );
	}
	return;
}

/*
 * (disabled) CBC/ECB decrypt of one block; mirrors vBlockEncrypt,
 * chaining with the previous ciphertext block for CBC.
 */
void vBlockDecrypt(cipherInstance *cipher, keyInstance *key, BYTE *input, int inputLen, BYTE *outBuffer)
{
	// for vector machines
	register vector unsigned char v1, v2, v3, v4, mask;
	register vector bool char cmp;
	vector unsigned char block[2], cblock[2];

	/* load and align input */
	v1 = vec_ld( 0, (vector unsigned char *) input );
	v2 = vec_ld( 16, (vector unsigned char *) input );
	if ( (long) input & 0x0fL )
	{	// this is required if input is not on a 16-byte boundary
		v3 = vec_ld( 32, (vector unsigned char *) input );
		mask = vec_lvsl( 0, input );
		v1 = vec_perm( v1, v2, mask );
		v2 = vec_perm( v2, v3, mask );
	}

	/* parse input stream into rectangular array
	 */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 7, 11, 15, 19, 23, 27, 31 ) );
	block[0] = v3;
	block[1] = v4;

	/* save a copy of incoming ciphertext for later chain */
	if (cipher->mode == MODE_CBC) {
		cblock[0] = v3;
		cblock[1] = v4;
	}

	vRijndaelDecrypt ((vector unsigned char *) block, key->keyLen, cipher->blockLen, (vector unsigned char *) key->keySched);

	v1 = block[0];
	v2 = block[1];

	/* exor with last ciphertext */
	if (cipher->mode == MODE_CBC) {
		v1 = vec_xor( v1, *((vector unsigned char *) cipher->chainBlock) );
		v2 = vec_xor( v2, *((vector unsigned char *) cipher->chainBlock + 1) );
		vec_st( cblock[0], 0, (vector unsigned char *) cipher->chainBlock );
		vec_st( cblock[1], 16, (vector unsigned char *) cipher->chainBlock );
	}

	/* parse rectangular array into output ciphertext bytes */
	v3 = vec_perm( v1, v2, (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ) );
	v4 = vec_perm( v1, v2, (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ) );

	if ( (long) outBuffer & 0x0fL )
	{	/* store output data into a non-aligned buffer */
		mask = vec_lvsr( 0, outBuffer );
		cmp = vec_cmpgt( mask, vec_splat_u8( 0x0f ) );
		v1 = vec_perm( v3, v3, mask );
		v2 = vec_perm( v4, v4, mask );
		v3 = vec_ld( 0, (vector unsigned char *) outBuffer );
		v4 = vec_sel( v3, v1, cmp );
		vec_st( v4, 0, (vector unsigned char *) outBuffer );
		v1 = vec_sel( v1, v2, cmp );
		vec_st( v1, 16, (vector unsigned char *) outBuffer );
		v3 = vec_ld( 32, (vector unsigned char *) outBuffer );
		v2 = vec_sel( v2, v3, cmp );
		vec_st( v2, 32, (vector unsigned char *) outBuffer );
	} else {
		// store output data into an aligned buffer
		vec_st( v3, 0, (vector
unsigned char *) outBuffer ); 567 vec_st( v4, 16, (vector unsigned char *) outBuffer ); 568 } 569} 570#endif /* Murley's code, to be deleted */ 571 572/* 573 * dmitch addenda 4/11/2001: 128-bit only encrypt/decrypt with no CBC 574 */ 575void vBlockEncrypt128( 576 keyInstance *key, 577 BYTE *input, 578 BYTE *outBuffer) 579{ 580 vector unsigned char block[2]; 581 register vector unsigned char v1, v2; 582 583 if ( (long) input & 0x0fL ) { 584 BYTE localBuf[16]; 585 vdprintf(("vBlockEncrypt128: unaligned input\n")); 586 /* manually re-align - the compiler is supposed to 16-byte align this for us */ 587 if((unsigned)localBuf & 0xf) { 588 vdprintf(("vBlockEncrypt128: unaligned localBuf!\n")); 589 } 590 memmove(localBuf, input, 16); 591 v1 = vec_ld(0, (vector unsigned char *)localBuf); 592 } 593 else { 594 vdprintf(("vBlockEncrypt128: aligned input\n")); 595 v1 = vec_ld( 0, (vector unsigned char *) input ); 596 } 597 598 /* parse input stream into rectangular array */ 599 /* FIXME - do we need to zero v2 (or something)? 
*/ 600 block[0] = vec_perm(v1, v2, 601 (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 602 5, 9, 13, 17, 21, 25, 29 ) ); 603 block[1] = vec_perm( v1, v2, 604 (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 605 7, 11, 15, 19, 23, 27, 31 ) ); 606 607 vRijndaelEncrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched); 608 609 /* parse rectangular array into output ciphertext bytes */ 610 v1 = vec_perm(block[0], block[1], 611 (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 612 10, 18, 26, 3, 11, 19, 27 ) ); 613 v2 = vec_perm(block[0], block[1], 614 (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 615 14, 22, 30, 7, 15, 23, 31 ) ); 616 617 if ( (long) outBuffer & 0x0fL ) 618 { 619 /* store output data into a non-aligned buffer */ 620 BYTE localBuf[16]; 621 vec_st(v1, 0, (vector unsigned char *) localBuf ); 622 memmove(outBuffer, localBuf, 16); 623 } else { 624 /* store output data into an aligned buffer */ 625 vec_st( v1, 0, (vector unsigned char *) outBuffer ); 626 } 627 return; 628} 629 630void vBlockDecrypt128( 631 keyInstance *key, 632 BYTE *input, 633 BYTE *outBuffer) 634{ 635 vector unsigned char block[2]; 636 register vector unsigned char v1, v2; 637 638 if ( (long) input & 0x0fL ) { 639 /* manually re-align - the compiler is supposed to 16-byte align this for us */ 640 BYTE localBuf[16]; 641 vdprintf(("vBlockDecrypt128: unaligned input\n")); 642 if((unsigned)localBuf & 0xf) { 643 vdprintf(("vBlockDecrypt128: unaligned localBuf!\n")); 644 } 645 memmove(localBuf, input, 16); 646 v1 = vec_ld(0, (vector unsigned char *)localBuf); 647 } 648 else { 649 vdprintf(("vBlockDecrypt128: aligned input\n")); 650 v1 = vec_ld( 0, (vector unsigned char *) input ); 651 } 652 653 /* parse input stream into rectangular array */ 654 /* FIXME - do we need to zero v2 (or something)? 
*/ 655 block[0] = vec_perm(v1, v2, 656 (vector unsigned char) ( 0, 4, 8, 12, 16, 20, 24, 28, 1, 657 5, 9, 13, 17, 21, 25, 29 ) ); 658 block[1] = vec_perm( v1, v2, 659 (vector unsigned char) ( 2, 6, 10, 14, 18, 22, 26, 30, 3, 660 7, 11, 15, 19, 23, 27, 31 ) ); 661 662 vRijndaelDecrypt(block, key->keyLen, 128, (vector unsigned char *) key->keySched); 663 664 /* parse rectangular array into output ciphertext bytes */ 665 v1 = vec_perm(block[0], block[1], 666 (vector unsigned char) ( 0, 8, 16, 24, 1, 9, 17, 25, 2, 667 10, 18, 26, 3, 11, 19, 27 ) ); 668 v2 = vec_perm(block[0], block[1], 669 (vector unsigned char) ( 4, 12, 20, 28, 5, 13, 21, 29, 6, 670 14, 22, 30, 7, 15, 23, 31 ) ); 671 672 if ( (long) outBuffer & 0x0fL ) { 673 /* store output data into a non-aligned buffer */ 674 BYTE localBuf[16]; 675 vec_st(v1, 0, (vector unsigned char *) localBuf ); 676 memmove(outBuffer, localBuf, 16); 677 } else { 678 /* store output data into an aligned buffer */ 679 vec_st( v1, 0, (vector unsigned char *) outBuffer ); 680 } 681 return; 682} 683 684#endif /* defined(__ppc__) && defined(ALTIVEC_ENABLE) */ 685