1261287Sdes/* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */ 2261287Sdes/* 3261287Sdesversion 20081011 4261287SdesMatthew Dempsky 5261287SdesPublic domain. 6261287SdesDerived from public domain code by D. J. Bernstein. 7261287Sdes*/ 8261287Sdes 9261287Sdesint crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *); 10261287Sdes 11261287Sdesstatic void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 12261287Sdes{ 13261287Sdes unsigned int j; 14261287Sdes unsigned int u; 15261287Sdes u = 0; 16261287Sdes for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; } 17261287Sdes u += a[31] + b[31]; out[31] = u; 18261287Sdes} 19261287Sdes 20261287Sdesstatic void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 21261287Sdes{ 22261287Sdes unsigned int j; 23261287Sdes unsigned int u; 24261287Sdes u = 218; 25261287Sdes for (j = 0;j < 31;++j) { 26261287Sdes u += a[j] + 65280 - b[j]; 27261287Sdes out[j] = u & 255; 28261287Sdes u >>= 8; 29261287Sdes } 30261287Sdes u += a[31] - b[31]; 31261287Sdes out[31] = u; 32261287Sdes} 33261287Sdes 34261287Sdesstatic void squeeze(unsigned int a[32]) 35261287Sdes{ 36261287Sdes unsigned int j; 37261287Sdes unsigned int u; 38261287Sdes u = 0; 39261287Sdes for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } 40261287Sdes u += a[31]; a[31] = u & 127; 41261287Sdes u = 19 * (u >> 7); 42261287Sdes for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } 43261287Sdes u += a[31]; a[31] = u; 44261287Sdes} 45261287Sdes 46261287Sdesstatic const unsigned int minusp[32] = { 47261287Sdes 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 48261287Sdes} ; 49261287Sdes 50261287Sdesstatic void freeze(unsigned int a[32]) 51261287Sdes{ 52261287Sdes unsigned int aorig[32]; 53261287Sdes unsigned int j; 54261287Sdes unsigned int negative; 55261287Sdes 56261287Sdes for (j = 0;j < 32;++j) aorig[j] = a[j]; 57261287Sdes add(a,a,minusp); 58261287Sdes negative = -((a[31] >> 7) & 1); 59261287Sdes for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]); 60261287Sdes} 61261287Sdes 62261287Sdesstatic void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 63261287Sdes{ 64261287Sdes unsigned int i; 65261287Sdes unsigned int j; 66261287Sdes unsigned int u; 67261287Sdes 68261287Sdes for (i = 0;i < 32;++i) { 69261287Sdes u = 0; 70261287Sdes for (j = 0;j <= i;++j) u += a[j] * b[i - j]; 71261287Sdes for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j]; 72261287Sdes out[i] = u; 73261287Sdes } 74261287Sdes squeeze(out); 75261287Sdes} 76261287Sdes 77261287Sdesstatic void mult121665(unsigned int out[32],const unsigned int a[32]) 78261287Sdes{ 79261287Sdes unsigned int j; 80261287Sdes unsigned int u; 81261287Sdes 82261287Sdes u = 0; 83261287Sdes for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; } 84261287Sdes u += 121665 * a[31]; out[31] = u & 127; 85261287Sdes u = 19 * (u >> 7); 86261287Sdes for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; } 87261287Sdes u += out[j]; out[j] = u; 88261287Sdes} 89261287Sdes 90261287Sdesstatic void square(unsigned int out[32],const unsigned int a[32]) 91261287Sdes{ 92261287Sdes unsigned int i; 93261287Sdes unsigned int j; 94261287Sdes unsigned int u; 95261287Sdes 96261287Sdes for (i = 0;i < 32;++i) { 97261287Sdes u = 0; 98261287Sdes for (j = 0;j < i - j;++j) u += a[j] * a[i - j]; 99261287Sdes for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j]; 100261287Sdes u *= 2; 101261287Sdes if ((i & 1) == 0) { 102261287Sdes u += a[i / 2] * a[i / 2]; 103261287Sdes u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; 104261287Sdes } 105261287Sdes out[i] = u; 106261287Sdes } 107261287Sdes squeeze(out); 108261287Sdes} 109261287Sdes 110261287Sdesstatic void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b) 111261287Sdes{ 112261287Sdes unsigned int j; 113261287Sdes unsigned int t; 114261287Sdes unsigned int bminus1; 115261287Sdes 116261287Sdes bminus1 = b - 1; 117261287Sdes for (j = 0;j < 64;++j) { 118261287Sdes t = bminus1 & (r[j] ^ s[j]); 119261287Sdes p[j] = s[j] ^ t; 120261287Sdes q[j] = r[j] ^ t; 121261287Sdes } 122261287Sdes} 123261287Sdes 124261287Sdesstatic void mainloop(unsigned int work[64],const unsigned char e[32]) 125261287Sdes{ 126261287Sdes unsigned int xzm1[64]; 127261287Sdes unsigned int xzm[64]; 128261287Sdes unsigned int xzmb[64]; 129261287Sdes unsigned int xzm1b[64]; 130261287Sdes unsigned int xznb[64]; 131261287Sdes unsigned int xzn1b[64]; 132261287Sdes unsigned int a0[64]; 133261287Sdes unsigned int a1[64]; 134261287Sdes unsigned int b0[64]; 135261287Sdes unsigned int b1[64]; 136261287Sdes unsigned int c1[64]; 137261287Sdes unsigned int r[32]; 138261287Sdes unsigned int s[32]; 139261287Sdes unsigned int t[32]; 140261287Sdes unsigned int u[32]; 141261287Sdes unsigned int j; 142261287Sdes unsigned int b; 143261287Sdes int pos; 144261287Sdes 145261287Sdes for (j = 0;j < 32;++j) xzm1[j] = work[j]; 146261287Sdes xzm1[32] = 1; 147261287Sdes for (j = 33;j < 64;++j) xzm1[j] = 0; 148261287Sdes 149261287Sdes xzm[0] = 1; 150261287Sdes for (j = 1;j < 64;++j) xzm[j] = 0; 151261287Sdes 152261287Sdes for (pos = 254;pos >= 0;--pos) { 153261287Sdes b = e[pos / 8] >> (pos & 7); 154261287Sdes b &= 1; 155261287Sdes select(xzmb,xzm1b,xzm,xzm1,b); 156261287Sdes add(a0,xzmb,xzmb + 32); 157261287Sdes sub(a0 + 32,xzmb,xzmb + 32); 158261287Sdes add(a1,xzm1b,xzm1b + 32); 159261287Sdes sub(a1 + 32,xzm1b,xzm1b + 32); 160261287Sdes square(b0,a0); 161261287Sdes square(b0 + 32,a0 + 32); 162261287Sdes mult(b1,a1,a0 + 32); 163261287Sdes mult(b1 + 32,a1 + 32,a0); 164261287Sdes add(c1,b1,b1 + 32); 165261287Sdes sub(c1 + 32,b1,b1 + 32); 166261287Sdes square(r,c1 + 32); 167261287Sdes sub(s,b0,b0 + 32); 168261287Sdes mult121665(t,s); 169261287Sdes add(u,t,b0); 170261287Sdes mult(xznb,b0,b0 + 32); 171261287Sdes mult(xznb + 32,s,u); 172261287Sdes square(xzn1b,c1); 173261287Sdes mult(xzn1b + 32,r,work); 174261287Sdes select(xzm,xzm1,xznb,xzn1b,b); 175261287Sdes } 176261287Sdes 177261287Sdes for (j = 0;j < 64;++j) work[j] = xzm[j]; 178261287Sdes} 179261287Sdes 180261287Sdesstatic void recip(unsigned int out[32],const unsigned int z[32]) 181261287Sdes{ 182261287Sdes unsigned int z2[32]; 183261287Sdes unsigned int z9[32]; 184261287Sdes unsigned int z11[32]; 185261287Sdes unsigned int z2_5_0[32]; 186261287Sdes unsigned int z2_10_0[32]; 187261287Sdes unsigned int z2_20_0[32]; 188261287Sdes unsigned int z2_50_0[32]; 189261287Sdes unsigned int z2_100_0[32]; 190261287Sdes unsigned int t0[32]; 191261287Sdes unsigned int t1[32]; 192261287Sdes int i; 193261287Sdes 194261287Sdes /* 2 */ square(z2,z); 195261287Sdes /* 4 */ square(t1,z2); 196261287Sdes /* 8 */ square(t0,t1); 197261287Sdes /* 9 */ mult(z9,t0,z); 198261287Sdes /* 11 */ mult(z11,z9,z2); 199261287Sdes /* 22 */ square(t0,z11); 200261287Sdes /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9); 201261287Sdes 202261287Sdes /* 2^6 - 2^1 */ square(t0,z2_5_0); 203261287Sdes /* 2^7 - 2^2 */ square(t1,t0); 204261287Sdes /* 2^8 - 2^3 */ square(t0,t1); 205261287Sdes /* 2^9 - 2^4 */ square(t1,t0); 206261287Sdes /* 2^10 - 2^5 */ square(t0,t1); 207261287Sdes /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0); 208261287Sdes 209261287Sdes /* 2^11 - 2^1 */ square(t0,z2_10_0); 210261287Sdes /* 2^12 - 2^2 */ square(t1,t0); 211261287Sdes /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); } 212261287Sdes /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0); 213261287Sdes 214261287Sdes /* 2^21 - 2^1 */ square(t0,z2_20_0); 215261287Sdes /* 2^22 - 2^2 */ square(t1,t0); 216261287Sdes /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); } 217261287Sdes /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0); 218261287Sdes 219261287Sdes /* 2^41 - 2^1 */ square(t1,t0); 220261287Sdes /* 2^42 - 2^2 */ square(t0,t1); 221261287Sdes /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); } 222261287Sdes /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0); 223261287Sdes 224261287Sdes /* 2^51 - 2^1 */ square(t0,z2_50_0); 225261287Sdes /* 2^52 - 2^2 */ square(t1,t0); 226261287Sdes /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } 227261287Sdes /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0); 228261287Sdes 229261287Sdes /* 2^101 - 2^1 */ square(t1,z2_100_0); 230261287Sdes /* 2^102 - 2^2 */ square(t0,t1); 231261287Sdes /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); } 232261287Sdes /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0); 233261287Sdes 234261287Sdes /* 2^201 - 2^1 */ square(t0,t1); 235261287Sdes /* 2^202 - 2^2 */ square(t1,t0); 236261287Sdes /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } 237261287Sdes /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0); 238261287Sdes 239261287Sdes /* 2^251 - 2^1 */ square(t1,t0); 240261287Sdes /* 2^252 - 2^2 */ square(t0,t1); 241261287Sdes /* 2^253 - 2^3 */ square(t1,t0); 242261287Sdes /* 2^254 - 2^4 */ square(t0,t1); 243261287Sdes /* 2^255 - 2^5 */ square(t1,t0); 244261287Sdes /* 2^255 - 21 */ mult(out,t1,z11); 245261287Sdes} 246261287Sdes 247261287Sdesint crypto_scalarmult_curve25519(unsigned char *q, 248261287Sdes const unsigned char *n, 249261287Sdes const unsigned char *p) 250261287Sdes{ 251261287Sdes unsigned int work[96]; 252261287Sdes unsigned char e[32]; 253261287Sdes unsigned int i; 254261287Sdes for (i = 0;i < 32;++i) e[i] = n[i]; 255261287Sdes e[0] &= 248; 256261287Sdes e[31] &= 127; 257261287Sdes e[31] |= 64; 258261287Sdes for (i = 0;i < 32;++i) work[i] = p[i]; 259261287Sdes mainloop(work,e); 260261287Sdes recip(work + 32,work + 32); 261261287Sdes mult(work + 64,work,work + 32); 262261287Sdes freeze(work + 64); 263261287Sdes for (i = 0;i < 32;++i) q[i] = work[64 + i]; 264261287Sdes return 0; 265261287Sdes} 266