/*
 * Copyright 2009-2021 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/*
 * PowerPC run-time capability detection and dispatch.
 *
 * This file fills in OPENSSL_ppccap_P (see OPENSSL_cpuid_setup() below) and
 * provides thin C wrappers that pick between several implementations of the
 * same primitive depending on which PPC_* bits are set in that mask.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <setjmp.h>
#include <signal.h>
#include <unistd.h>
#if defined(__linux) || defined(_AIX)
# include <sys/utsname.h>
#endif
#if defined(_AIX53)     /* defined even on post-5.3 */
# include <sys/systemcfg.h>
# if !defined(__power_set)
#  define __power_set(a) (_system_configuration.implementation & (a))
# endif
#endif
#if defined(__APPLE__) && defined(__MACH__)
# include <sys/types.h>
# include <sys/sysctl.h>
#endif
#if defined(__NetBSD__)
# include <sys/param.h>
# include <sys/sysctl.h>
#endif
#include <openssl/crypto.h>
#include <openssl/bn.h>
#include <internal/cryptlib.h>
#include <crypto/chacha.h>
#include "bn/bn_local.h"

#include "ppc_arch.h"

/*
 * Bitmask of detected processor capabilities, built from PPC_* flags.
 * The PPC_* constants are not defined in this file (presumably they come
 * from "ppc_arch.h" - confirm).  Written by OPENSSL_cpuid_setup(), read by
 * every dispatcher below.
 */
unsigned int OPENSSL_ppccap_P = 0;

/*
 * Signal set used while probing in OPENSSL_cpuid_setup(): every signal
 * except SIGILL, SIGTRAP, SIGEMT (where defined), SIGFPE, SIGBUS and SIGSEGV.
 */
static sigset_t all_masked;

#ifdef OPENSSL_BN_ASM_MONT
/*
 * Dispatcher for the Montgomery multiplication routines.
 *
 * Returns 0 immediately when num < 4, without calling either routine
 * (presumably this tells the caller to fall back to a generic path -
 * confirm against the caller).  Otherwise forwards all arguments to
 * bn_mul4x_mont_int() when num is a multiple of 4, or to bn_mul_mont_int()
 * when it is not, and returns that routine's result.
 */
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num)
{
    /* Both implementations are defined outside this file. */
    int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                        const BN_ULONG *np, const BN_ULONG *n0, int num);
    int bn_mul4x_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                          const BN_ULONG *np, const BN_ULONG *n0, int num);

    if (num < 4)
        return 0;

    /* Low two bits clear: num is a multiple of 4, use the 4x routine. */
    if ((num & 3) == 0)
        return bn_mul4x_mont_int(rp, ap, bp, np, n0, num);

    /*
     * There used to be [optional] call to bn_mul_mont_fpu64 here,
     * but above subroutine is faster on contemporary processors.
     * Formulation means that there might be old processors where
     * FPU code path would be faster, POWER6 perhaps, but there was
     * no opportunity to figure it out...
     */

    return bn_mul_mont_int(rp, ap, bp, np, n0, num);
}
#endif

#ifdef DONT_USE_SHA256_FROM_LIBC
void sha256_block_p8(void *ctx, const void *inp, size_t len);
void sha256_block_ppc(void *ctx, const void *inp, size_t len);
void sha256_block_data_order(void *ctx, const void *inp, size_t len);
/*
 * SHA-256 block function dispatcher: forwards to sha256_block_p8() when
 * PPC_CRYPTO207 is set in OPENSSL_ppccap_P, otherwise to sha256_block_ppc().
 * Only compiled when DONT_USE_SHA256_FROM_LIBC is defined.
 */
void sha256_block_data_order(void *ctx, const void *inp, size_t len)
{
    OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha256_block_p8(ctx, inp, len) :
        sha256_block_ppc(ctx, inp, len);
}
#endif

#ifdef DONT_USE_SHA512_FROM_LIBC
void sha512_block_p8(void *ctx, const void *inp, size_t len);
void sha512_block_ppc(void *ctx, const void *inp, size_t len);
void sha512_block_data_order(void *ctx, const void *inp, size_t len);
/*
 * SHA-512 block function dispatcher: forwards to sha512_block_p8() when
 * PPC_CRYPTO207 is set in OPENSSL_ppccap_P, otherwise to sha512_block_ppc().
 * Only compiled when DONT_USE_SHA512_FROM_LIBC is defined.
 */
void sha512_block_data_order(void *ctx, const void *inp, size_t len)
{
    OPENSSL_ppccap_P & PPC_CRYPTO207 ? sha512_block_p8(ctx, inp, len) :
        sha512_block_ppc(ctx, inp, len);
}
#endif

#ifndef OPENSSL_NO_CHACHA
void ChaCha20_ctr32_int(unsigned char *out, const unsigned char *inp,
                        size_t len, const unsigned int key[8],
                        const unsigned int counter[4]);
void ChaCha20_ctr32_vmx(unsigned char *out, const unsigned char *inp,
                        size_t len, const unsigned int key[8],
                        const unsigned int counter[4]);
void ChaCha20_ctr32_vsx(unsigned char *out, const unsigned char *inp,
                        size_t len, const unsigned int key[8],
                        const unsigned int counter[4]);
/*
 * ChaCha20 dispatcher.  Selection order, first match wins:
 *   PPC_CRYPTO207 set -> ChaCha20_ctr32_vsx()
 *   PPC_ALTIVEC set   -> ChaCha20_ctr32_vmx()
 *   otherwise         -> ChaCha20_ctr32_int()
 * All arguments are passed through unchanged.
 */
void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
                    size_t len, const unsigned int key[8],
                    const unsigned int counter[4])
{
    OPENSSL_ppccap_P & PPC_CRYPTO207
        ? ChaCha20_ctr32_vsx(out, inp, len, key, counter)
        : OPENSSL_ppccap_P & PPC_ALTIVEC
            ? ChaCha20_ctr32_vmx(out, inp, len, key, counter)
            : ChaCha20_ctr32_int(out, inp, len, key, counter);
}
#endif

#ifndef OPENSSL_NO_POLY1305
void poly1305_init_int(void *ctx, const unsigned char key[16]);
void poly1305_blocks(void *ctx, const unsigned char *inp, size_t len,
                     unsigned int padbit);
void poly1305_emit(void *ctx, unsigned char mac[16],
                   const unsigned int nonce[4]);
void poly1305_init_fpu(void *ctx, const unsigned char key[16]);
void poly1305_blocks_fpu(void *ctx, const unsigned char *inp, size_t len,
                         unsigned int padbit);
void poly1305_emit_fpu(void *ctx, unsigned char mac[16],
                       const unsigned int nonce[4]);
int poly1305_init(void *ctx, const unsigned char key[16], void *func[2]);
/*
 * Poly1305 initialisation with implementation selection.
 *
 * When size_t is 4 bytes wide and PPC_FPU is set, the *_fpu variants are
 * used; otherwise the integer variants are used.  In both cases the chosen
 * init routine is called with (ctx, key), and the matching routines are
 * handed back to the caller:
 *   func[0] = "blocks" routine
 *   func[1] = "emit" routine
 * Always returns 1.
 */
int poly1305_init(void *ctx, const unsigned char key[16], void *func[2])
{
    if (sizeof(size_t) == 4 && (OPENSSL_ppccap_P & PPC_FPU)) {
        poly1305_init_fpu(ctx, key);
        /* Function pointers are converted to void * via uintptr_t. */
        func[0] = (void*)(uintptr_t)poly1305_blocks_fpu;
        func[1] = (void*)(uintptr_t)poly1305_emit_fpu;
    } else {
        poly1305_init_int(ctx, key);
        func[0] = (void*)(uintptr_t)poly1305_blocks;
        func[1] = (void*)(uintptr_t)poly1305_emit;
    }
    return 1;
}
#endif

#ifdef ECP_NISTZ256_ASM
void ecp_nistz256_mul_mont(unsigned long res[4], const unsigned long a[4],
                           const unsigned long b[4]);

void ecp_nistz256_to_mont(unsigned long res[4], const unsigned long in[4]);
/*
 * Computes res = ecp_nistz256_mul_mont(in, RR) with a fixed four-word
 * constant RR.
 * NOTE(review): by its name this converts into Montgomery form, so RR is
 * presumably R^2 mod p for the P-256 prime - confirm.  The 64-bit literals
 * assume a 64-bit unsigned long.
 */
void ecp_nistz256_to_mont(unsigned long res[4], const unsigned long in[4])
{
    static const unsigned long RR[] = { 0x0000000000000003U,
                                        0xfffffffbffffffffU,
                                        0xfffffffffffffffeU,
                                        0x00000004fffffffdU };

    ecp_nistz256_mul_mont(res, in, RR);
}

void ecp_nistz256_from_mont(unsigned long res[4], const unsigned long in[4]);
/*
 * Computes res = ecp_nistz256_mul_mont(in, one), where one is the four-word
 * value { 1, 0, 0, 0 }.
 * NOTE(review): by its name this converts out of Montgomery form - confirm.
 */
void ecp_nistz256_from_mont(unsigned long res[4], const unsigned long in[4])
{
    static const unsigned long one[] = { 1, 0, 0, 0 };

    ecp_nistz256_mul_mont(res, in, one);
}
#endif

/* Jump target armed by sigsetjmp() around each probe in OPENSSL_cpuid_setup(). */
static sigjmp_buf ill_jmp;
/*
 * SIGILL handler installed during probing: jumps back to the most recent
 * sigsetjmp(ill_jmp, ...), making it return the (non-zero) signal number.
 */
static void ill_handler(int sig)
{
    siglongjmp(ill_jmp, sig);
}

/*
 * Probe routines, defined outside this file.  OPENSSL_cpuid_setup() calls
 * each one under sigsetjmp() and sets the matching PPC_* flag only if the
 * call returns normally (presumably each executes an instruction that raises
 * SIGILL when the feature is absent - confirm against the assembly).
 */
void OPENSSL_fpu_probe(void);
void OPENSSL_ppc64_probe(void);
void OPENSSL_altivec_probe(void);
void OPENSSL_crypto207_probe(void);
void OPENSSL_madd300_probe(void);

long OPENSSL_rdtsc_mftb(void);
long OPENSSL_rdtsc_mfspr268(void);

/*
 * Returns the counter value from OPENSSL_rdtsc_mftb() if PPC_MFTB is set,
 * else from OPENSSL_rdtsc_mfspr268() if PPC_MFSPR268 is set, else 0.
 * The long result of the helper is converted to uint32_t on return.
 */
uint32_t OPENSSL_rdtsc(void)
{
    if (OPENSSL_ppccap_P & PPC_MFTB)
        return OPENSSL_rdtsc_mftb();
    else if (OPENSSL_ppccap_P & PPC_MFSPR268)
        return OPENSSL_rdtsc_mfspr268();
    else
        return 0;
}

size_t OPENSSL_instrument_bus_mftb(unsigned int *, size_t);
size_t OPENSSL_instrument_bus_mfspr268(unsigned int *, size_t);

/*
 * Forwards (out, cnt) to the _mftb or _mfspr268 variant according to the
 * same flag priority as OPENSSL_rdtsc(); returns 0 without touching out
 * when neither flag is set.
 */
size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt)
{
    if (OPENSSL_ppccap_P & PPC_MFTB)
        return OPENSSL_instrument_bus_mftb(out, cnt);
    else if (OPENSSL_ppccap_P & PPC_MFSPR268)
        return OPENSSL_instrument_bus_mfspr268(out, cnt);
    else
        return 0;
}

size_t OPENSSL_instrument_bus2_mftb(unsigned int *, size_t, size_t);
size_t OPENSSL_instrument_bus2_mfspr268(unsigned int *, size_t, size_t);

/*
 * Forwards (out, cnt, max) to the _mftb or _mfspr268 variant according to
 * the same flag priority as OPENSSL_rdtsc(); returns 0 without touching out
 * when neither flag is set.
 */
size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max)
{
    if (OPENSSL_ppccap_P & PPC_MFTB)
        return OPENSSL_instrument_bus2_mftb(out, cnt, max);
    else if (OPENSSL_ppccap_P & PPC_MFSPR268)
        return OPENSSL_instrument_bus2_mfspr268(out, cnt, max);
    else
        return 0;
}

/*
 * Decide whether getauxval() is available.  OSSL_IMPLEMENT_GETAUXVAL is
 * defined for glibc >= 2.16, or (within the same __GLIBC__ guard) for
 * __ANDROID_API__ >= 18.
 */
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
# if __GLIBC_PREREQ(2, 16)
#  include <sys/auxv.h>
#  define OSSL_IMPLEMENT_GETAUXVAL
# elif defined(__ANDROID_API__)
/* see https://developer.android.google.cn/ndk/guides/cpu-features */
#  if __ANDROID_API__ >= 18
#   include <sys/auxv.h>
#   define OSSL_IMPLEMENT_GETAUXVAL
#  endif
# endif
#endif

#if defined(__FreeBSD__)
# include <sys/param.h>
# if __FreeBSD_version >= 1200000
#  include <sys/auxv.h>
#  define OSSL_IMPLEMENT_GETAUXVAL

/*
 * FreeBSD shim with the getauxval() signature, built on elf_aux_info().
 * Returns the fetched value, or 0 if elf_aux_info() reports failure.
 */
static unsigned long getauxval(unsigned long key)
{
  unsigned long val = 0ul;

  if (elf_aux_info((int)key, &val, sizeof(val)) != 0)
    return 0ul;

  return val;
}
# endif
#endif

/* I wish <sys/auxv.h> was universally available */
/* Key passed to getauxval() for the first capability word, and its bits. */
#define HWCAP                   16      /* AT_HWCAP */
#define HWCAP_PPC64             (1U << 30)
#define HWCAP_ALTIVEC           (1U << 28)
#define HWCAP_FPU               (1U << 27)
#define HWCAP_POWER6_EXT        (1U << 9)
#define HWCAP_VSX               (1U << 7)

/* Key for the second capability word; the bits below are tested against it. */
#define HWCAP2                  26      /* AT_HWCAP2 */
#define HWCAP_VEC_CRYPTO        (1U << 25)
#define HWCAP_ARCH_3_00         (1U << 23)

/*
 * Populate OPENSSL_ppccap_P.
 *
 * Runs at most once (guarded by a static flag); with GCC-compatible
 * compilers it is also registered as a constructor.  Detection order:
 *
 *  1. If the environment variable OPENSSL_ppccap is set, its numeric value
 *     (strtoul, base auto-detected) becomes the mask and nothing else runs.
 *  2. AIX: PPC_FPU is assumed; remaining flags come from __power_set() when
 *     that macro is available, and the function returns.
 *  3. Apple/Mach: PPC_FPU is assumed; PPC_FPU64 and PPC_ALTIVEC come from
 *     sysctlbyname(), and the function returns.
 *  4. If getauxval() is available, flags are derived from HWCAP/HWCAP2.
 *     Otherwise each feature is probed by calling an OPENSSL_*_probe()
 *     routine with a SIGILL handler installed.
 *  5. In both sub-cases of step 4 the time-base access method (PPC_MFTB or
 *     PPC_MFSPR268) is determined by probing under the SIGILL handler.
 *
 * The previous SIGILL disposition and signal mask are restored before
 * returning from the probing path.
 */
# if defined(__GNUC__) && __GNUC__>=2
__attribute__ ((constructor))
# endif
void OPENSSL_cpuid_setup(void)
{
    char *e;
    struct sigaction ill_oact, ill_act;
    sigset_t oset;
    static int trigger = 0;

    /* Only the first call does any work. */
    if (trigger)
        return;
    trigger = 1;

    /* Explicit override from the environment wins over all detection. */
    if ((e = getenv("OPENSSL_ppccap"))) {
        OPENSSL_ppccap_P = strtoul(e, NULL, 0);
        return;
    }

    OPENSSL_ppccap_P = 0;

#if defined(_AIX)
    OPENSSL_ppccap_P |= PPC_FPU;

    /*
     * 32-bit build: stop here, leaving only PPC_FPU set, unless the kernel
     * runs in 64-bit mode (when that can be queried) and uname() succeeds
     * with a version of at least 6.
     */
    if (sizeof(size_t) == 4) {
        struct utsname uts;
# if defined(_SC_AIX_KERNEL_BITMODE)
        if (sysconf(_SC_AIX_KERNEL_BITMODE) != 64)
            return;
# endif
        if (uname(&uts) != 0 || atoi(uts.version) < 6)
            return;
    }

# if defined(__power_set)
    /*
     * Value used in __power_set is a single-bit 1<<n one denoting
     * specific processor class. Incidentally 0xffffffff<<n can be
     * used to denote specific processor and its successors.
     */
    if (sizeof(size_t) == 4) {
        /* In 32-bit case PPC_FPU64 is always fastest [if option] */
        if (__power_set(0xffffffffU<<13))   /* POWER5 and later */
            OPENSSL_ppccap_P |= PPC_FPU64;
    } else {
        /* In 64-bit case PPC_FPU64 is fastest only on POWER6 */
        if (__power_set(0x1U<<14))          /* POWER6 */
            OPENSSL_ppccap_P |= PPC_FPU64;
    }

    if (__power_set(0xffffffffU<<14))       /* POWER6 and later */
        OPENSSL_ppccap_P |= PPC_ALTIVEC;

    if (__power_set(0xffffffffU<<16))       /* POWER8 and later */
        OPENSSL_ppccap_P |= PPC_CRYPTO207;

    if (__power_set(0xffffffffU<<17))       /* POWER9 and later */
        OPENSSL_ppccap_P |= PPC_MADD300;

    /* Detection is complete on AIX; the signal-based probes are skipped. */
    return;
# endif
#endif

#if defined(__APPLE__) && defined(__MACH__)
    OPENSSL_ppccap_P |= PPC_FPU;

    {
        int val;
        size_t len = sizeof(val);

        /* A failed sysctlbyname() simply leaves the flag unset. */
        if (sysctlbyname("hw.optional.64bitops", &val, &len, NULL, 0) == 0) {
            if (val)
                OPENSSL_ppccap_P |= PPC_FPU64;
        }

        /* Reset len: the previous call may have modified it. */
        len = sizeof(val);
        if (sysctlbyname("hw.optional.altivec", &val, &len, NULL, 0) == 0) {
            if (val)
                OPENSSL_ppccap_P |= PPC_ALTIVEC;
        }

        /* Detection is complete here; the signal-based probes are skipped. */
        return;
    }
#endif

#ifdef OSSL_IMPLEMENT_GETAUXVAL
    {
        unsigned long hwcap = getauxval(HWCAP);
        unsigned long hwcap2 = getauxval(HWCAP2);

        if (hwcap & HWCAP_FPU) {
            OPENSSL_ppccap_P |= PPC_FPU;

            if (sizeof(size_t) == 4) {
                /* In 32-bit case PPC_FPU64 is always fastest [if option] */
                if (hwcap & HWCAP_PPC64)
                    OPENSSL_ppccap_P |= PPC_FPU64;
            } else {
                /* In 64-bit case PPC_FPU64 is fastest only on POWER6 */
                if (hwcap & HWCAP_POWER6_EXT)
                    OPENSSL_ppccap_P |= PPC_FPU64;
            }
        }

        if (hwcap & HWCAP_ALTIVEC) {
            OPENSSL_ppccap_P |= PPC_ALTIVEC;

            /* PPC_CRYPTO207 requires AltiVec, VSX and the crypto bit. */
            if ((hwcap & HWCAP_VSX) && (hwcap2 & HWCAP_VEC_CRYPTO))
                OPENSSL_ppccap_P |= PPC_CRYPTO207;
        }

        if (hwcap2 & HWCAP_ARCH_3_00) {
            OPENSSL_ppccap_P |= PPC_MADD300;
        }
    }
#endif

    /*
     * Build the mask used while probing: block everything except the
     * signals removed below.
     */
    sigfillset(&all_masked);
    sigdelset(&all_masked, SIGILL);
    sigdelset(&all_masked, SIGTRAP);
#ifdef SIGEMT
    sigdelset(&all_masked, SIGEMT);
#endif
    sigdelset(&all_masked, SIGFPE);
    sigdelset(&all_masked, SIGBUS);
    sigdelset(&all_masked, SIGSEGV);

    memset(&ill_act, 0, sizeof(ill_act));
    ill_act.sa_handler = ill_handler;
    ill_act.sa_mask = all_masked;

    /* Save the current mask and SIGILL action; both are restored at the end. */
    sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
    sigaction(SIGILL, &ill_act, &ill_oact);

#ifndef OSSL_IMPLEMENT_GETAUXVAL
    /*
     * No getauxval(): probe each feature directly.  sigsetjmp() returns 0
     * when first called; if the probe raises SIGILL, ill_handler() jumps
     * back and sigsetjmp() returns non-zero, so the flag is not set.
     */
# ifdef __NetBSD__
    int error, val;
    size_t len = sizeof(val);

    /*
     * If machdep.fpu_present == 0, FPU is absent and emulated by
     * software.  In that case, using FPU instructions hurts rather
     * than helps performance, and the software is unlikely to run in
     * constant time so it would expose us to timing side channel
     * attacks.  So don't do it!
     */
    error = sysctlbyname("machdep.fpu_present", &val, &len, NULL, 0);
    /*
     * This if has no body of its own: it guards the sigsetjmp() statement
     * that follows the #endif.  The FPU probe is attempted when the sysctl
     * fails or reports a non-zero value.
     */
    if (error != 0 || (error == 0 && val != 0))
# endif
    if (sigsetjmp(ill_jmp,1) == 0) {
        OPENSSL_fpu_probe();
        OPENSSL_ppccap_P |= PPC_FPU;

        if (sizeof(size_t) == 4) {
# ifdef __linux
            /*
             * Like the NetBSD case above, this if guards the sigsetjmp()
             * statement after the #endif: on Linux the 64-bit probe is only
             * tried when uname() reports machine "ppc64".
             */
            struct utsname uts;
            if (uname(&uts) == 0 && strcmp(uts.machine, "ppc64") == 0)
# endif
            if (sigsetjmp(ill_jmp, 1) == 0) {
                OPENSSL_ppc64_probe();
                OPENSSL_ppccap_P |= PPC_FPU64;
            }
        } else {
            /*
             * Wanted code detecting POWER6 CPU and setting PPC_FPU64
             */
        }
    }

    if (sigsetjmp(ill_jmp, 1) == 0) {
        OPENSSL_altivec_probe();
        OPENSSL_ppccap_P |= PPC_ALTIVEC;
        /* The crypto probe is only attempted once AltiVec has succeeded. */
        if (sigsetjmp(ill_jmp, 1) == 0) {
            OPENSSL_crypto207_probe();
            OPENSSL_ppccap_P |= PPC_CRYPTO207;
        }
    }

    if (sigsetjmp(ill_jmp, 1) == 0) {
        OPENSSL_madd300_probe();
        OPENSSL_ppccap_P |= PPC_MADD300;
    }
#endif

    /*
     * Time-base access is always probed.  The else-branch is entered only
     * when the first probe faulted (sigsetjmp() returned non-zero via
     * ill_handler()); it then re-arms the jump buffer and tries the second
     * access method.  At most one of the two flags ends up set.
     */
    if (sigsetjmp(ill_jmp, 1) == 0) {
        OPENSSL_rdtsc_mftb();
        OPENSSL_ppccap_P |= PPC_MFTB;
    } else if (sigsetjmp(ill_jmp, 1) == 0) {
        OPENSSL_rdtsc_mfspr268();
        OPENSSL_ppccap_P |= PPC_MFSPR268;
    }

    /* Put back the caller's SIGILL disposition and signal mask. */
    sigaction(SIGILL, &ill_oact, NULL);
    sigprocmask(SIG_SETMASK, &oset, NULL);
}