1238825Smm/* SPDX-License-Identifier: GPL-2.0 */ 2238825Smm/* 3238825Smm * In-kernel FPU support functions 4238825Smm * 5238825Smm * 6238825Smm * Consider these guidelines before using in-kernel FPU functions: 7238825Smm * 8238825Smm * 1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel 9238825Smm * use of floating-point or vector registers and instructions. 10238825Smm * 11238825Smm * 2. For kernel_fpu_begin(), specify the vector register range you want to 12238825Smm * use with the KERNEL_VXR_* constants. Consider these usage guidelines: 13238825Smm * 14238825Smm * a) If your function typically runs in process-context, use the lower 15238825Smm * half of the vector registers, for example, specify KERNEL_VXR_LOW. 16238825Smm * b) If your function typically runs in soft-irq or hard-irq context, 17238825Smm * prefer using the upper half of the vector registers, for example, 18238825Smm * specify KERNEL_VXR_HIGH. 19238825Smm * 20238825Smm * If you adhere to these guidelines, an interrupted process context 21238825Smm * does not require to save and restore vector registers because of 22238825Smm * disjoint register ranges. 23238825Smm * 24238825Smm * Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions 25238825Smm * includes logic to save and restore up to 16 vector registers at once. 26238825Smm * 27238825Smm * 3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different 28238825Smm * struct kernel_fpu states. Vector registers that are in use by outer 29238825Smm * levels are saved and restored. You can minimize the save and restore 30299529Smm * effort by choosing disjoint vector register ranges. 31299529Smm * 32299529Smm * 5. To use vector floating-point instructions, specify the KERNEL_FPC 33238825Smm * flag to save and restore floating-point controls in addition to any 34238825Smm * vector register range. 35238825Smm * 36238825Smm * 6. To use floating-point registers and instructions only, specify the 37238825Smm * KERNEL_FPR flag. This flag triggers a save and restore of vector 38238825Smm * registers V0 to V15 and floating-point controls. 39238825Smm * 40238825Smm * Copyright IBM Corp. 2015 41238825Smm * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> 42238825Smm */ 43238825Smm 44238825Smm#ifndef _ASM_S390_FPU_H 45238825Smm#define _ASM_S390_FPU_H 46238825Smm 47238825Smm#include <linux/processor.h> 48246229Skientzle#include <linux/preempt.h> 49246229Skientzle#include <linux/string.h> 50299529Smm#include <linux/sched.h> 51299529Smm#include <asm/sigcontext.h> 52299529Smm#include <asm/fpu-types.h> 53299529Smm#include <asm/fpu-insn.h> 54238825Smm#include <asm/facility.h> 55238825Smm 56238825Smmstatic inline bool cpu_has_vx(void) 57238825Smm{ 58238825Smm return likely(test_facility(129)); 59238825Smm} 60238825Smm 61238825Smmenum { 62238825Smm KERNEL_FPC_BIT = 0, 63238825Smm KERNEL_VXR_V0V7_BIT, 64238825Smm KERNEL_VXR_V8V15_BIT, 65238825Smm KERNEL_VXR_V16V23_BIT, 66238825Smm KERNEL_VXR_V24V31_BIT, 67238825Smm}; 68238825Smm 69238825Smm#define KERNEL_FPC BIT(KERNEL_FPC_BIT) 70238825Smm#define KERNEL_VXR_V0V7 BIT(KERNEL_VXR_V0V7_BIT) 71238825Smm#define KERNEL_VXR_V8V15 BIT(KERNEL_VXR_V8V15_BIT) 72238825Smm#define KERNEL_VXR_V16V23 BIT(KERNEL_VXR_V16V23_BIT) 73238825Smm#define KERNEL_VXR_V24V31 BIT(KERNEL_VXR_V24V31_BIT) 74238825Smm 75238825Smm#define KERNEL_VXR_LOW (KERNEL_VXR_V0V7 | KERNEL_VXR_V8V15) 76238825Smm#define KERNEL_VXR_MID (KERNEL_VXR_V8V15 | KERNEL_VXR_V16V23) 77238825Smm#define KERNEL_VXR_HIGH (KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31) 78238825Smm 79238825Smm#define KERNEL_VXR (KERNEL_VXR_LOW | KERNEL_VXR_HIGH) 80238825Smm#define KERNEL_FPR (KERNEL_FPC | KERNEL_VXR_LOW) 81238825Smm 82238825Smmvoid load_fpu_state(struct fpu *state, int flags); 83238825Smmvoid save_fpu_state(struct fpu *state, int flags); 84238825Smmvoid __kernel_fpu_begin(struct kernel_fpu *state, int flags); 85238825Smmvoid __kernel_fpu_end(struct kernel_fpu *state, int flags); 86238825Smm 87238825Smmstatic __always_inline void save_vx_regs(__vector128 *vxrs) 88238825Smm{ 89238825Smm fpu_vstm(0, 15, &vxrs[0]); 90 fpu_vstm(16, 31, &vxrs[16]); 91} 92 93static __always_inline void load_vx_regs(__vector128 *vxrs) 94{ 95 fpu_vlm(0, 15, &vxrs[0]); 96 fpu_vlm(16, 31, &vxrs[16]); 97} 98 99static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset) 100{ 101 fpu_std(0, &fprs[0 * offset]); 102 fpu_std(1, &fprs[1 * offset]); 103 fpu_std(2, &fprs[2 * offset]); 104 fpu_std(3, &fprs[3 * offset]); 105 fpu_std(4, &fprs[4 * offset]); 106 fpu_std(5, &fprs[5 * offset]); 107 fpu_std(6, &fprs[6 * offset]); 108 fpu_std(7, &fprs[7 * offset]); 109 fpu_std(8, &fprs[8 * offset]); 110 fpu_std(9, &fprs[9 * offset]); 111 fpu_std(10, &fprs[10 * offset]); 112 fpu_std(11, &fprs[11 * offset]); 113 fpu_std(12, &fprs[12 * offset]); 114 fpu_std(13, &fprs[13 * offset]); 115 fpu_std(14, &fprs[14 * offset]); 116 fpu_std(15, &fprs[15 * offset]); 117} 118 119static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset) 120{ 121 fpu_ld(0, &fprs[0 * offset]); 122 fpu_ld(1, &fprs[1 * offset]); 123 fpu_ld(2, &fprs[2 * offset]); 124 fpu_ld(3, &fprs[3 * offset]); 125 fpu_ld(4, &fprs[4 * offset]); 126 fpu_ld(5, &fprs[5 * offset]); 127 fpu_ld(6, &fprs[6 * offset]); 128 fpu_ld(7, &fprs[7 * offset]); 129 fpu_ld(8, &fprs[8 * offset]); 130 fpu_ld(9, &fprs[9 * offset]); 131 fpu_ld(10, &fprs[10 * offset]); 132 fpu_ld(11, &fprs[11 * offset]); 133 fpu_ld(12, &fprs[12 * offset]); 134 fpu_ld(13, &fprs[13 * offset]); 135 fpu_ld(14, &fprs[14 * offset]); 136 fpu_ld(15, &fprs[15 * offset]); 137} 138 139static __always_inline void save_fp_regs(freg_t *fprs) 140{ 141 __save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t)); 142} 143 144static __always_inline void load_fp_regs(freg_t *fprs) 145{ 146 __load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t)); 147} 148 149static __always_inline void save_fp_regs_vx(__vector128 *vxrs) 150{ 151 freg_t *fprs = (freg_t *)&vxrs[0].high; 152 153 __save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t)); 154} 155 156static __always_inline void load_fp_regs_vx(__vector128 *vxrs) 157{ 158 freg_t *fprs = (freg_t *)&vxrs[0].high; 159 160 __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t)); 161} 162 163static inline void load_user_fpu_regs(void) 164{ 165 struct thread_struct *thread = ¤t->thread; 166 167 if (!thread->ufpu_flags) 168 return; 169 load_fpu_state(&thread->ufpu, thread->ufpu_flags); 170 thread->ufpu_flags = 0; 171} 172 173static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags) 174{ 175 save_fpu_state(&thread->ufpu, flags); 176 __atomic_or(flags, &thread->ufpu_flags); 177} 178 179static inline void save_user_fpu_regs(void) 180{ 181 struct thread_struct *thread = ¤t->thread; 182 int mask, flags; 183 184 mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags); 185 flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR); 186 if (flags) 187 __save_user_fpu_regs(thread, flags); 188 barrier(); 189 WRITE_ONCE(thread->kfpu_flags, mask); 190} 191 192static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags) 193{ 194 struct thread_struct *thread = ¤t->thread; 195 int mask, uflags; 196 197 mask = __atomic_or(flags, &thread->kfpu_flags); 198 state->hdr.mask = mask; 199 uflags = READ_ONCE(thread->ufpu_flags); 200 if ((uflags & flags) != flags) 201 __save_user_fpu_regs(thread, ~uflags & flags); 202 if (mask & flags) 203 __kernel_fpu_begin(state, flags); 204} 205 206static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags) 207{ 208 int mask = state->hdr.mask; 209 210 if (mask & flags) 211 __kernel_fpu_end(state, flags); 212 barrier(); 213 WRITE_ONCE(current->thread.kfpu_flags, mask); 214} 215 216void __kernel_fpu_invalid_size(void); 217 218static __always_inline void kernel_fpu_check_size(int flags, unsigned int size) 219{ 220 unsigned int cnt = 0; 221 222 if (flags & KERNEL_VXR_V0V7) 223 cnt += 8; 224 if (flags & KERNEL_VXR_V8V15) 225 cnt += 8; 226 if (flags & KERNEL_VXR_V16V23) 227 cnt += 8; 228 if (flags & KERNEL_VXR_V24V31) 229 cnt += 8; 230 if (cnt != size) 231 __kernel_fpu_invalid_size(); 232} 233 234#define kernel_fpu_begin(state, flags) \ 235{ \ 236 typeof(state) s = (state); \ 237 int _flags = (flags); \ 238 \ 239 kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \ 240 _kernel_fpu_begin((struct kernel_fpu *)s, _flags); \ 241} 242 243#define kernel_fpu_end(state, flags) \ 244{ \ 245 typeof(state) s = (state); \ 246 int _flags = (flags); \ 247 \ 248 kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs)); \ 249 _kernel_fpu_end((struct kernel_fpu *)s, _flags); \ 250} 251 252static inline void save_kernel_fpu_regs(struct thread_struct *thread) 253{ 254 if (!thread->kfpu_flags) 255 return; 256 save_fpu_state(&thread->kfpu, thread->kfpu_flags); 257} 258 259static inline void restore_kernel_fpu_regs(struct thread_struct *thread) 260{ 261 if (!thread->kfpu_flags) 262 return; 263 load_fpu_state(&thread->kfpu, thread->kfpu_flags); 264} 265 266static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) 267{ 268 int i; 269 270 for (i = 0; i < __NUM_FPRS; i++) 271 fprs[i].ui = vxrs[i].high; 272} 273 274static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) 275{ 276 int i; 277 278 for (i = 0; i < __NUM_FPRS; i++) 279 vxrs[i].high = fprs[i].ui; 280} 281 282static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) 283{ 284 fpregs->pad = 0; 285 fpregs->fpc = fpu->fpc; 286 convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); 287} 288 289static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) 290{ 291 fpu->fpc = fpregs->fpc; 292 convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); 293} 294 295#endif /* _ASM_S390_FPU_H */ 296