1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (C) 2016 Gvozden Ne��kovi��. All rights reserved. 23 */ 24 25#ifndef _VDEV_RAIDZ_H 26#define _VDEV_RAIDZ_H 27 28#include <sys/types.h> 29#include <sys/debug.h> 30#include <sys/kstat.h> 31#include <sys/abd.h> 32#include <sys/vdev_impl.h> 33#include <sys/abd_impl.h> 34#include <sys/zfs_rlock.h> 35 36#ifdef __cplusplus 37extern "C" { 38#endif 39 40#define CODE_P (0U) 41#define CODE_Q (1U) 42#define CODE_R (2U) 43 44#define PARITY_P (1U) 45#define PARITY_PQ (2U) 46#define PARITY_PQR (3U) 47 48#define TARGET_X (0U) 49#define TARGET_Y (1U) 50#define TARGET_Z (2U) 51 52/* 53 * Parity generation methods indexes 54 */ 55enum raidz_math_gen_op { 56 RAIDZ_GEN_P = 0, 57 RAIDZ_GEN_PQ, 58 RAIDZ_GEN_PQR, 59 RAIDZ_GEN_NUM = 3 60}; 61/* 62 * Data reconstruction methods indexes 63 */ 64enum raidz_rec_op { 65 RAIDZ_REC_P = 0, 66 RAIDZ_REC_Q, 67 RAIDZ_REC_R, 68 RAIDZ_REC_PQ, 69 RAIDZ_REC_PR, 70 RAIDZ_REC_QR, 71 RAIDZ_REC_PQR, 72 RAIDZ_REC_NUM = 7 73}; 74 75extern const char *const raidz_gen_name[RAIDZ_GEN_NUM]; 76extern const char *const raidz_rec_name[RAIDZ_REC_NUM]; 77 78/* 79 * Methods used to define raidz implementation 80 * 81 * @raidz_gen_f Parity generation function 82 * @par1 pointer to raidz_map 83 * @raidz_rec_f Data reconstruction function 84 * @par1 pointer to raidz_map 85 * @par2 array of reconstruction targets 86 * @will_work_f Function returns TRUE if impl. is supported on the system 87 * @init_impl_f Function is called once on init 88 * @fini_impl_f Function is called once on fini 89 */ 90typedef void (*raidz_gen_f)(void *); 91typedef int (*raidz_rec_f)(void *, const int *); 92typedef boolean_t (*will_work_f)(void); 93typedef void (*init_impl_f)(void); 94typedef void (*fini_impl_f)(void); 95 96#define RAIDZ_IMPL_NAME_MAX (20) 97 98typedef struct raidz_impl_ops { 99 init_impl_f init; 100 fini_impl_f fini; 101 raidz_gen_f gen[RAIDZ_GEN_NUM]; /* Parity generate functions */ 102 raidz_rec_f rec[RAIDZ_REC_NUM]; /* Data reconstruction functions */ 103 will_work_f is_supported; /* Support check function */ 104 char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */ 105} raidz_impl_ops_t; 106 107 108typedef struct raidz_col { 109 int rc_devidx; /* child device index for I/O */ 110 uint32_t rc_size; /* I/O size */ 111 uint64_t rc_offset; /* device offset */ 112 abd_t rc_abdstruct; /* rc_abd probably points here */ 113 abd_t *rc_abd; /* I/O data */ 114 abd_t *rc_orig_data; /* pre-reconstruction */ 115 int rc_error; /* I/O error for this device */ 116 uint8_t rc_tried:1; /* Did we attempt this I/O column? */ 117 uint8_t rc_skipped:1; /* Did we skip this I/O column? */ 118 uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */ 119 uint8_t rc_force_repair:1; /* Write good data to this column */ 120 uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */ 121 int rc_shadow_devidx; /* for double write during expansion */ 122 int rc_shadow_error; /* for double write during expansion */ 123 uint64_t rc_shadow_offset; /* for double write during expansion */ 124} raidz_col_t; 125 126typedef struct raidz_row { 127 int rr_cols; /* Regular column count */ 128 int rr_scols; /* Count including skipped columns */ 129 int rr_bigcols; /* Remainder data column count */ 130 int rr_missingdata; /* Count of missing data devices */ 131 int rr_missingparity; /* Count of missing parity devices */ 132 int rr_firstdatacol; /* First data column/parity count */ 133 abd_t *rr_abd_empty; /* dRAID empty sector buffer */ 134 int rr_nempty; /* empty sectors included in parity */ 135#ifdef ZFS_DEBUG 136 uint64_t rr_offset; /* Logical offset for *_io_verify() */ 137 uint64_t rr_size; /* Physical size for *_io_verify() */ 138#endif 139 raidz_col_t rr_col[]; /* Flexible array of I/O columns */ 140} raidz_row_t; 141 142typedef struct raidz_map { 143 boolean_t rm_ecksuminjected; /* checksum error was injected */ 144 int rm_nrows; /* Regular row count */ 145 int rm_nskip; /* RAIDZ sectors skipped for padding */ 146 int rm_skipstart; /* Column index of padding start */ 147 int rm_original_width; /* pre-expansion width of raidz vdev */ 148 int rm_nphys_cols; /* num entries in rm_phys_col[] */ 149 zfs_locked_range_t *rm_lr; 150 const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */ 151 raidz_col_t *rm_phys_col; /* if non-NULL, read i/o aggregation */ 152 raidz_row_t *rm_row[]; /* flexible array of rows */ 153} raidz_map_t; 154 155/* 156 * Nodes in vdev_raidz_t:vd_expand_txgs. 157 * Blocks with physical birth time of re_txg or later have the specified 158 * logical width (until the next node). 159 */ 160typedef struct reflow_node { 161 uint64_t re_txg; 162 uint64_t re_logical_width; 163 avl_node_t re_link; 164} reflow_node_t; 165 166 167#define RAIDZ_ORIGINAL_IMPL (INT_MAX) 168 169extern const raidz_impl_ops_t vdev_raidz_scalar_impl; 170extern boolean_t raidz_will_scalar_work(void); 171 172#if defined(__x86_64) && defined(HAVE_SSE2) /* only x86_64 for now */ 173extern const raidz_impl_ops_t vdev_raidz_sse2_impl; 174#endif 175#if defined(__x86_64) && defined(HAVE_SSSE3) /* only x86_64 for now */ 176extern const raidz_impl_ops_t vdev_raidz_ssse3_impl; 177#endif 178#if defined(__x86_64) && defined(HAVE_AVX2) /* only x86_64 for now */ 179extern const raidz_impl_ops_t vdev_raidz_avx2_impl; 180#endif 181#if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */ 182extern const raidz_impl_ops_t vdev_raidz_avx512f_impl; 183#endif 184#if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */ 185extern const raidz_impl_ops_t vdev_raidz_avx512bw_impl; 186#endif 187#if defined(__aarch64__) 188extern const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl; 189extern const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl; 190#endif 191#if defined(__powerpc__) 192extern const raidz_impl_ops_t vdev_raidz_powerpc_altivec_impl; 193#endif 194 195/* 196 * Commonly used raidz_map helpers 197 * 198 * raidz_parity Returns parity of the RAIDZ block 199 * raidz_ncols Returns number of columns the block spans 200 * Note, all rows have the same number of columns. 201 * raidz_nbigcols Returns number of big columns 202 * raidz_col_p Returns pointer to a column 203 * raidz_col_size Returns size of a column 204 * raidz_big_size Returns size of big columns 205 * raidz_short_size Returns size of short columns 206 */ 207#define raidz_parity(rm) ((rm)->rm_row[0]->rr_firstdatacol) 208#define raidz_ncols(rm) ((rm)->rm_row[0]->rr_cols) 209#define raidz_nbigcols(rm) ((rm)->rm_bigcols) 210#define raidz_col_p(rm, c) ((rm)->rm_col + (c)) 211#define raidz_col_size(rm, c) ((rm)->rm_col[c].rc_size) 212#define raidz_big_size(rm) (raidz_col_size(rm, CODE_P)) 213#define raidz_short_size(rm) (raidz_col_size(rm, raidz_ncols(rm)-1)) 214 215/* 216 * Macro defines an RAIDZ parity generation method 217 * 218 * @code parity the function produce 219 * @impl name of the implementation 220 */ 221#define _RAIDZ_GEN_WRAP(code, impl) \ 222static void \ 223impl ## _gen_ ## code(void *rrp) \ 224{ \ 225 raidz_row_t *rr = (raidz_row_t *)rrp; \ 226 raidz_generate_## code ## _impl(rr); \ 227} 228 229/* 230 * Macro defines an RAIDZ data reconstruction method 231 * 232 * @code parity the function produce 233 * @impl name of the implementation 234 */ 235#define _RAIDZ_REC_WRAP(code, impl) \ 236static int \ 237impl ## _rec_ ## code(void *rrp, const int *tgtidx) \ 238{ \ 239 raidz_row_t *rr = (raidz_row_t *)rrp; \ 240 return (raidz_reconstruct_## code ## _impl(rr, tgtidx)); \ 241} 242 243/* 244 * Define all gen methods for an implementation 245 * 246 * @impl name of the implementation 247 */ 248#define DEFINE_GEN_METHODS(impl) \ 249 _RAIDZ_GEN_WRAP(p, impl); \ 250 _RAIDZ_GEN_WRAP(pq, impl); \ 251 _RAIDZ_GEN_WRAP(pqr, impl) 252 253/* 254 * Define all rec functions for an implementation 255 * 256 * @impl name of the implementation 257 */ 258#define DEFINE_REC_METHODS(impl) \ 259 _RAIDZ_REC_WRAP(p, impl); \ 260 _RAIDZ_REC_WRAP(q, impl); \ 261 _RAIDZ_REC_WRAP(r, impl); \ 262 _RAIDZ_REC_WRAP(pq, impl); \ 263 _RAIDZ_REC_WRAP(pr, impl); \ 264 _RAIDZ_REC_WRAP(qr, impl); \ 265 _RAIDZ_REC_WRAP(pqr, impl) 266 267#define RAIDZ_GEN_METHODS(impl) \ 268{ \ 269 [RAIDZ_GEN_P] = & impl ## _gen_p, \ 270 [RAIDZ_GEN_PQ] = & impl ## _gen_pq, \ 271 [RAIDZ_GEN_PQR] = & impl ## _gen_pqr \ 272} 273 274#define RAIDZ_REC_METHODS(impl) \ 275{ \ 276 [RAIDZ_REC_P] = & impl ## _rec_p, \ 277 [RAIDZ_REC_Q] = & impl ## _rec_q, \ 278 [RAIDZ_REC_R] = & impl ## _rec_r, \ 279 [RAIDZ_REC_PQ] = & impl ## _rec_pq, \ 280 [RAIDZ_REC_PR] = & impl ## _rec_pr, \ 281 [RAIDZ_REC_QR] = & impl ## _rec_qr, \ 282 [RAIDZ_REC_PQR] = & impl ## _rec_pqr \ 283} 284 285 286typedef struct raidz_impl_kstat { 287 uint64_t gen[RAIDZ_GEN_NUM]; /* gen method speed B/s */ 288 uint64_t rec[RAIDZ_REC_NUM]; /* rec method speed B/s */ 289} raidz_impl_kstat_t; 290 291/* 292 * Enumerate various multiplication constants 293 * used in reconstruction methods 294 */ 295typedef enum raidz_mul_info { 296 /* Reconstruct Q */ 297 MUL_Q_X = 0, 298 /* Reconstruct R */ 299 MUL_R_X = 0, 300 /* Reconstruct PQ */ 301 MUL_PQ_X = 0, 302 MUL_PQ_Y = 1, 303 /* Reconstruct PR */ 304 MUL_PR_X = 0, 305 MUL_PR_Y = 1, 306 /* Reconstruct QR */ 307 MUL_QR_XQ = 0, 308 MUL_QR_X = 1, 309 MUL_QR_YQ = 2, 310 MUL_QR_Y = 3, 311 /* Reconstruct PQR */ 312 MUL_PQR_XP = 0, 313 MUL_PQR_XQ = 1, 314 MUL_PQR_XR = 2, 315 MUL_PQR_YU = 3, 316 MUL_PQR_YP = 4, 317 MUL_PQR_YQ = 5, 318 319 MUL_CNT = 6 320} raidz_mul_info_t; 321 322/* 323 * Powers of 2 in the Galois field. 324 */ 325extern const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))); 326/* Logs of 2 in the Galois field defined above. */ 327extern const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))); 328 329/* 330 * Multiply a given number by 2 raised to the given power. 331 */ 332static inline uint8_t 333vdev_raidz_exp2(const uint8_t a, const unsigned exp) 334{ 335 if (a == 0) 336 return (0); 337 338 return (vdev_raidz_pow2[(exp + (unsigned)vdev_raidz_log2[a]) % 255]); 339} 340 341/* 342 * Galois Field operations. 343 * 344 * gf_exp2 - computes 2 raised to the given power 345 * gf_exp4 - computes 4 raised to the given power 346 * gf_mul - multiplication 347 * gf_div - division 348 * gf_inv - multiplicative inverse 349 */ 350typedef unsigned gf_t; 351typedef unsigned gf_log_t; 352 353static inline gf_t 354gf_mul(const gf_t a, const gf_t b) 355{ 356 gf_log_t logsum; 357 358 if (a == 0 || b == 0) 359 return (0); 360 361 logsum = (gf_log_t)vdev_raidz_log2[a] + (gf_log_t)vdev_raidz_log2[b]; 362 363 return ((gf_t)vdev_raidz_pow2[logsum % 255]); 364} 365 366static inline gf_t 367gf_div(const gf_t a, const gf_t b) 368{ 369 gf_log_t logsum; 370 371 ASSERT3U(b, >, 0); 372 if (a == 0) 373 return (0); 374 375 logsum = (gf_log_t)255 + (gf_log_t)vdev_raidz_log2[a] - 376 (gf_log_t)vdev_raidz_log2[b]; 377 378 return ((gf_t)vdev_raidz_pow2[logsum % 255]); 379} 380 381static inline gf_t 382gf_inv(const gf_t a) 383{ 384 gf_log_t logsum; 385 386 ASSERT3U(a, >, 0); 387 388 logsum = (gf_log_t)255 - (gf_log_t)vdev_raidz_log2[a]; 389 390 return ((gf_t)vdev_raidz_pow2[logsum]); 391} 392 393static inline gf_t 394gf_exp2(gf_log_t exp) 395{ 396 return (vdev_raidz_pow2[exp % 255]); 397} 398 399static inline gf_t 400gf_exp4(gf_log_t exp) 401{ 402 ASSERT3U(exp, <=, 255); 403 return ((gf_t)vdev_raidz_pow2[(2 * exp) % 255]); 404} 405 406#ifdef __cplusplus 407} 408#endif 409 410#endif /* _VDEV_RAIDZ_H */ 411