	return (val);
}
#endif

/*
 * Convert a bookmark to a string.
 */
static void
bookmark_to_name(zbookmark_t *zb, char *buf, size_t len)
{
	(void) snprintf(buf, len, "%llx:%llx:%llx:%llx",
	    (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object,
	    (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid);
}

/*
 * Convert a string to a bookmark.
 */
#ifdef _KERNEL
static void
name_to_bookmark(char *buf, zbookmark_t *zb)
{
	zb->zb_objset = _strtonum(buf, &buf);
	ASSERT(*buf == ':');
	zb->zb_object = _strtonum(buf + 1, &buf);
	ASSERT(*buf == ':');
	zb->zb_level = (int)_strtonum(buf + 1, &buf);
	ASSERT(*buf == ':');
	zb->zb_blkid = _strtonum(buf + 1, &buf);
	ASSERT(*buf == '\0');
}
#endif
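/*
 * The two routines above are exact inverses. For example, a bookmark of
 * { objset 21, object 8, level 0, blkid 499 } is rendered by
 * bookmark_to_name() as the string "15:8:0:1f3", and name_to_bookmark()
 * recovers the same four fields by parsing the hex values between the
 * colons.
 */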
/*
 * Log an uncorrectable error to the persistent error log. We add it to the
 * spa's list of pending errors. The changes are actually synced out to disk
 * during spa_errlog_sync().
 */
void
spa_log_error(spa_t *spa, zio_t *zio)
{
	zbookmark_t *zb = &zio->io_logical->io_bookmark;
	spa_error_entry_t search;
	spa_error_entry_t *new;
	avl_tree_t *tree;
	avl_index_t where;

	/*
	 * If we are trying to import a pool, ignore any errors, as we won't be
	 * writing to the pool any time soon.
	 */
	if (spa->spa_load_state == SPA_LOAD_TRYIMPORT)
		return;

	mutex_enter(&spa->spa_errlist_lock);

	/*
	 * If we have had a request to rotate the log, log it to the next list
	 * instead of the current one.
	 */
	if (spa->spa_scrub_active || spa->spa_scrub_finished)
		tree = &spa->spa_errlist_scrub;
	else
		tree = &spa->spa_errlist_last;

	search.se_bookmark = *zb;
	if (avl_find(tree, &search, &where) != NULL) {
		mutex_exit(&spa->spa_errlist_lock);
		return;
	}

	new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
	new->se_bookmark = *zb;
	avl_insert(tree, new, where);

	mutex_exit(&spa->spa_errlist_lock);
}

/*
 * Return the number of errors currently in the error log. This is actually
 * the sum of both the last log and the current log, since we don't know the
 * union of these logs until we reach userland.
 */
uint64_t
spa_get_errlog_size(spa_t *spa)
{
	uint64_t total = 0, count;

	mutex_enter(&spa->spa_errlog_lock);
	if (spa->spa_errlog_scrub != 0 &&
	    zap_count(spa->spa_meta_objset, spa->spa_errlog_scrub,
	    &count) == 0)
		total += count;

	if (spa->spa_errlog_last != 0 && !spa->spa_scrub_finished &&
	    zap_count(spa->spa_meta_objset, spa->spa_errlog_last,
	    &count) == 0)
		total += count;
	mutex_exit(&spa->spa_errlog_lock);

	mutex_enter(&spa->spa_errlist_lock);
	total += avl_numnodes(&spa->spa_errlist_last);
	total += avl_numnodes(&spa->spa_errlist_scrub);
	mutex_exit(&spa->spa_errlist_lock);

	return (total);
}

#ifdef _KERNEL
static int
process_error_log(spa_t *spa, uint64_t obj, void *addr, size_t *count)
{
	zap_cursor_t zc;
	zap_attribute_t za;
	zbookmark_t zb;

	if (obj == 0)
		return (0);

	for (zap_cursor_init(&zc, spa->spa_meta_objset, obj);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {

		if (*count == 0) {
			zap_cursor_fini(&zc);
			return (ENOMEM);
		}

		name_to_bookmark(za.za_name, &zb);

		if (copyout(&zb, (char *)addr +
		    (*count - 1) * sizeof (zbookmark_t),
		    sizeof (zbookmark_t)) != 0) {
			/* don't leak the cursor on the error path */
			zap_cursor_fini(&zc);
			return (EFAULT);
		}

		*count -= 1;
	}

	zap_cursor_fini(&zc);

	return (0);
}

static int
process_error_list(avl_tree_t *list, void *addr, size_t *count)
{
	spa_error_entry_t *se;

	for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) {

		if (*count == 0)
			return (ENOMEM);

		if (copyout(&se->se_bookmark, (char *)addr +
		    (*count - 1) * sizeof (zbookmark_t),
		    sizeof (zbookmark_t)) != 0)
			return (EFAULT);

		*count -= 1;
	}

	return (0);
}
#endif
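/*
 * Note that both routines above fill the user buffer from the end toward
 * the front: each bookmark is written at slot (*count - 1) and *count is
 * then decremented. On return, *count is the number of unused slots at
 * the front of the buffer, so the caller can tell how many entries were
 * written without a separate output parameter.
 */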
/*
 * Copy all known errors to userland as an array of bookmarks. This is
 * actually a union of the on-disk last log and current log, as well as any
 * pending error requests.
 *
 * Because the act of reading the on-disk log could cause errors to be
 * generated, we have two separate locks: one for the error log and one for the
 * in-core error lists. We only need the error list lock to log an error, so
 * we grab the error log lock while we read the on-disk logs, and only pick up
 * the error list lock when we are finished.
 */
int
spa_get_errlog(spa_t *spa, void *uaddr, size_t *count)
{
	int ret = 0;

#ifdef _KERNEL
	mutex_enter(&spa->spa_errlog_lock);

	ret = process_error_log(spa, spa->spa_errlog_scrub, uaddr, count);

	if (!ret && !spa->spa_scrub_finished)
		ret = process_error_log(spa, spa->spa_errlog_last, uaddr,
		    count);

	mutex_enter(&spa->spa_errlist_lock);
	if (!ret)
		ret = process_error_list(&spa->spa_errlist_scrub, uaddr,
		    count);
	if (!ret)
		ret = process_error_list(&spa->spa_errlist_last, uaddr,
		    count);
	mutex_exit(&spa->spa_errlist_lock);

	mutex_exit(&spa->spa_errlog_lock);
#endif

	return (ret);
}

/*
 * Called when a scrub completes. This simply sets a bit which tells
 * spa_log_error() which AVL tree to add new errors to. spa_errlog_sync()
 * is responsible for actually syncing the changes to the underlying objects.
 */
void
spa_errlog_rotate(spa_t *spa)
{
	mutex_enter(&spa->spa_errlist_lock);
	spa->spa_scrub_finished = B_TRUE;
	mutex_exit(&spa->spa_errlist_lock);
}

/*
 * Discard any pending errors from the spa_t. Called when unloading a faulted
 * pool, as the errors encountered during the open cannot be synced to disk.
 */
void
spa_errlog_drain(spa_t *spa)
{
	spa_error_entry_t *se;
	void *cookie;

	mutex_enter(&spa->spa_errlist_lock);

	cookie = NULL;
	while ((se = avl_destroy_nodes(&spa->spa_errlist_last,
	    &cookie)) != NULL)
		kmem_free(se, sizeof (spa_error_entry_t));
	cookie = NULL;
	while ((se = avl_destroy_nodes(&spa->spa_errlist_scrub,
	    &cookie)) != NULL)
		kmem_free(se, sizeof (spa_error_entry_t));

	mutex_exit(&spa->spa_errlist_lock);
}

/*
 * Process a list of errors into the current on-disk log.
 */
static void
sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
{
	spa_error_entry_t *se;
	char buf[64];
	void *cookie;

	if (avl_numnodes(t) != 0) {
		/* create log if necessary */
		if (*obj == 0)
			*obj = zap_create(spa->spa_meta_objset,
			    DMU_OT_ERROR_LOG, DMU_OT_NONE,
			    0, tx);

		/* add errors to the current log */
		for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) {
			char *name = se->se_name ? se->se_name : "";

			bookmark_to_name(&se->se_bookmark, buf, sizeof (buf));

			(void) zap_update(spa->spa_meta_objset,
			    *obj, buf, 1, strlen(name) + 1, name, tx);
		}

		/* purge the error list */
		cookie = NULL;
		while ((se = avl_destroy_nodes(t, &cookie)) != NULL)
			kmem_free(se, sizeof (spa_error_entry_t));
	}
}
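/*
 * For example, each entry written by sync_error_list() is a ZAP pair whose
 * name is the bookmark string produced by bookmark_to_name() (e.g.
 * "15:8:0:1f3") and whose value is the entry's se_name (or "" when there
 * is none), stored as an array of strlen(name) + 1 single-byte integers.
 */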
/*
 * Sync the error log out to disk. This is a little tricky because the act of
 * writing the error log requires the spa_errlist_lock. So, we need to lock
 * the error lists, take a copy of the lists, and then reinitialize them.
 * Then, we drop the error list lock and take the error log lock, at which
 * point we do the errlog processing. If we encounter an I/O error during
 * this process, we can still add the error to the list. Note that this will
 * result in the perpetual recycling of errors, but it is an unlikely
 * situation and not a performance-critical operation.
 */
void
spa_errlog_sync(spa_t *spa, uint64_t txg)
{
	dmu_tx_t *tx;
	avl_tree_t scrub, last;
	int scrub_finished;

	mutex_enter(&spa->spa_errlist_lock);

	/*
	 * Bail out early under normal circumstances.
	 */
	if (avl_numnodes(&spa->spa_errlist_scrub) == 0 &&
	    avl_numnodes(&spa->spa_errlist_last) == 0 &&
	    !spa->spa_scrub_finished) {
		mutex_exit(&spa->spa_errlist_lock);
		return;
	}

	spa_get_errlists(spa, &last, &scrub);
	scrub_finished = spa->spa_scrub_finished;
	spa->spa_scrub_finished = B_FALSE;

	mutex_exit(&spa->spa_errlist_lock);
	mutex_enter(&spa->spa_errlog_lock);

	tx = dmu_tx_create_assigned(spa->spa_dsl_pool, txg);

	/*
	 * Sync out the current list of errors.
	 */
	sync_error_list(spa, &last, &spa->spa_errlog_last, tx);

	/*
	 * Rotate the log if necessary.
	 */
	if (scrub_finished) {
		if (spa->spa_errlog_last != 0)
			VERIFY(dmu_object_free(spa->spa_meta_objset,
			    spa->spa_errlog_last, tx) == 0);
		spa->spa_errlog_last = spa->spa_errlog_scrub;
		spa->spa_errlog_scrub = 0;

		sync_error_list(spa, &scrub, &spa->spa_errlog_last, tx);
	}

	/*
	 * Sync out any pending scrub errors.
	 */
	sync_error_list(spa, &scrub, &spa->spa_errlog_scrub, tx);

	/*
	 * Update the MOS to reflect the new values.
	 */
	(void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_ERRLOG_LAST, sizeof (uint64_t), 1,
	    &spa->spa_errlog_last, tx);
	(void) zap_update(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_ERRLOG_SCRUB, sizeof (uint64_t), 1,
	    &spa->spa_errlog_scrub, tx);

	dmu_tx_commit(tx);

	mutex_exit(&spa->spa_errlog_lock);
}
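/*
 * Lock-ordering summary (within this file): whenever both locks are held
 * at once, as in spa_get_errlog(), spa_errlog_lock is taken before
 * spa_errlist_lock. spa_errlog_sync() never nests them; it drops the list
 * lock before taking the log lock, which is why an I/O error hit while
 * syncing the log can still be recorded through spa_log_error().
 */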