secondary.c revision 259195
1145479Smp/*- 259243Sobrien * Copyright (c) 2009-2010 The FreeBSD Foundation 359243Sobrien * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org> 459243Sobrien * All rights reserved. 559243Sobrien * 659243Sobrien * This software was developed by Pawel Jakub Dawidek under sponsorship from 759243Sobrien * the FreeBSD Foundation. 859243Sobrien * 959243Sobrien * Redistribution and use in source and binary forms, with or without 1059243Sobrien * modification, are permitted provided that the following conditions 1159243Sobrien * are met: 1259243Sobrien * 1. Redistributions of source code must retain the above copyright 1359243Sobrien * notice, this list of conditions and the following disclaimer. 1459243Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1559243Sobrien * notice, this list of conditions and the following disclaimer in the 1659243Sobrien * documentation and/or other materials provided with the distribution. 17100616Smp * 1859243Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 1959243Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2059243Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2159243Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 2259243Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2359243Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2459243Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2559243Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2659243Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2759243Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2859243Sobrien * SUCH DAMAGE. 2959243Sobrien */ 3059243Sobrien 3159243Sobrien#include <sys/cdefs.h> 3259243Sobrien__FBSDID("$FreeBSD: head/sbin/hastd/secondary.c 259195 2013-12-10 20:06:41Z trociny $"); 3359243Sobrien 3459243Sobrien#include <sys/param.h> 35145479Smp#include <sys/time.h> 3659243Sobrien#include <sys/bio.h> 3759243Sobrien#include <sys/disk.h> 3859243Sobrien#include <sys/stat.h> 3959243Sobrien 4059243Sobrien#include <err.h> 4159243Sobrien#include <errno.h> 4259243Sobrien#include <fcntl.h> 4359243Sobrien#include <libgeom.h> 4459243Sobrien#include <pthread.h> 4559243Sobrien#include <signal.h> 4659243Sobrien#include <stdint.h> 4759243Sobrien#include <stdio.h> 4859243Sobrien#include <string.h> 4959243Sobrien#include <sysexits.h> 5059243Sobrien#include <unistd.h> 5159243Sobrien 5259243Sobrien#include <activemap.h> 5359243Sobrien#include <nv.h> 5459243Sobrien#include <pjdlog.h> 5559243Sobrien 5659243Sobrien#include "control.h" 57145479Smp#include "event.h" 58145479Smp#include "hast.h" 5959243Sobrien#include "hast_proto.h" 6059243Sobrien#include "hastd.h" 6159243Sobrien#include "hooks.h" 6259243Sobrien#include "metadata.h" 6359243Sobrien#include "proto.h" 6459243Sobrien#include "subr.h" 6559243Sobrien#include "synch.h" 6659243Sobrien 6759243Sobrienstruct hio { 6859243Sobrien uint64_t hio_seq; 6959243Sobrien int hio_error; 7059243Sobrien void *hio_data; 7159243Sobrien uint8_t hio_cmd; 7259243Sobrien uint64_t hio_offset; 7359243Sobrien uint64_t hio_length; 7459243Sobrien bool hio_memsync; 7559243Sobrien TAILQ_ENTRY(hio) hio_next; 7659243Sobrien}; 7759243Sobrien 7859243Sobrienstatic struct hast_resource *gres; 7959243Sobrien 8059243Sobrien/* 8159243Sobrien * Free list holds unused structures. When free list is empty, we have to wait 8259243Sobrien * until some in-progress requests are freed. 8359243Sobrien */ 8459243Sobrienstatic TAILQ_HEAD(, hio) hio_free_list; 8559243Sobrienstatic size_t hio_free_list_size; 8659243Sobrienstatic pthread_mutex_t hio_free_list_lock; 8759243Sobrienstatic pthread_cond_t hio_free_list_cond; 8859243Sobrien/* 8959243Sobrien * Disk thread (the one that does I/O requests) takes requests from this list. 9059243Sobrien */ 9159243Sobrienstatic TAILQ_HEAD(, hio) hio_disk_list; 9259243Sobrienstatic size_t hio_disk_list_size; 9359243Sobrienstatic pthread_mutex_t hio_disk_list_lock; 9459243Sobrienstatic pthread_cond_t hio_disk_list_cond; 9559243Sobrien/* 9659243Sobrien * Thread that sends requests back to primary takes requests from this list. 9759243Sobrien */ 9859243Sobrienstatic TAILQ_HEAD(, hio) hio_send_list; 9959243Sobrienstatic size_t hio_send_list_size; 10059243Sobrienstatic pthread_mutex_t hio_send_list_lock; 10159243Sobrienstatic pthread_cond_t hio_send_list_cond; 10259243Sobrien 10359243Sobrien/* 10459243Sobrien * Maximum number of outstanding I/O requests. 10559243Sobrien */ 10659243Sobrien#define HAST_HIO_MAX 256 10759243Sobrien 10859243Sobrienstatic void *recv_thread(void *arg); 10959243Sobrienstatic void *disk_thread(void *arg); 11059243Sobrienstatic void *send_thread(void *arg); 11159243Sobrien 11259243Sobrien#define QUEUE_INSERT(name, hio) do { \ 11359243Sobrien mtx_lock(&hio_##name##_list_lock); \ 11459243Sobrien if (TAILQ_EMPTY(&hio_##name##_list)) \ 11559243Sobrien cv_broadcast(&hio_##name##_list_cond); \ 11659243Sobrien TAILQ_INSERT_TAIL(&hio_##name##_list, (hio), hio_next); \ 11759243Sobrien hio_##name##_list_size++; \ 11859243Sobrien mtx_unlock(&hio_##name##_list_lock); \ 11959243Sobrien} while (0) 12059243Sobrien#define QUEUE_TAKE(name, hio) do { \ 12159243Sobrien mtx_lock(&hio_##name##_list_lock); \ 12259243Sobrien while (((hio) = TAILQ_FIRST(&hio_##name##_list)) == NULL) { \ 12359243Sobrien cv_wait(&hio_##name##_list_cond, \ 12459243Sobrien &hio_##name##_list_lock); \ 12559243Sobrien } \ 12659243Sobrien PJDLOG_ASSERT(hio_##name##_list_size != 0); \ 12759243Sobrien hio_##name##_list_size--; \ 12859243Sobrien TAILQ_REMOVE(&hio_##name##_list, (hio), hio_next); \ 12959243Sobrien mtx_unlock(&hio_##name##_list_lock); \ 13059243Sobrien} while (0) 13159243Sobrien 13259243Sobrienstatic void 13359243Sobrienoutput_status_aux(struct nv *nvout) 13459243Sobrien{ 13559243Sobrien 13659243Sobrien nv_add_uint64(nvout, (uint64_t)hio_free_list_size, "idle_queue_size"); 13759243Sobrien nv_add_uint64(nvout, (uint64_t)hio_disk_list_size, "local_queue_size"); 13859243Sobrien nv_add_uint64(nvout, (uint64_t)hio_send_list_size, "send_queue_size"); 13959243Sobrien} 14059243Sobrien 14159243Sobrienstatic void 14259243Sobrienhio_clear(struct hio *hio) 14359243Sobrien{ 14459243Sobrien 14559243Sobrien hio->hio_seq = 0; 14659243Sobrien hio->hio_error = 0; 14759243Sobrien hio->hio_cmd = HIO_UNDEF; 14859243Sobrien hio->hio_offset = 0; 14959243Sobrien hio->hio_length = 0; 15059243Sobrien hio->hio_memsync = false; 15159243Sobrien} 15259243Sobrien 15359243Sobrienstatic void 15459243Sobrienhio_copy(const struct hio *srchio, struct hio *dsthio) 15559243Sobrien{ 15659243Sobrien 15759243Sobrien /* 15859243Sobrien * We don't copy hio_error, hio_data and hio_next fields. 15959243Sobrien */ 16059243Sobrien 16159243Sobrien dsthio->hio_seq = srchio->hio_seq; 16259243Sobrien dsthio->hio_cmd = srchio->hio_cmd; 16369408Sache dsthio->hio_offset = srchio->hio_offset; 16469408Sache dsthio->hio_length = srchio->hio_length; 16569408Sache dsthio->hio_memsync = srchio->hio_memsync; 16659243Sobrien} 167145479Smp 168145479Smpstatic void 16959243Sobrieninit_environment(void) 17059243Sobrien{ 17159243Sobrien struct hio *hio; 17259243Sobrien unsigned int ii; 17359243Sobrien 17459243Sobrien /* 17559243Sobrien * Initialize lists, their locks and theirs condition variables. 17659243Sobrien */ 17759243Sobrien TAILQ_INIT(&hio_free_list); 17859243Sobrien mtx_init(&hio_free_list_lock); 17959243Sobrien cv_init(&hio_free_list_cond); 18059243Sobrien TAILQ_INIT(&hio_disk_list); 181145479Smp mtx_init(&hio_disk_list_lock); 182145479Smp cv_init(&hio_disk_list_cond); 18359243Sobrien TAILQ_INIT(&hio_send_list); 18459243Sobrien mtx_init(&hio_send_list_lock); 18559243Sobrien cv_init(&hio_send_list_cond); 18659243Sobrien 18759243Sobrien /* 18859243Sobrien * Allocate requests pool and initialize requests. 18959243Sobrien */ 19059243Sobrien for (ii = 0; ii < HAST_HIO_MAX; ii++) { 19159243Sobrien hio = malloc(sizeof(*hio)); 19259243Sobrien if (hio == NULL) { 19359243Sobrien pjdlog_exitx(EX_TEMPFAIL, 19459243Sobrien "Unable to allocate memory (%zu bytes) for hio request.", 19559243Sobrien sizeof(*hio)); 19659243Sobrien } 19759243Sobrien hio->hio_data = malloc(MAXPHYS); 19859243Sobrien if (hio->hio_data == NULL) { 19959243Sobrien pjdlog_exitx(EX_TEMPFAIL, 20059243Sobrien "Unable to allocate memory (%zu bytes) for gctl_data.", 20159243Sobrien (size_t)MAXPHYS); 20259243Sobrien } 20359243Sobrien hio_clear(hio); 20459243Sobrien TAILQ_INSERT_HEAD(&hio_free_list, hio, hio_next); 20559243Sobrien hio_free_list_size++; 20659243Sobrien } 20759243Sobrien} 20859243Sobrien 20959243Sobrienstatic void 21059243Sobrieninit_local(struct hast_resource *res) 21159243Sobrien{ 21259243Sobrien 21359243Sobrien if (metadata_read(res, true) == -1) 21459243Sobrien exit(EX_NOINPUT); 21559243Sobrien} 21659243Sobrien 21759243Sobrienstatic void 21859243Sobrieninit_remote(struct hast_resource *res, struct nv *nvin) 21959243Sobrien{ 22059243Sobrien uint64_t resuid; 22159243Sobrien struct nv *nvout; 22259243Sobrien unsigned char *map; 22359243Sobrien size_t mapsize; 22459243Sobrien 22559243Sobrien#ifdef notyet 22659243Sobrien /* Setup direction. */ 22759243Sobrien if (proto_send(res->hr_remoteout, NULL, 0) == -1) 22859243Sobrien pjdlog_errno(LOG_WARNING, "Unable to set connection direction"); 22959243Sobrien#endif 23059243Sobrien 23159243Sobrien nvout = nv_alloc(); 23259243Sobrien nv_add_int64(nvout, (int64_t)res->hr_datasize, "datasize"); 23359243Sobrien nv_add_int32(nvout, (int32_t)res->hr_extentsize, "extentsize"); 23459243Sobrien resuid = nv_get_uint64(nvin, "resuid"); 23559243Sobrien res->hr_primary_localcnt = nv_get_uint64(nvin, "localcnt"); 23659243Sobrien res->hr_primary_remotecnt = nv_get_uint64(nvin, "remotecnt"); 23759243Sobrien nv_add_uint64(nvout, res->hr_secondary_localcnt, "localcnt"); 23859243Sobrien nv_add_uint64(nvout, res->hr_secondary_remotecnt, "remotecnt"); 23959243Sobrien mapsize = activemap_calc_ondisk_size(res->hr_local_mediasize - 24059243Sobrien METADATA_SIZE, res->hr_extentsize, res->hr_local_sectorsize); 24159243Sobrien map = malloc(mapsize); 24259243Sobrien if (map == NULL) { 24359243Sobrien pjdlog_exitx(EX_TEMPFAIL, 24459243Sobrien "Unable to allocate memory (%zu bytes) for activemap.", 24559243Sobrien mapsize); 24659243Sobrien } 24759243Sobrien /* 24859243Sobrien * When we work as primary and secondary is missing we will increase 24959243Sobrien * localcnt in our metadata. When secondary is connected and synced 25059243Sobrien * we make localcnt be equal to remotecnt, which means nodes are more 25159243Sobrien * or less in sync. 25259243Sobrien * Split-brain condition is when both nodes are not able to communicate 25359243Sobrien * and are both configured as primary nodes. In turn, they can both 25459243Sobrien * make incompatible changes to the data and we have to detect that. 25559243Sobrien * Under split-brain condition we will increase our localcnt on first 25659243Sobrien * write and remote node will increase its localcnt on first write. 25759243Sobrien * When we connect we can see that primary's localcnt is greater than 25859243Sobrien * our remotecnt (primary was modified while we weren't watching) and 25959243Sobrien * our localcnt is greater than primary's remotecnt (we were modified 26059243Sobrien * while primary wasn't watching). 26159243Sobrien * There are many possible combinations which are all gathered below. 26259243Sobrien * Don't pay too much attention to exact numbers, the more important 26359243Sobrien * is to compare them. We compare secondary's local with primary's 26459243Sobrien * remote and secondary's remote with primary's local. 26559243Sobrien * Note that every case where primary's localcnt is smaller than 26659243Sobrien * secondary's remotecnt and where secondary's localcnt is smaller than 26759243Sobrien * primary's remotecnt should be impossible in practise. We will perform 26859243Sobrien * full synchronization then. Those cases are marked with an asterisk. 26959243Sobrien * Regular synchronization means that only extents marked as dirty are 27059243Sobrien * synchronized (regular synchronization). 27159243Sobrien * 27259243Sobrien * SECONDARY METADATA PRIMARY METADATA 27359243Sobrien * local=3 remote=3 local=2 remote=2* ?! Full sync from secondary. 27459243Sobrien * local=3 remote=3 local=2 remote=3* ?! Full sync from primary. 27559243Sobrien * local=3 remote=3 local=2 remote=4* ?! Full sync from primary. 27659243Sobrien * local=3 remote=3 local=3 remote=2 Primary is out-of-date, 27759243Sobrien * regular sync from secondary. 27859243Sobrien * local=3 remote=3 local=3 remote=3 Regular sync just in case. 27959243Sobrien * local=3 remote=3 local=3 remote=4* ?! Full sync from primary. 28059243Sobrien * local=3 remote=3 local=4 remote=2 Split-brain condition. 28159243Sobrien * local=3 remote=3 local=4 remote=3 Secondary out-of-date, 28259243Sobrien * regular sync from primary. 28359243Sobrien * local=3 remote=3 local=4 remote=4* ?! Full sync from primary. 28459243Sobrien */ 28559243Sobrien if (res->hr_resuid == 0) { 28659243Sobrien /* 28759243Sobrien * Provider is used for the first time. If primary node done no 28859243Sobrien * writes yet as well (we will find "virgin" argument) then 28959243Sobrien * there is no need to synchronize anything. If primary node 29059243Sobrien * done any writes already we have to synchronize everything. 29159243Sobrien */ 29259243Sobrien PJDLOG_ASSERT(res->hr_secondary_localcnt == 0); 29359243Sobrien res->hr_resuid = resuid; 29459243Sobrien if (metadata_write(res) == -1) 29559243Sobrien exit(EX_NOINPUT); 29659243Sobrien if (nv_exists(nvin, "virgin")) { 29759243Sobrien free(map); 29859243Sobrien map = NULL; 29959243Sobrien mapsize = 0; 30059243Sobrien } else { 30159243Sobrien memset(map, 0xff, mapsize); 30259243Sobrien } 30359243Sobrien nv_add_int8(nvout, 1, "virgin"); 30459243Sobrien nv_add_uint8(nvout, HAST_SYNCSRC_PRIMARY, "syncsrc"); 30559243Sobrien } else if (res->hr_resuid != resuid) { 30659243Sobrien char errmsg[256]; 30769408Sache 30869408Sache free(map); 30969408Sache (void)snprintf(errmsg, sizeof(errmsg), 31069408Sache "Resource unique ID mismatch (primary=%ju, secondary=%ju).", 31169408Sache (uintmax_t)resuid, (uintmax_t)res->hr_resuid); 31269408Sache pjdlog_error("%s", errmsg); 31359243Sobrien nv_add_string(nvout, errmsg, "errmsg"); 31459243Sobrien if (hast_proto_send(res, res->hr_remotein, nvout, 31559243Sobrien NULL, 0) == -1) { 31659243Sobrien pjdlog_exit(EX_TEMPFAIL, 31759243Sobrien "Unable to send response to %s", 31859243Sobrien res->hr_remoteaddr); 31959243Sobrien } 32059243Sobrien nv_free(nvout); 32159243Sobrien exit(EX_CONFIG); 32259243Sobrien } else if ( 32359243Sobrien /* Is primary out-of-date? */ 32459243Sobrien (res->hr_secondary_localcnt > res->hr_primary_remotecnt && 32559243Sobrien res->hr_secondary_remotecnt == res->hr_primary_localcnt) || 32659243Sobrien /* Are the nodes more or less in sync? */ 32759243Sobrien (res->hr_secondary_localcnt == res->hr_primary_remotecnt && 32859243Sobrien res->hr_secondary_remotecnt == res->hr_primary_localcnt) || 32959243Sobrien /* Is secondary out-of-date? */ 33059243Sobrien (res->hr_secondary_localcnt == res->hr_primary_remotecnt && 33159243Sobrien res->hr_secondary_remotecnt < res->hr_primary_localcnt)) { 33259243Sobrien /* 33359243Sobrien * Nodes are more or less in sync or one of the nodes is 33459243Sobrien * out-of-date. 33559243Sobrien * It doesn't matter at this point which one, we just have to 33659243Sobrien * send out local bitmap to the remote node. 33759243Sobrien */ 33859243Sobrien if (pread(res->hr_localfd, map, mapsize, METADATA_SIZE) != 33959243Sobrien (ssize_t)mapsize) { 34059243Sobrien pjdlog_exit(LOG_ERR, "Unable to read activemap"); 34159243Sobrien } 34259243Sobrien if (res->hr_secondary_localcnt > res->hr_primary_remotecnt && 34359243Sobrien res->hr_secondary_remotecnt == res->hr_primary_localcnt) { 34459243Sobrien /* Primary is out-of-date, sync from secondary. */ 34559243Sobrien nv_add_uint8(nvout, HAST_SYNCSRC_SECONDARY, "syncsrc"); 34659243Sobrien } else { 34759243Sobrien /* 34859243Sobrien * Secondary is out-of-date or counts match. 34959243Sobrien * Sync from primary. 35059243Sobrien */ 35159243Sobrien nv_add_uint8(nvout, HAST_SYNCSRC_PRIMARY, "syncsrc"); 352145479Smp } 35359243Sobrien } else if (res->hr_secondary_localcnt > res->hr_primary_remotecnt && 35459243Sobrien res->hr_primary_localcnt > res->hr_secondary_remotecnt) { 35559243Sobrien /* 35659243Sobrien * Not good, we have split-brain condition. 35759243Sobrien */ 35859243Sobrien free(map); 35959243Sobrien pjdlog_error("Split-brain detected, exiting."); 36059243Sobrien nv_add_string(nvout, "Split-brain condition!", "errmsg"); 36159243Sobrien if (hast_proto_send(res, res->hr_remotein, nvout, 36259243Sobrien NULL, 0) == -1) { 36359243Sobrien pjdlog_exit(EX_TEMPFAIL, 36459243Sobrien "Unable to send response to %s", 36559243Sobrien res->hr_remoteaddr); 36659243Sobrien } 36759243Sobrien nv_free(nvout); 36859243Sobrien /* Exit on split-brain. */ 36959243Sobrien event_send(res, EVENT_SPLITBRAIN); 37059243Sobrien exit(EX_CONFIG); 37159243Sobrien } else /* if (res->hr_secondary_localcnt < res->hr_primary_remotecnt || 37259243Sobrien res->hr_primary_localcnt < res->hr_secondary_remotecnt) */ { 37359243Sobrien /* 37459243Sobrien * This should never happen in practise, but we will perform 37559243Sobrien * full synchronization. 37659243Sobrien */ 37759243Sobrien PJDLOG_ASSERT(res->hr_secondary_localcnt < res->hr_primary_remotecnt || 37859243Sobrien res->hr_primary_localcnt < res->hr_secondary_remotecnt); 37959243Sobrien mapsize = activemap_calc_ondisk_size(res->hr_local_mediasize - 38059243Sobrien METADATA_SIZE, res->hr_extentsize, 38159243Sobrien res->hr_local_sectorsize); 38259243Sobrien memset(map, 0xff, mapsize); 38359243Sobrien if (res->hr_secondary_localcnt > res->hr_primary_remotecnt) { 38459243Sobrien /* In this one of five cases sync from secondary. */ 38559243Sobrien nv_add_uint8(nvout, HAST_SYNCSRC_SECONDARY, "syncsrc"); 38659243Sobrien } else { 38759243Sobrien /* For the rest four cases sync from primary. */ 38859243Sobrien nv_add_uint8(nvout, HAST_SYNCSRC_PRIMARY, "syncsrc"); 38959243Sobrien } 39059243Sobrien pjdlog_warning("This should never happen, asking for full synchronization (primary(local=%ju, remote=%ju), secondary(local=%ju, remote=%ju)).", 39159243Sobrien (uintmax_t)res->hr_primary_localcnt, 39259243Sobrien (uintmax_t)res->hr_primary_remotecnt, 39359243Sobrien (uintmax_t)res->hr_secondary_localcnt, 39459243Sobrien (uintmax_t)res->hr_secondary_remotecnt); 39559243Sobrien } 39659243Sobrien nv_add_uint32(nvout, (uint32_t)mapsize, "mapsize"); 39759243Sobrien if (hast_proto_send(res, res->hr_remotein, nvout, map, mapsize) == -1) { 39859243Sobrien pjdlog_exit(EX_TEMPFAIL, "Unable to send activemap to %s", 39959243Sobrien res->hr_remoteaddr); 40059243Sobrien } 40159243Sobrien if (map != NULL) 40259243Sobrien free(map); 40359243Sobrien nv_free(nvout); 40459243Sobrien#ifdef notyet 40559243Sobrien /* Setup direction. */ 40659243Sobrien if (proto_recv(res->hr_remotein, NULL, 0) == -1) 40759243Sobrien pjdlog_errno(LOG_WARNING, "Unable to set connection direction"); 40859243Sobrien#endif 40959243Sobrien} 41059243Sobrien 41159243Sobrienvoid 41259243Sobrienhastd_secondary(struct hast_resource *res, struct nv *nvin) 41359243Sobrien{ 41459243Sobrien sigset_t mask; 41559243Sobrien pthread_t td; 41659243Sobrien pid_t pid; 41759243Sobrien int error, mode, debuglevel; 41859243Sobrien 41959243Sobrien /* 42059243Sobrien * Create communication channel between parent and child. 42159243Sobrien */ 42259243Sobrien if (proto_client(NULL, "socketpair://", &res->hr_ctrl) == -1) { 42359243Sobrien KEEP_ERRNO((void)pidfile_remove(pfh)); 42459243Sobrien pjdlog_exit(EX_OSERR, 425145479Smp "Unable to create control sockets between parent and child"); 42659243Sobrien } 42759243Sobrien /* 428145479Smp * Create communication channel between child and parent. 42959243Sobrien */ 43059243Sobrien if (proto_client(NULL, "socketpair://", &res->hr_event) == -1) { 43159243Sobrien KEEP_ERRNO((void)pidfile_remove(pfh)); 43259243Sobrien pjdlog_exit(EX_OSERR, 43359243Sobrien "Unable to create event sockets between child and parent"); 434145479Smp } 435145479Smp 436145479Smp pid = fork(); 437145479Smp if (pid == -1) { 438145479Smp KEEP_ERRNO((void)pidfile_remove(pfh)); 439145479Smp pjdlog_exit(EX_OSERR, "Unable to fork"); 440145479Smp } 441145479Smp 442145479Smp if (pid > 0) { 443145479Smp /* This is parent. */ 444145479Smp proto_close(res->hr_remotein); 445145479Smp res->hr_remotein = NULL; 446145479Smp proto_close(res->hr_remoteout); 447145479Smp res->hr_remoteout = NULL; 448145479Smp /* Declare that we are receiver. */ 449145479Smp proto_recv(res->hr_event, NULL, 0); 450145479Smp /* Declare that we are sender. */ 451145479Smp proto_send(res->hr_ctrl, NULL, 0); 452145479Smp res->hr_workerpid = pid; 453145479Smp return; 454145479Smp } 45559243Sobrien 45659243Sobrien gres = res; 45759243Sobrien res->output_status_aux = output_status_aux; 45859243Sobrien mode = pjdlog_mode_get(); 45959243Sobrien debuglevel = pjdlog_debug_get(); 46059243Sobrien 46159243Sobrien /* Declare that we are sender. */ 462145479Smp proto_send(res->hr_event, NULL, 0); 463145479Smp /* Declare that we are receiver. */ 464145479Smp proto_recv(res->hr_ctrl, NULL, 0); 46559243Sobrien descriptors_cleanup(res); 46659243Sobrien 46759243Sobrien descriptors_assert(res, mode); 46859243Sobrien 46959243Sobrien pjdlog_init(mode); 47059243Sobrien pjdlog_debug_set(debuglevel); 47159243Sobrien pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 47259243Sobrien setproctitle("%s (%s)", res->hr_name, role2str(res->hr_role)); 47359243Sobrien 47459243Sobrien PJDLOG_VERIFY(sigemptyset(&mask) == 0); 47559243Sobrien PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); 47659243Sobrien 47759243Sobrien /* Error in setting timeout is not critical, but why should it fail? */ 47859243Sobrien if (proto_timeout(res->hr_remotein, 2 * HAST_KEEPALIVE) == -1) 47959243Sobrien pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); 48059243Sobrien if (proto_timeout(res->hr_remoteout, res->hr_timeout) == -1) 48159243Sobrien pjdlog_errno(LOG_WARNING, "Unable to set connection timeout"); 482145479Smp 48359243Sobrien init_local(res); 484145479Smp init_environment(); 48559243Sobrien 48659243Sobrien if (drop_privs(res) != 0) 487145479Smp exit(EX_CONFIG); 48859243Sobrien pjdlog_info("Privileges successfully dropped."); 489145479Smp 49059243Sobrien /* 49159243Sobrien * Create the control thread before sending any event to the parent, 49259243Sobrien * as we can deadlock when parent sends control request to worker, 49359243Sobrien * but worker has no control thread started yet, so parent waits. 49459243Sobrien * In the meantime worker sends an event to the parent, but parent 49559243Sobrien * is unable to handle the event, because it waits for control 49659243Sobrien * request response. 49759243Sobrien */ 49859243Sobrien error = pthread_create(&td, NULL, ctrl_thread, res); 49959243Sobrien PJDLOG_ASSERT(error == 0); 50059243Sobrien 50159243Sobrien init_remote(res, nvin); 50259243Sobrien event_send(res, EVENT_CONNECT); 50359243Sobrien 50459243Sobrien error = pthread_create(&td, NULL, recv_thread, res); 50559243Sobrien PJDLOG_ASSERT(error == 0); 50659243Sobrien error = pthread_create(&td, NULL, disk_thread, res); 50759243Sobrien PJDLOG_ASSERT(error == 0); 50859243Sobrien (void)send_thread(res); 50959243Sobrien} 51059243Sobrien 51159243Sobrienstatic void 51259243Sobrienreqlog(int loglevel, int debuglevel, int error, struct hio *hio, 51359243Sobrien const char *fmt, ...) 51459243Sobrien{ 51559243Sobrien char msg[1024]; 51659243Sobrien va_list ap; 51759243Sobrien int len; 51859243Sobrien 51959243Sobrien va_start(ap, fmt); 52059243Sobrien len = vsnprintf(msg, sizeof(msg), fmt, ap); 52159243Sobrien va_end(ap); 52259243Sobrien if ((size_t)len < sizeof(msg)) { 52359243Sobrien switch (hio->hio_cmd) { 52459243Sobrien case HIO_READ: 52559243Sobrien (void)snprintf(msg + len, sizeof(msg) - len, 52659243Sobrien "READ(%ju, %ju).", (uintmax_t)hio->hio_offset, 52759243Sobrien (uintmax_t)hio->hio_length); 52859243Sobrien break; 52959243Sobrien case HIO_DELETE: 53059243Sobrien (void)snprintf(msg + len, sizeof(msg) - len, 53159243Sobrien "DELETE(%ju, %ju).", (uintmax_t)hio->hio_offset, 53259243Sobrien (uintmax_t)hio->hio_length); 53359243Sobrien break; 53459243Sobrien case HIO_FLUSH: 53559243Sobrien (void)snprintf(msg + len, sizeof(msg) - len, "FLUSH."); 53659243Sobrien break; 53759243Sobrien case HIO_WRITE: 53859243Sobrien (void)snprintf(msg + len, sizeof(msg) - len, 53959243Sobrien "WRITE(%ju, %ju).", (uintmax_t)hio->hio_offset, 54059243Sobrien (uintmax_t)hio->hio_length); 54159243Sobrien break; 54259243Sobrien case HIO_KEEPALIVE: 54359243Sobrien (void)snprintf(msg + len, sizeof(msg) - len, "KEEPALIVE."); 54459243Sobrien break; 54559243Sobrien default: 54659243Sobrien (void)snprintf(msg + len, sizeof(msg) - len, 54759243Sobrien "UNKNOWN(%u).", (unsigned int)hio->hio_cmd); 54859243Sobrien break; 54959243Sobrien } 55059243Sobrien } 55159243Sobrien pjdlog_common(loglevel, debuglevel, error, "%s", msg); 55259243Sobrien} 55359243Sobrien 55459243Sobrienstatic int 55559243Sobrienrequnpack(struct hast_resource *res, struct hio *hio, struct nv *nv) 55659243Sobrien{ 55759243Sobrien 55859243Sobrien hio->hio_cmd = nv_get_uint8(nv, "cmd"); 55959243Sobrien if (hio->hio_cmd == 0) { 56059243Sobrien pjdlog_error("Header contains no 'cmd' field."); 56159243Sobrien hio->hio_error = EINVAL; 56259243Sobrien goto end; 56359243Sobrien } 56459243Sobrien if (hio->hio_cmd != HIO_KEEPALIVE) { 56559243Sobrien hio->hio_seq = nv_get_uint64(nv, "seq"); 56659243Sobrien if (hio->hio_seq == 0) { 56759243Sobrien pjdlog_error("Header contains no 'seq' field."); 56859243Sobrien hio->hio_error = EINVAL; 56959243Sobrien goto end; 57059243Sobrien } 57159243Sobrien } 57259243Sobrien switch (hio->hio_cmd) { 57359243Sobrien case HIO_FLUSH: 57459243Sobrien case HIO_KEEPALIVE: 57559243Sobrien break; 57659243Sobrien case HIO_WRITE: 57759243Sobrien hio->hio_memsync = nv_exists(nv, "memsync"); 578145479Smp /* FALLTHROUGH */ 57959243Sobrien case HIO_READ: 58059243Sobrien case HIO_DELETE: 58159243Sobrien hio->hio_offset = nv_get_uint64(nv, "offset"); 58259243Sobrien if (nv_error(nv) != 0) { 58359243Sobrien pjdlog_error("Header is missing 'offset' field."); 58459243Sobrien hio->hio_error = EINVAL; 58559243Sobrien goto end; 58659243Sobrien } 58759243Sobrien hio->hio_length = nv_get_uint64(nv, "length"); 58859243Sobrien if (nv_error(nv) != 0) { 58959243Sobrien pjdlog_error("Header is missing 'length' field."); 59059243Sobrien hio->hio_error = EINVAL; 59159243Sobrien goto end; 59259243Sobrien } 59359243Sobrien if (hio->hio_length == 0) { 59459243Sobrien pjdlog_error("Data length is zero."); 59559243Sobrien hio->hio_error = EINVAL; 59659243Sobrien goto end; 59759243Sobrien } 59859243Sobrien if (hio->hio_cmd != HIO_DELETE && hio->hio_length > MAXPHYS) { 59959243Sobrien pjdlog_error("Data length is too large (%ju > %ju).", 60059243Sobrien (uintmax_t)hio->hio_length, (uintmax_t)MAXPHYS); 60159243Sobrien hio->hio_error = EINVAL; 60259243Sobrien goto end; 60359243Sobrien } 60459243Sobrien if ((hio->hio_offset % res->hr_local_sectorsize) != 0) { 60559243Sobrien pjdlog_error("Offset %ju is not multiple of sector size.", 60659243Sobrien (uintmax_t)hio->hio_offset); 60759243Sobrien hio->hio_error = EINVAL; 60859243Sobrien goto end; 60959243Sobrien } 61059243Sobrien if ((hio->hio_length % res->hr_local_sectorsize) != 0) { 61159243Sobrien pjdlog_error("Length %ju is not multiple of sector size.", 61259243Sobrien (uintmax_t)hio->hio_length); 61359243Sobrien hio->hio_error = EINVAL; 61459243Sobrien goto end; 61559243Sobrien } 61659243Sobrien if (hio->hio_offset + hio->hio_length > 61759243Sobrien (uint64_t)res->hr_datasize) { 61859243Sobrien pjdlog_error("Data offset is too large (%ju > %ju).", 61959243Sobrien (uintmax_t)(hio->hio_offset + hio->hio_length), 62059243Sobrien (uintmax_t)res->hr_datasize); 62159243Sobrien hio->hio_error = EINVAL; 62259243Sobrien goto end; 62359243Sobrien } 62459243Sobrien break; 62559243Sobrien default: 62659243Sobrien pjdlog_error("Header contains invalid 'cmd' (%hhu).", 62759243Sobrien hio->hio_cmd); 62859243Sobrien hio->hio_error = EINVAL; 62959243Sobrien goto end; 63059243Sobrien } 63159243Sobrien hio->hio_error = 0; 63259243Sobrienend: 63359243Sobrien return (hio->hio_error); 63459243Sobrien} 63559243Sobrien 63659243Sobrienstatic __dead2 void 63759243Sobriensecondary_exit(int exitcode, const char *fmt, ...) 63859243Sobrien{ 63959243Sobrien va_list ap; 64059243Sobrien 64159243Sobrien PJDLOG_ASSERT(exitcode != EX_OK); 64259243Sobrien va_start(ap, fmt); 64359243Sobrien pjdlogv_errno(LOG_ERR, fmt, ap); 64459243Sobrien va_end(ap); 64559243Sobrien event_send(gres, EVENT_DISCONNECT); 64659243Sobrien exit(exitcode); 64759243Sobrien} 64859243Sobrien 64959243Sobrien/* 65059243Sobrien * Thread receives requests from the primary node. 65159243Sobrien */ 65259243Sobrienstatic void * 65359243Sobrienrecv_thread(void *arg) 65459243Sobrien{ 65559243Sobrien struct hast_resource *res = arg; 65659243Sobrien struct hio *hio, *mshio; 65759243Sobrien struct nv *nv; 65859243Sobrien 65959243Sobrien for (;;) { 66059243Sobrien pjdlog_debug(2, "recv: Taking free request."); 66159243Sobrien QUEUE_TAKE(free, hio); 66259243Sobrien pjdlog_debug(2, "recv: (%p) Got request.", hio); 66359243Sobrien if (hast_proto_recv_hdr(res->hr_remotein, &nv) == -1) { 66459243Sobrien secondary_exit(EX_TEMPFAIL, 66559243Sobrien "Unable to receive request header"); 66659243Sobrien } 66759243Sobrien if (requnpack(res, hio, nv) != 0) { 66859243Sobrien nv_free(nv); 66959243Sobrien pjdlog_debug(2, 67059243Sobrien "recv: (%p) Moving request to the send queue.", 67159243Sobrien hio); 67259243Sobrien QUEUE_INSERT(send, hio); 67359243Sobrien continue; 67459243Sobrien } 67559243Sobrien switch (hio->hio_cmd) { 67659243Sobrien case HIO_READ: 67759243Sobrien res->hr_stat_read++; 67859243Sobrien break; 67959243Sobrien case HIO_WRITE: 68059243Sobrien res->hr_stat_write++; 68159243Sobrien break; 68259243Sobrien case HIO_DELETE: 68359243Sobrien res->hr_stat_delete++; 68459243Sobrien break; 68559243Sobrien case HIO_FLUSH: 68659243Sobrien res->hr_stat_flush++; 68759243Sobrien break; 68859243Sobrien case HIO_KEEPALIVE: 68959243Sobrien break; 69059243Sobrien default: 69159243Sobrien PJDLOG_ABORT("Unexpected command (cmd=%hhu).", 69259243Sobrien hio->hio_cmd); 69359243Sobrien } 69459243Sobrien reqlog(LOG_DEBUG, 2, -1, hio, 69559243Sobrien "recv: (%p) Got request header: ", hio); 69659243Sobrien if (hio->hio_cmd == HIO_KEEPALIVE) { 69759243Sobrien nv_free(nv); 69859243Sobrien pjdlog_debug(2, 69959243Sobrien "recv: (%p) Moving request to the free queue.", 70059243Sobrien hio); 70159243Sobrien hio_clear(hio); 70259243Sobrien QUEUE_INSERT(free, hio); 70359243Sobrien continue; 70459243Sobrien } else if (hio->hio_cmd == HIO_WRITE) { 70559243Sobrien if (hast_proto_recv_data(res, res->hr_remotein, nv, 70659243Sobrien hio->hio_data, MAXPHYS) == -1) { 70759243Sobrien secondary_exit(EX_TEMPFAIL, 70859243Sobrien "Unable to receive request data"); 70959243Sobrien } 71059243Sobrien if (hio->hio_memsync) { 71159243Sobrien /* 71259243Sobrien * For memsync requests we expect two replies. 71359243Sobrien * Clone the hio so we can handle both of them. 71459243Sobrien */ 71559243Sobrien pjdlog_debug(2, "recv: Taking free request."); 71659243Sobrien QUEUE_TAKE(free, mshio); 71759243Sobrien pjdlog_debug(2, "recv: (%p) Got request.", 71859243Sobrien mshio); 71959243Sobrien hio_copy(hio, mshio); 72059243Sobrien mshio->hio_error = 0; 72159243Sobrien /* 72259243Sobrien * We want to keep 'memsync' tag only on the 72359243Sobrien * request going onto send queue (mshio). 72459243Sobrien */ 72559243Sobrien hio->hio_memsync = false; 72659243Sobrien pjdlog_debug(2, 72759243Sobrien "recv: (%p) Moving memsync request to the send queue.", 72859243Sobrien mshio); 72959243Sobrien QUEUE_INSERT(send, mshio); 73059243Sobrien } 73159243Sobrien } 73259243Sobrien nv_free(nv); 73359243Sobrien pjdlog_debug(2, "recv: (%p) Moving request to the disk queue.", 73459243Sobrien hio); 73559243Sobrien QUEUE_INSERT(disk, hio); 73659243Sobrien } 73759243Sobrien /* NOTREACHED */ 73859243Sobrien return (NULL); 73959243Sobrien} 74059243Sobrien 74159243Sobrien/* 74259243Sobrien * Thread reads from or writes to local component and also handles DELETE and 74359243Sobrien * FLUSH requests. 74459243Sobrien */ 74559243Sobrienstatic void * 74659243Sobriendisk_thread(void *arg) 74759243Sobrien{ 74859243Sobrien struct hast_resource *res = arg; 74959243Sobrien struct hio *hio; 75059243Sobrien ssize_t ret; 75159243Sobrien bool clear_activemap, logerror; 75259243Sobrien 75359243Sobrien clear_activemap = true; 75459243Sobrien 75559243Sobrien for (;;) { 75659243Sobrien pjdlog_debug(2, "disk: Taking request."); 75759243Sobrien QUEUE_TAKE(disk, hio); 75859243Sobrien while (clear_activemap) { 75959243Sobrien unsigned char *map; 76059243Sobrien size_t mapsize; 76159243Sobrien 76259243Sobrien /* 76359243Sobrien * When first request is received, it means that primary 76459243Sobrien * already received our activemap, merged it and stored 76559243Sobrien * locally. We can now safely clear our activemap. 76659243Sobrien */ 76759243Sobrien mapsize = 76859243Sobrien activemap_calc_ondisk_size(res->hr_local_mediasize - 76959243Sobrien METADATA_SIZE, res->hr_extentsize, 77059243Sobrien res->hr_local_sectorsize); 77159243Sobrien map = calloc(1, mapsize); 77259243Sobrien if (map == NULL) { 77359243Sobrien pjdlog_warning("Unable to allocate memory to clear local activemap."); 77459243Sobrien break; 77559243Sobrien } 77659243Sobrien if (pwrite(res->hr_localfd, map, mapsize, 77759243Sobrien METADATA_SIZE) != (ssize_t)mapsize) { 77859243Sobrien pjdlog_errno(LOG_WARNING, 77959243Sobrien "Unable to store cleared activemap"); 78059243Sobrien free(map); 781145479Smp res->hr_stat_activemap_write_error++; 78259243Sobrien break; 78359243Sobrien } 78459243Sobrien free(map); 78559243Sobrien clear_activemap = false; 78659243Sobrien pjdlog_debug(1, "Local activemap cleared."); 78759243Sobrien break; 78859243Sobrien } 78959243Sobrien reqlog(LOG_DEBUG, 2, -1, hio, "disk: (%p) Got request: ", hio); 790145479Smp logerror = true; 79159243Sobrien /* Handle the actual request. */ 792145479Smp switch (hio->hio_cmd) { 79359243Sobrien case HIO_READ: 794145479Smp ret = pread(res->hr_localfd, hio->hio_data, 79559243Sobrien hio->hio_length, 796145479Smp hio->hio_offset + res->hr_localoff); 79769408Sache if (ret == -1) 798145479Smp hio->hio_error = errno; 79969408Sache else if (ret != (int64_t)hio->hio_length) 800145479Smp hio->hio_error = EIO; 80159243Sobrien else 80269408Sache hio->hio_error = 0; 80359243Sobrien break; 80459243Sobrien case HIO_WRITE: 80559243Sobrien ret = pwrite(res->hr_localfd, hio->hio_data, 80659243Sobrien hio->hio_length, 80759243Sobrien hio->hio_offset + res->hr_localoff); 80859243Sobrien if (ret == -1) 80959243Sobrien hio->hio_error = errno; 81059243Sobrien else if (ret != (int64_t)hio->hio_length) 81159243Sobrien hio->hio_error = EIO; 81259243Sobrien else 81359243Sobrien hio->hio_error = 0; 81459243Sobrien break; 81559243Sobrien case HIO_DELETE: 81659243Sobrien ret = g_delete(res->hr_localfd, 81759243Sobrien hio->hio_offset + res->hr_localoff, 81859243Sobrien hio->hio_length); 81969408Sache if (ret == -1) 82069408Sache hio->hio_error = errno; 82169408Sache else 82269408Sache hio->hio_error = 0; 82369408Sache break; 82459243Sobrien case HIO_FLUSH: 82559243Sobrien if (!res->hr_localflush) { 82659243Sobrien ret = -1; 82759243Sobrien hio->hio_error = EOPNOTSUPP; 82859243Sobrien logerror = false; 82959243Sobrien break; 83059243Sobrien } 83159243Sobrien ret = g_flush(res->hr_localfd); 83259243Sobrien if (ret == -1) { 83369408Sache if (errno == EOPNOTSUPP) 83469408Sache res->hr_localflush = false; 83559243Sobrien hio->hio_error = errno; 83659243Sobrien } else { 83759243Sobrien hio->hio_error = 0; 83859243Sobrien } 83969408Sache break; 84069408Sache default: 84159243Sobrien PJDLOG_ABORT("Unexpected command (cmd=%hhu).", 84259243Sobrien hio->hio_cmd); 84369408Sache } 84459243Sobrien if (logerror && hio->hio_error != 0) { 84559243Sobrien reqlog(LOG_ERR, 0, hio->hio_error, hio, 84659243Sobrien "Request failed: "); 84759243Sobrien } 84859243Sobrien pjdlog_debug(2, "disk: (%p) Moving request to the send queue.", 84959243Sobrien hio); 85069408Sache QUEUE_INSERT(send, hio); 85169408Sache } 85259243Sobrien /* NOTREACHED */ 85359243Sobrien return (NULL); 85459243Sobrien} 85559243Sobrien 85669408Sache/* 85769408Sache * Thread sends requests back to primary node. 85859243Sobrien */ 85959243Sobrienstatic void * 86059243Sobriensend_thread(void *arg) 86159243Sobrien{ 86259243Sobrien struct hast_resource *res = arg; 86359243Sobrien struct nv *nvout; 86459243Sobrien struct hio *hio; 86559243Sobrien void *data; 86659243Sobrien size_t length; 86769408Sache 86869408Sache for (;;) { 86959243Sobrien pjdlog_debug(2, "send: Taking request."); 87059243Sobrien QUEUE_TAKE(send, hio); 87159243Sobrien reqlog(LOG_DEBUG, 2, -1, hio, "send: (%p) Got request: ", hio); 87259243Sobrien nvout = nv_alloc(); 87369408Sache /* Copy sequence number. */ 87469408Sache nv_add_uint64(nvout, hio->hio_seq, "seq"); 87569408Sache if (hio->hio_memsync) { 87659243Sobrien PJDLOG_ASSERT(hio->hio_cmd == HIO_WRITE); 87759243Sobrien nv_add_int8(nvout, 1, "received"); 87859243Sobrien } 87959243Sobrien switch (hio->hio_cmd) { 88059243Sobrien case HIO_READ: 88159243Sobrien if (hio->hio_error == 0) { 88269408Sache data = hio->hio_data; 88369408Sache length = hio->hio_length; 88459243Sobrien break; 88559243Sobrien } 88659243Sobrien /* 88759243Sobrien * We send no data in case of an error. 88869408Sache */ 88969408Sache /* FALLTHROUGH */ 89059243Sobrien case HIO_DELETE: 89159243Sobrien case HIO_FLUSH: 89259243Sobrien case HIO_WRITE: 89359243Sobrien data = NULL; 89459243Sobrien length = 0; 89559243Sobrien break; 89659243Sobrien default: 89759243Sobrien PJDLOG_ABORT("Unexpected command (cmd=%hhu).", 89859243Sobrien hio->hio_cmd); 89959243Sobrien } 90059243Sobrien if (hio->hio_error != 0) { 90169408Sache switch (hio->hio_cmd) { 90259243Sobrien case HIO_READ: 90359243Sobrien res->hr_stat_read_error++; 90459243Sobrien break; 90559243Sobrien case HIO_WRITE: 90659243Sobrien res->hr_stat_write_error++; 90759243Sobrien break; 90859243Sobrien case HIO_DELETE: 90959243Sobrien res->hr_stat_delete_error++; 91059243Sobrien break; 91159243Sobrien case HIO_FLUSH: 91259243Sobrien res->hr_stat_flush_error++; 91359243Sobrien break; 91459243Sobrien } 91569408Sache nv_add_int16(nvout, hio->hio_error, "error"); 91659243Sobrien } 91759243Sobrien if (hast_proto_send(res, res->hr_remoteout, nvout, data, 91859243Sobrien length) == -1) { 91959243Sobrien secondary_exit(EX_TEMPFAIL, "Unable to send reply"); 92059243Sobrien } 92159243Sobrien nv_free(nvout); 92259243Sobrien pjdlog_debug(2, "send: (%p) Moving request to the free queue.", 92359243Sobrien hio); 92459243Sobrien hio_clear(hio); 92559243Sobrien QUEUE_INSERT(free, hio); 92669408Sache } 92759243Sobrien /* NOTREACHED */ 92859243Sobrien return (NULL); 92959243Sobrien} 93059243Sobrien