/*-
 * Copyright (c) 2016 Microsoft Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/lock.h>
#include <sys/taskqueue.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/kthread.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/un.h>
#include <sys/endian.h>
#include <sys/sema.h>
#include <sys/signal.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/mutex.h>
#include <sys/callout.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/utilities/hv_utilreg.h>
#include <dev/hyperv/utilities/vmbus_icreg.h>
#include <dev/hyperv/utilities/vmbus_icvar.h>

#include "hv_snapshot.h"
#include "vmbus_if.h"

#define VSS_MAJOR		5
#define VSS_MINOR		0
#define VSS_MSGVER		VMBUS_IC_VERSION(VSS_MAJOR, VSS_MINOR)

#define VSS_FWVER_MAJOR		3
#define VSS_FWVER		VMBUS_IC_VERSION(VSS_FWVER_MAJOR, 0)

#define TIMEOUT_LIMIT		(15)	/* seconds */
enum hv_vss_op {
	VSS_OP_CREATE = 0,
	VSS_OP_DELETE,
	VSS_OP_HOT_BACKUP,
	VSS_OP_GET_DM_INFO,
	VSS_OP_BU_COMPLETE,
	/*
	 * Following operations are only supported with IC version >= 5.0
	 */
	VSS_OP_FREEZE, /* Freeze the file systems in the VM */
	VSS_OP_THAW, /* Unfreeze the file systems */
	VSS_OP_AUTO_RECOVER,
	VSS_OP_COUNT /* Number of operations, must be last */
};

/*
 * Header for all VSS messages.
 */
struct hv_vss_hdr {
	struct vmbus_icmsg_hdr	ic_hdr;
	uint8_t			operation;
	uint8_t			reserved[7];
} __packed;

/*
 * Flag values for hv_vss_check_feature.  Only one value is
 * currently supported.
 */
#define VSS_HBU_NO_AUTO_RECOVERY		0x00000005

struct hv_vss_check_feature {
	uint32_t flags;
} __packed;

struct hv_vss_check_dm_info {
	uint32_t flags;
} __packed;

struct hv_vss_msg {
	union {
		struct hv_vss_hdr vss_hdr;
	} hdr;
	union {
		struct hv_vss_check_feature vss_cf;
		struct hv_vss_check_dm_info dm_info;
	} body;
} __packed;

struct hv_vss_req {
	struct hv_vss_opt_msg	opt_msg;	/* used to communicate with daemon */
	struct hv_vss_msg	msg;		/* used to communicate with host */
} __packed;

/* hv_vss debug control */
static int hv_vss_log = 0;

#define	hv_vss_log_error(...)	do {				\
	if (hv_vss_log > 0)					\
		log(LOG_ERR, "hv_vss: " __VA_ARGS__);		\
} while (0)

#define	hv_vss_log_info(...) do {				\
	if (hv_vss_log > 1)					\
		log(LOG_INFO, "hv_vss: " __VA_ARGS__);		\
} while (0)

static const struct vmbus_ic_desc vmbus_vss_descs[] = {
	{
		.ic_guid = { .hv_guid = {
		    0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42,
		    0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40} },
		.ic_desc = "Hyper-V VSS"
	},
	VMBUS_IC_DESC_END
};

static const char * vss_opt_name[] = {"None", "VSSCheck", "Freeze", "Thaw"};

/* character device prototypes */
static d_open_t		hv_vss_dev_open;
static d_close_t	hv_vss_dev_close;
static d_poll_t		hv_vss_dev_daemon_poll;
static d_ioctl_t	hv_vss_dev_daemon_ioctl;

static d_open_t		hv_appvss_dev_open;
static d_close_t	hv_appvss_dev_close;
static d_poll_t		hv_appvss_dev_poll;
static d_ioctl_t	hv_appvss_dev_ioctl;

/* hv_vss character device structure */
static struct cdevsw hv_vss_cdevsw =
{
	.d_version	= D_VERSION,
	.d_open		= hv_vss_dev_open,
	.d_close	= hv_vss_dev_close,
	.d_poll		= hv_vss_dev_daemon_poll,
	.d_ioctl	= hv_vss_dev_daemon_ioctl,
	.d_name		= FS_VSS_DEV_NAME,
};

static struct cdevsw hv_appvss_cdevsw =
{
	.d_version	= D_VERSION,
	.d_open		= hv_appvss_dev_open,
	.d_close	= hv_appvss_dev_close,
	.d_poll		= hv_appvss_dev_poll,
	.d_ioctl	= hv_appvss_dev_ioctl,
	.d_name		= APP_VSS_DEV_NAME,
};

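/*
 * A minimal sketch of the expected user-space interaction with the
 * daemon device, for illustration only; the application device follows
 * the same notify (READ) / acknowledge (WRITE) pattern.  This assumes
 * the IOCHVVSSREAD/IOCHVVSSWRITE commands and struct hv_vss_opt_msg
 * from hv_snapshot.h, with open flags and error handling simplified:
 *
 *	int fd = open("/dev/" FS_VSS_DEV_NAME, O_RDWR);
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	struct hv_vss_opt_msg msg;
 *	for (;;) {
 *		poll(&pfd, 1, INFTIM);		(wait for a request)
 *		ioctl(fd, IOCHVVSSREAD, &msg);	(dequeue the notification)
 *		... perform freeze/thaw/check according to msg.opt ...
 *		msg.status = VSS_SUCCESS;	(or VSS_FAIL)
 *		ioctl(fd, IOCHVVSSWRITE, &msg);	(acknowledge the result)
 *	}
 */
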
struct hv_vss_sc;
/*
 * Global state to track each cdev.
 */
struct hv_vss_dev_sc {
	/*
	 * A message from the host is first placed on the notify queue,
	 * then moved to the ack queue, and finally recycled to the free
	 * list.
	 */
	STAILQ_HEAD(, hv_vss_req_internal)	to_notify_queue;
	STAILQ_HEAD(, hv_vss_req_internal)	to_ack_queue;
	struct hv_vss_sc			*sc;
	struct proc				*proc_task;
	struct selinfo				hv_vss_selinfo;
};

/*
 * Global state to track and synchronize the transaction requests from the host.
 * VSS allows users to register a callback to freeze/thaw their applications;
 * the VSS kernel driver notifies the VSS daemon and, if one is registered,
 * the user application as well.
 * The implementation's state transitions are illustrated at:
 * https://clovertrail.github.io/assets/vssdot.png
 */
typedef struct hv_vss_sc {
	struct vmbus_ic_softc			util_sc;
	device_t				dev;

	struct task				task;

	/*
	 * This mutex protects access to the lists/queues; the callout
	 * in each request uses it as well.
	 */
	struct mtx				pending_mutex;
	/*
	 * req_free_list contains all free items.
	 */
	LIST_HEAD(, hv_vss_req_internal)	req_free_list;

	/* Indicates if the daemon has registered with the driver */
	boolean_t				register_done;

	boolean_t				app_register_done;

	/* cdev for file system freeze/thaw */
	struct cdev				*hv_vss_dev;
	/* cdev for application freeze/thaw */
	struct cdev				*hv_appvss_dev;

	/* sc for app */
	struct hv_vss_dev_sc			app_sc;
	/* sc for daemon */
	struct hv_vss_dev_sc			daemon_sc;
} hv_vss_sc;

typedef struct hv_vss_req_internal {
	LIST_ENTRY(hv_vss_req_internal)		link;
	STAILQ_ENTRY(hv_vss_req_internal)	slink;
	struct hv_vss_req			vss_req;

	/* Receive buffer for communicating with the host */
	uint8_t					*rcv_buf;
	/* Length of host message */
	uint32_t				host_msg_len;
	/* Host message id */
	uint64_t				host_msg_id;

	hv_vss_sc				*sc;

	struct callout				callout;
} hv_vss_req_internal;

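/*
 * Search 'queue' for the request whose msgid matches 'id' and unlink it.
 * On exit 'reqp' points to the removed request, or is NULL if no match
 * was found.  The pending mutex must be held.
 */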
#define SEARCH_REMOVE_REQ_LOCKED(reqp, queue, link, tmp, id)		\
	do {								\
		STAILQ_FOREACH_SAFE(reqp, queue, link, tmp) {		\
			if (reqp->vss_req.opt_msg.msgid == id) {	\
				STAILQ_REMOVE(queue,			\
				    reqp, hv_vss_req_internal, link);	\
				break;					\
			}						\
		}							\
	} while (0)

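/*
 * Return true if the daemon registered at some point (proc_task was set
 * at open time) but has since closed the device (register_done cleared).
 */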
static bool
hv_vss_is_daemon_killed_after_launch(hv_vss_sc *sc)
{
	return (!sc->register_done && sc->daemon_sc.proc_task);
}

/*
 * Callback routine that gets called whenever there is a message from the host.
 */
static void
hv_vss_callback(struct vmbus_channel *chan __unused, void *context)
{
	hv_vss_sc *sc = (hv_vss_sc*)context;
	if (hv_vss_is_daemon_killed_after_launch(sc))
		hv_vss_log_info("%s: daemon was killed!\n", __func__);
	if (sc->register_done || sc->daemon_sc.proc_task) {
		hv_vss_log_info("%s: Queuing work item\n", __func__);
		if (hv_vss_is_daemon_killed_after_launch(sc))
			hv_vss_log_info("%s: daemon was killed!\n", __func__);
		taskqueue_enqueue(taskqueue_thread, &sc->task);
	} else {
		hv_vss_log_info("%s: daemon has never been registered\n", __func__);
	}
	hv_vss_log_info("%s: received msg from host\n", __func__);
}

/*
 * Send the response back to the host.
 */
static void
hv_vss_respond_host(uint8_t *rcv_buf, struct vmbus_channel *ch,
    uint32_t recvlen, uint64_t requestid, uint32_t error)
{
	struct vmbus_icmsg_hdr *hv_icmsg_hdrp;

	hv_icmsg_hdrp = (struct vmbus_icmsg_hdr *)rcv_buf;

	hv_icmsg_hdrp->ic_status = error;
	hv_icmsg_hdrp->ic_flags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;

	error = vmbus_chan_send(ch, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    rcv_buf, recvlen, requestid);
	if (error)
		hv_vss_log_info("%s: sendpacket error: %d\n",
		    __func__, error);
}

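/*
 * Report the result of a freeze/thaw/check transaction to the host and
 * recycle the request onto the free list.  The pending mutex must be held.
 */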
static void
hv_vss_notify_host_result_locked(struct hv_vss_req_internal *reqp, uint32_t status)
{
	struct hv_vss_msg* msg = (struct hv_vss_msg *)reqp->rcv_buf;
	hv_vss_sc *sc = reqp->sc;
	if (reqp->vss_req.opt_msg.opt == HV_VSS_CHECK) {
		msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
	}
	hv_vss_log_info("%s, %s response %s to host\n", __func__,
	    vss_opt_name[reqp->vss_req.opt_msg.opt],
	    status == HV_S_OK ? "Success" : "Fail");
	hv_vss_respond_host(reqp->rcv_buf, vmbus_get_channel(reqp->sc->dev),
	    reqp->host_msg_len, reqp->host_msg_id, status);
	/* recycle the request */
	LIST_INSERT_HEAD(&sc->req_free_list, reqp, link);
}

static void
hv_vss_notify_host_result(struct hv_vss_req_internal *reqp, uint32_t status)
{
	mtx_lock(&reqp->sc->pending_mutex);
	hv_vss_notify_host_result_locked(reqp, status);
	mtx_unlock(&reqp->sc->pending_mutex);
}

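/*
 * Translate the pending host operation into the user-visible
 * hv_vss_opt_msg and copy it to 'userdata' for the daemon/app to read.
 */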
static void
hv_vss_cp_vssreq_to_user(struct hv_vss_req_internal *reqp,
    struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req *hv_vss_dev_buf;
	hv_vss_dev_buf = &reqp->vss_req;
	hv_vss_dev_buf->opt_msg.opt = HV_VSS_NONE;
	switch (reqp->vss_req.msg.hdr.vss_hdr.operation) {
	case VSS_OP_FREEZE:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_FREEZE;
		break;
	case VSS_OP_THAW:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_THAW;
		break;
	case VSS_OP_HOT_BACKUP:
		hv_vss_dev_buf->opt_msg.opt = HV_VSS_CHECK;
		break;
	}
	*userdata = hv_vss_dev_buf->opt_msg;
	hv_vss_log_info("%s, copied request %s (%ju) to user\n",
	    __func__, vss_opt_name[userdata->opt],
	    (uintmax_t)userdata->msgid);
}

/**
 * Remove the request with the given id from whichever notify or ack
 * queue it is on; the caller then recycles it to the free list.
 *
 * When a consumer has been notified but has not yet acknowledged, the
 * request resides on either a notify queue or an ack queue.
 */
static struct hv_vss_req_internal*
hv_vss_drain_req_queue_locked(hv_vss_sc *sc, uint64_t req_id)
{
	struct hv_vss_req_internal *reqp, *tmp;
	SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_notify_queue,
	    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_ack_queue,
		    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_notify_queue,
		    slink, tmp, req_id);
	if (reqp == NULL)
		SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_ack_queue, slink,
		    tmp, req_id);
	return (reqp);
}

/**
 * Actions for a daemon (or app) that has been notified: hand the pending
 * request to user space and move it to the ack queue.
 */
static void
hv_vss_notified(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req_internal *reqp;
	mtx_lock(&dev_sc->sc->pending_mutex);
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) {
		reqp = STAILQ_FIRST(&dev_sc->to_notify_queue);
		hv_vss_cp_vssreq_to_user(reqp, userdata);
		STAILQ_REMOVE_HEAD(&dev_sc->to_notify_queue, slink);
		/* insert the msg into the queue awaiting the write (ack) */
		STAILQ_INSERT_TAIL(&dev_sc->to_ack_queue, reqp, slink);
		userdata->status = VSS_SUCCESS;
	} else {
		/* A timeout occurred, so the request was removed from the queue. */
		hv_vss_log_info("%s: notify queue is empty!\n", __func__);
		userdata->status = VSS_FAIL;
	}
	mtx_unlock(&dev_sc->sc->pending_mutex);
}

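/*
 * Queue 'reqp' on the consumer's notify queue and wake up any poll(2)
 * sleepers on the corresponding device.
 */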
static void
hv_vss_notify(struct hv_vss_dev_sc *dev_sc, struct hv_vss_req_internal *reqp)
{
	uint32_t opt = reqp->vss_req.opt_msg.opt;
	mtx_lock(&dev_sc->sc->pending_mutex);
	STAILQ_INSERT_TAIL(&dev_sc->to_notify_queue, reqp, slink);
	hv_vss_log_info("%s: issuing query %s (%ju) to %s\n", __func__,
	    vss_opt_name[opt], (uintmax_t)reqp->vss_req.opt_msg.msgid,
	    &dev_sc->sc->app_sc == dev_sc ? "app" : "daemon");
	mtx_unlock(&dev_sc->sc->pending_mutex);
	selwakeup(&dev_sc->hv_vss_selinfo);
}

/**
 * Actions taken when the daemon acknowledges a request.
 */
static void
hv_vss_daemon_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req_internal	*reqp, *tmp;
	uint64_t			req_id;
	int				opt;
	uint32_t			status;

	opt = userdata->opt;
	req_id = userdata->msgid;
	status = userdata->status;
	/* make sure the reserved fields are all zeros. */
	memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) -
	    __offsetof(struct hv_vss_opt_msg, reserved));
	mtx_lock(&dev_sc->sc->pending_mutex);
	SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	if (reqp == NULL) {
		hv_vss_log_info("%s Timeout: failed to find daemon ack request\n",
		    __func__);
		userdata->status = VSS_FAIL;
		return;
	}
	KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!"));
	hv_vss_log_info("%s, got response %d from daemon for %s (%ju)\n", __func__,
	    status, vss_opt_name[opt], (uintmax_t)req_id);
	switch (opt) {
	case HV_VSS_CHECK:
	case HV_VSS_FREEZE:
		callout_drain(&reqp->callout);
		hv_vss_notify_host_result(reqp,
		    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
		break;
	case HV_VSS_THAW:
		if (dev_sc->sc->app_register_done) {
			if (status == VSS_SUCCESS) {
				hv_vss_notify(&dev_sc->sc->app_sc, reqp);
			} else {
				/* handle error */
				callout_drain(&reqp->callout);
				hv_vss_notify_host_result(reqp, HV_E_FAIL);
			}
		} else {
			callout_drain(&reqp->callout);
			hv_vss_notify_host_result(reqp,
			    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
		}
		break;
	}
}

/**
 * Actions taken when the application acknowledges a request.
 */
static void
hv_vss_app_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata)
{
	struct hv_vss_req_internal	*reqp, *tmp;
	uint64_t			req_id;
	int				opt;
	uint8_t				status;

	opt = userdata->opt;
	req_id = userdata->msgid;
	status = userdata->status;
	/* make sure the reserved fields are all zeros. */
	memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) -
	    __offsetof(struct hv_vss_opt_msg, reserved));
	mtx_lock(&dev_sc->sc->pending_mutex);
	SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	if (reqp == NULL) {
		hv_vss_log_info("%s Timeout: failed to find app ack request\n",
		    __func__);
		userdata->status = VSS_FAIL;
		return;
	}
	KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!"));
	hv_vss_log_info("%s, got response %d from app for %s (%ju)\n",
	    __func__, status, vss_opt_name[opt], (uintmax_t)req_id);
	if (dev_sc->sc->register_done) {
		switch (opt) {
		case HV_VSS_CHECK:
		case HV_VSS_FREEZE:
			if (status == VSS_SUCCESS) {
				hv_vss_notify(&dev_sc->sc->daemon_sc, reqp);
			} else {
				/* handle error */
				callout_drain(&reqp->callout);
				hv_vss_notify_host_result(reqp, HV_E_FAIL);
			}
			break;
		case HV_VSS_THAW:
			callout_drain(&reqp->callout);
			hv_vss_notify_host_result(reqp,
			    status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL);
			break;
		}
	} else {
		hv_vss_log_info("%s, Fatal: vss daemon was killed\n", __func__);
	}
}

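/*
 * Opening the FS VSS device registers the daemon; only one instance may
 * be open at a time.  The callback is invoked once here to pick up any
 * host message that arrived before the daemon registered.
 */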
static int
hv_vss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct proc     *td_proc;
	td_proc = td->td_proc;

	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
	hv_vss_log_info("%s: %s opens device \"%s\"\n",
	    __func__, td_proc->p_comm, FS_VSS_DEV_NAME);

	if (dev_sc->sc->register_done)
		return (EBUSY);

	dev_sc->sc->register_done = true;
	hv_vss_callback(vmbus_get_channel(dev_sc->sc->dev), dev_sc->sc);

	dev_sc->proc_task = curproc;
	return (0);
}

static int
hv_vss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused,
				 struct thread *td)
{
	struct proc     *td_proc;
	td_proc = td->td_proc;

	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	hv_vss_log_info("%s: %s closes device \"%s\"\n",
	    __func__, td_proc->p_comm, FS_VSS_DEV_NAME);
	dev_sc->sc->register_done = false;
	return (0);
}

static int
hv_vss_dev_daemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct proc			*td_proc;
	struct hv_vss_dev_sc		*sc;

	td_proc = td->td_proc;
	sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm);

	struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data;
	switch (cmd) {
	case IOCHVVSSREAD:
		hv_vss_notified(sc, userdata);
		break;
	case IOCHVVSSWRITE:
		hv_vss_daemon_acked(sc, userdata);
		break;
	}
	return (0);
}

/*
 * The VSS daemon's poll(2) invokes this function to check whether data is
 * available for the daemon to read.
 */
static int
hv_vss_dev_daemon_poll(struct cdev *dev, int events, struct thread *td)
{
	int revent = 0;
	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	mtx_lock(&dev_sc->sc->pending_mutex);
	/*
	 * If there is data ready, inform the daemon's poll.
	 */
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue))
		revent = POLLIN;
	if (revent == 0)
		selrecord(td, &dev_sc->hv_vss_selinfo);
	hv_vss_log_info("%s return 0x%x\n", __func__, revent);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	return (revent);
}

static int
hv_appvss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct proc     *td_proc;
	td_proc = td->td_proc;

	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;
	hv_vss_log_info("%s: %s opens device \"%s\"\n",
	    __func__, td_proc->p_comm, APP_VSS_DEV_NAME);

	if (dev_sc->sc->app_register_done)
		return (EBUSY);

	dev_sc->sc->app_register_done = true;
	dev_sc->proc_task = curproc;
	return (0);
}

static int
hv_appvss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused,
				 struct thread *td)
{
	struct proc     *td_proc;
	td_proc = td->td_proc;

	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	hv_vss_log_info("%s: %s closes device \"%s\".\n",
	    __func__, td_proc->p_comm, APP_VSS_DEV_NAME);
	dev_sc->sc->app_register_done = false;
	return (0);
}

static int
hv_appvss_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct proc			*td_proc;
	struct hv_vss_dev_sc		*dev_sc;

	td_proc = td->td_proc;
	dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm);

	struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data;
	switch (cmd) {
	case IOCHVVSSREAD:
		hv_vss_notified(dev_sc, userdata);
		break;
	case IOCHVVSSWRITE:
		hv_vss_app_acked(dev_sc, userdata);
		break;
	}
	return (0);
}

/*
 * The application's poll(2) invokes this function to check whether data is
 * available for the application to read.
 */
static int
hv_appvss_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	int revent = 0;
	struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1;

	mtx_lock(&dev_sc->sc->pending_mutex);
	/*
	 * If there is data ready, inform the application's poll.
	 */
	if (!STAILQ_EMPTY(&dev_sc->to_notify_queue))
		revent = POLLIN;
	if (revent == 0)
		selrecord(td, &dev_sc->hv_vss_selinfo);
	hv_vss_log_info("%s return 0x%x\n", __func__, revent);
	mtx_unlock(&dev_sc->sc->pending_mutex);
	return (revent);
}

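/*
 * Callout handler, armed in hv_vss_start_notify().  It fires when the
 * daemon/app does not respond within TIMEOUT_LIMIT seconds: the request
 * is pulled off whichever queue it sits on and a failure is reported to
 * the host.
 */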
static void
hv_vss_timeout(void *arg)
{
	hv_vss_req_internal *reqp = arg;
	hv_vss_req_internal *request __diagused;
	hv_vss_sc* sc = reqp->sc;
	uint64_t req_id = reqp->vss_req.opt_msg.msgid;
	/* pending_mutex is held; the callout was initialized with it. */
	KASSERT(mtx_owned(&sc->pending_mutex), ("mutex lock is not owned!"));
	request = hv_vss_drain_req_queue_locked(sc, req_id);
	KASSERT(request != NULL, ("timeout but failed to find request"));
	hv_vss_notify_host_result_locked(reqp, HV_E_FAIL);
}

/*
 * Initialize an internal request from a message just received from the host.
 */
static void
hv_vss_init_req(hv_vss_req_internal *reqp,
    uint32_t recvlen, uint64_t requestid, uint8_t *vss_buf, hv_vss_sc *sc)
{
	struct timespec vm_ts;
	struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;

	memset(reqp, 0, __offsetof(hv_vss_req_internal, callout));
	reqp->host_msg_len = recvlen;
	reqp->host_msg_id = requestid;
	reqp->rcv_buf = vss_buf;
	reqp->sc = sc;
	memcpy(&reqp->vss_req.msg,
	    (struct hv_vss_msg *)vss_buf, sizeof(struct hv_vss_msg));
	/* set the opt for users */
	switch (msg->hdr.vss_hdr.operation) {
	case VSS_OP_FREEZE:
		reqp->vss_req.opt_msg.opt = HV_VSS_FREEZE;
		break;
	case VSS_OP_THAW:
		reqp->vss_req.opt_msg.opt = HV_VSS_THAW;
		break;
	case VSS_OP_HOT_BACKUP:
		reqp->vss_req.opt_msg.opt = HV_VSS_CHECK;
		break;
	}
	/* Use a timestamp as the msg request ID */
	nanotime(&vm_ts);
	reqp->vss_req.opt_msg.msgid = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec;
}

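/*
 * Take a request off the free list.  Returns NULL if a previous
 * transaction is still in flight or the free list is exhausted.
 * The pending mutex must be held.
 */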
static hv_vss_req_internal*
hv_vss_get_new_req_locked(hv_vss_sc *sc)
{
	hv_vss_req_internal *reqp;
	if (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue) ||
	    !STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue) ||
	    !STAILQ_EMPTY(&sc->app_sc.to_notify_queue) ||
	    !STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) {
		/*
		 * A new request arrived from the host before the
		 * previous requests were finished.
		 */
		hv_vss_log_info("%s: Warning: new request arrived "
		    "before previous requests finished\n", __func__);
		return (NULL);
	}
	if (LIST_EMPTY(&sc->req_free_list)) {
		/* TODO Error: no buffer */
		hv_vss_log_info("Error: No buffer\n");
		return (NULL);
	}
	reqp = LIST_FIRST(&sc->req_free_list);
	LIST_REMOVE(reqp, link);
	return (reqp);
}

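/*
 * Start the notification sequence for a new host request and arm the
 * timeout callout.
 */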
static void
hv_vss_start_notify(hv_vss_req_internal *reqp, uint32_t opt)
{
	hv_vss_sc *sc = reqp->sc;
	/*
	 * Freeze/Check notification sequence: kernel -> app -> daemon(fs)
	 * Thaw notification sequence:         kernel -> daemon(fs) -> app
	 *
	 * We should wake up the daemon, in case it's doing poll().
	 * The response should be received within TIMEOUT_LIMIT seconds;
	 * otherwise, the timeout handler is triggered.
	 */
	switch (opt) {
	case VSS_OP_FREEZE:
	case VSS_OP_HOT_BACKUP:
		if (sc->app_register_done)
			hv_vss_notify(&sc->app_sc, reqp);
		else
			hv_vss_notify(&sc->daemon_sc, reqp);
		callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz,
		    hv_vss_timeout, reqp);
		break;
	case VSS_OP_THAW:
		hv_vss_notify(&sc->daemon_sc, reqp);
		callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz,
		    hv_vss_timeout, reqp);
		break;
	}
}

/*
 * Read the VSS request buffer from the host and interact with the daemon.
 */
static void
hv_vss_process_request(void *context, int pending __unused)
{
	uint8_t *vss_buf;
	struct vmbus_channel *channel;
	uint32_t recvlen = 0;
	uint64_t requestid;
	struct vmbus_icmsg_hdr *icmsghdrp;
	int ret = 0;
	hv_vss_sc *sc;
	hv_vss_req_internal *reqp;

	hv_vss_log_info("%s: entering\n", __func__);

	sc = (hv_vss_sc*)context;
	vss_buf = sc->util_sc.ic_buf;
	channel = vmbus_get_channel(sc->dev);

	recvlen = sc->util_sc.ic_buflen;
	ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid);
	KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough"));
	/* XXX check recvlen to make sure that it contains enough data */

	while ((ret == 0) && (recvlen > 0)) {
		icmsghdrp = (struct vmbus_icmsg_hdr *)vss_buf;

		if (icmsghdrp->ic_type == HV_ICMSGTYPE_NEGOTIATE) {
			ret = vmbus_ic_negomsg(&sc->util_sc, vss_buf,
			    &recvlen, VSS_FWVER, VSS_MSGVER);
			hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
			    recvlen, requestid, ret);
			hv_vss_log_info("%s: version negotiated\n", __func__);
		} else if (!hv_vss_is_daemon_killed_after_launch(sc)) {
			struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
			switch (msg->hdr.vss_hdr.operation) {
			case VSS_OP_FREEZE:
			case VSS_OP_THAW:
			case VSS_OP_HOT_BACKUP:
				mtx_lock(&sc->pending_mutex);
				reqp = hv_vss_get_new_req_locked(sc);
				mtx_unlock(&sc->pending_mutex);
				if (reqp == NULL) {
					/* ignore this request from the host */
					break;
				}
				hv_vss_init_req(reqp, recvlen, requestid, vss_buf, sc);
				hv_vss_log_info("%s: received %s (%ju) from host\n",
				    __func__,
				    vss_opt_name[reqp->vss_req.opt_msg.opt],
				    (uintmax_t)reqp->vss_req.opt_msg.msgid);
				hv_vss_start_notify(reqp, msg->hdr.vss_hdr.operation);
				break;
			case VSS_OP_GET_DM_INFO:
				hv_vss_log_info("%s: received GET_DM_INFO from host\n",
				    __func__);
				msg->body.dm_info.flags = 0;
				hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
				    recvlen, requestid, HV_S_OK);
				break;
			default:
				device_printf(sc->dev, "Unknown opt from host: %d\n",
				    msg->hdr.vss_hdr.operation);
				break;
			}
		} else {
			/* the daemon was killed for some reason after it was launched */
			struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf;
			switch (msg->hdr.vss_hdr.operation) {
			case VSS_OP_FREEZE:
				hv_vss_log_info("%s: response fail for FREEZE\n",
				    __func__);
				break;
			case VSS_OP_THAW:
				hv_vss_log_info("%s: response fail for THAW\n",
				    __func__);
				break;
			case VSS_OP_HOT_BACKUP:
				hv_vss_log_info("%s: response fail for HOT_BACKUP\n",
				    __func__);
				msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY;
				break;
			case VSS_OP_GET_DM_INFO:
				hv_vss_log_info("%s: response fail for GET_DM_INFO\n",
				    __func__);
				msg->body.dm_info.flags = 0;
				break;
			default:
				device_printf(sc->dev, "Unknown opt from host: %d\n",
				    msg->hdr.vss_hdr.operation);
				break;
			}
			hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev),
			    recvlen, requestid, HV_E_FAIL);
		}
		/*
		 * Try reading the next buffer.
		 */
		recvlen = sc->util_sc.ic_buflen;
		ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid);
		KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough"));
		/* XXX check recvlen to make sure that it contains enough data */

		hv_vss_log_info("%s: read: context %p, ret=%d, recvlen=%d\n",
		    __func__, context, ret, recvlen);
	}
}

static int
hv_vss_probe(device_t dev)
{
	return (vmbus_ic_probe(dev, vmbus_vss_descs));
}

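/*
 * Pre-allocate a small pool of request structures onto the free list and
 * initialize the notify/ack queues for both the daemon and the app.
 */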
static int
hv_vss_init_send_receive_queue(device_t dev)
{
	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
	int i;
	const int max_list = 4; /* It is big enough for the list */
	struct hv_vss_req_internal* reqp;

	LIST_INIT(&sc->req_free_list);
	STAILQ_INIT(&sc->daemon_sc.to_notify_queue);
	STAILQ_INIT(&sc->daemon_sc.to_ack_queue);
	STAILQ_INIT(&sc->app_sc.to_notify_queue);
	STAILQ_INIT(&sc->app_sc.to_ack_queue);

	for (i = 0; i < max_list; i++) {
		reqp = malloc(sizeof(struct hv_vss_req_internal),
		    M_DEVBUF, M_WAITOK|M_ZERO);
		LIST_INSERT_HEAD(&sc->req_free_list, reqp, link);
		callout_init_mtx(&reqp->callout, &sc->pending_mutex, 0);
	}
	return (0);
}

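/*
 * Free all request structures, including any still sitting on the
 * notify/ack queues.
 */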
static int
hv_vss_destroy_send_receive_queue(device_t dev)
{
	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
	hv_vss_req_internal* reqp;

	while (!LIST_EMPTY(&sc->req_free_list)) {
		reqp = LIST_FIRST(&sc->req_free_list);
		LIST_REMOVE(reqp, link);
		free(reqp, M_DEVBUF);
	}

	while (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue)) {
		reqp = STAILQ_FIRST(&sc->daemon_sc.to_notify_queue);
		STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_notify_queue, slink);
		free(reqp, M_DEVBUF);
	}

	while (!STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue)) {
		reqp = STAILQ_FIRST(&sc->daemon_sc.to_ack_queue);
		STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_ack_queue, slink);
		free(reqp, M_DEVBUF);
	}

	while (!STAILQ_EMPTY(&sc->app_sc.to_notify_queue)) {
		reqp = STAILQ_FIRST(&sc->app_sc.to_notify_queue);
		STAILQ_REMOVE_HEAD(&sc->app_sc.to_notify_queue, slink);
		free(reqp, M_DEVBUF);
	}

	while (!STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) {
		reqp = STAILQ_FIRST(&sc->app_sc.to_ack_queue);
		STAILQ_REMOVE_HEAD(&sc->app_sc.to_ack_queue, slink);
		free(reqp, M_DEVBUF);
	}
	return (0);
}

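/*
 * Attach: set up the sysctl log knob, the request pool, and the two
 * character devices, then hook up the VMBus IC channel callback.
 */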
static int
hv_vss_attach(device_t dev)
{
	int error;
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;

	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);

	sc->dev = dev;
	mtx_init(&sc->pending_mutex, "hv_vss pending mutex", NULL, MTX_DEF);

	ctx = device_get_sysctl_ctx(dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));

	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_vss_log",
	    CTLFLAG_RWTUN, &hv_vss_log, 0, "Hyper-V VSS service log level");

	TASK_INIT(&sc->task, 0, hv_vss_process_request, sc);
	hv_vss_init_send_receive_queue(dev);
	/* create character device for file system freeze/thaw */
	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
		    &sc->hv_vss_dev,
		    &hv_vss_cdevsw,
		    0,
		    UID_ROOT,
		    GID_WHEEL,
		    0640,
		    FS_VSS_DEV_NAME);

	if (error != 0) {
		hv_vss_log_info("Failed to create '%s': %d\n", FS_VSS_DEV_NAME, error);
		return (error);
	}
	sc->hv_vss_dev->si_drv1 = &sc->daemon_sc;
	sc->daemon_sc.sc = sc;
	/* create character device for application freeze/thaw */
	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
		    &sc->hv_appvss_dev,
		    &hv_appvss_cdevsw,
		    0,
		    UID_ROOT,
		    GID_WHEEL,
		    0640,
		    APP_VSS_DEV_NAME);

	if (error != 0) {
		hv_vss_log_info("Failed to create '%s': %d\n", APP_VSS_DEV_NAME, error);
		return (error);
	}
	sc->hv_appvss_dev->si_drv1 = &sc->app_sc;
	sc->app_sc.sc = sc;

	return (vmbus_ic_attach(dev, hv_vss_callback));
}

static int
hv_vss_detach(device_t dev)
{
	hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev);
	mtx_destroy(&sc->pending_mutex);
	if (sc->daemon_sc.proc_task != NULL) {
		PROC_LOCK(sc->daemon_sc.proc_task);
		kern_psignal(sc->daemon_sc.proc_task, SIGKILL);
		PROC_UNLOCK(sc->daemon_sc.proc_task);
	}
	if (sc->app_sc.proc_task != NULL) {
		PROC_LOCK(sc->app_sc.proc_task);
		kern_psignal(sc->app_sc.proc_task, SIGKILL);
		PROC_UNLOCK(sc->app_sc.proc_task);
	}
	hv_vss_destroy_send_receive_queue(dev);
	destroy_dev(sc->hv_vss_dev);
	destroy_dev(sc->hv_appvss_dev);
	return (vmbus_ic_detach(dev));
}

static device_method_t vss_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, hv_vss_probe),
	DEVMETHOD(device_attach, hv_vss_attach),
	DEVMETHOD(device_detach, hv_vss_detach),
	{ 0, 0 }
};

static driver_t vss_driver = { "hvvss", vss_methods, sizeof(hv_vss_sc)};

DRIVER_MODULE(hv_vss, vmbus, vss_driver, NULL, NULL);
MODULE_VERSION(hv_vss, 1);
MODULE_DEPEND(hv_vss, vmbus, 1, 1, 1);
1057