1/*-
2 * Copyright (c) 2014 Microsoft Corp.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
/*
 * A common driver for all Hyper-V utility services.
 */
32
33#include <sys/param.h>
34#include <sys/kernel.h>
35#include <sys/bus.h>
36#include <sys/malloc.h>
37#include <sys/module.h>
38#include <sys/reboot.h>
39#include <sys/timetc.h>
40#include <sys/syscallsubr.h>
41
42#include <dev/hyperv/include/hyperv.h>
43#include "hv_kvp.h"
44
/*
 * Time Sync data: the context handed to the time sync work item.
 * hv_adj_guesttime() allocates one of these from M_DEVBUF and
 * hv_set_host_time() frees it once it has been processed.
 */
typedef struct {
	uint64_t data;	/* host time sample; hv_set_host_time() multiplies it by 100 to get ns */
} time_sync_data;
49
/* Channel callbacks, defined below and referenced from service_table[]. */
static void hv_shutdown_cb(void *context);
static void hv_heartbeat_cb(void *context);
static void hv_timesync_cb(void *context);

/* Init hook referenced by the time sync entry of service_table[]. */
static int hv_timesync_init(hv_vmbus_service *serv);
55
56/*
57 * Note: GUID codes below are predefined by the host hypervisor
58 * (Hyper-V and Azure)interface and required for correct operation.
59 */
60hv_vmbus_service service_table[] = {
61	/* Shutdown Service */
62	{ .guid.data = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49,
63			0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB},
64	  .name  = "Hyper-V Shutdown Service\n",
65	  .enabled = TRUE,
66	  .callback = hv_shutdown_cb,
67	},
68
69        /* Time Synch Service */
70        { .guid.data = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
71			0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf},
72	  .name = "Hyper-V Time Synch Service\n",
73	  .enabled = TRUE,
74	  .init = hv_timesync_init,
75	  .callback = hv_timesync_cb,
76	},
77
78        /* Heartbeat Service */
79        { .guid.data = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
80			0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d},
81	  .name = "Hyper-V Heartbeat Service\n",
82	  .enabled = TRUE,
83  	  .callback = hv_heartbeat_cb,
84	},
85
86        /* KVP (Key Value Pair) Service */
87        { .guid.data = {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d,
88			0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3,  0xe6},
89	  .name = "Hyper-V KVP Service\n",
90	  .enabled = TRUE,
91	  .init = hv_kvp_init,
92	  .callback = hv_kvp_callback,
93	},
94};
95
/*
 * Receive buffer pointers. There is one buffer per utility service,
 * indexed by the service's position in service_table[]. A buffer is
 * allocated in hv_util_attach() and freed (and the slot reset to NULL)
 * in hv_util_detach() or on an attach failure.
 */
uint8_t *receive_buffer[HV_MAX_UTIL_SERVICES];

/*
 * Set to TRUE by hv_util_detach() once it has called hv_kvp_deinit(), so
 * that the call is not repeated by later detaches.
 */
static boolean_t destroyed_kvp = FALSE;
103
/*
 * Payload of a non-negotiate time sync message. hv_timesync_cb() finds it
 * in the receive buffer immediately after the pipe header and the IC
 * message header.
 */
struct hv_ictimesync_data {
	uint64_t    parenttime;		/* passed to hv_adj_guesttime() as the host time */
	uint64_t    childtime;		/* not used by hv_timesync_cb() */
	uint64_t    roundtriptime;	/* not used by hv_timesync_cb() */
	uint8_t     flags;		/* tested against HV_ICTIMESYNCFLAG_* in hv_adj_guesttime() */
} __packed;
110
111static int
112hv_timesync_init(hv_vmbus_service *serv)
113{
114
115	serv->work_queue = hv_work_queue_create("Time Sync");
116	if (serv->work_queue == NULL)
117		return (ENOMEM);
118	return (0);
119}
120
121static void
122hv_negotiate_version(
123	struct hv_vmbus_icmsg_hdr*		icmsghdrp,
124	struct hv_vmbus_icmsg_negotiate*	negop,
125	uint8_t*				buf)
126{
127	icmsghdrp->icmsgsize = 0x10;
128
129	negop = (struct hv_vmbus_icmsg_negotiate *)&buf[
130		sizeof(struct hv_vmbus_pipe_hdr) +
131		sizeof(struct hv_vmbus_icmsg_hdr)];
132
133	if (negop->icframe_vercnt >= 2 &&
134	    negop->icversion_data[1].major == 3) {
135		negop->icversion_data[0].major = 3;
136		negop->icversion_data[0].minor = 0;
137		negop->icversion_data[1].major = 3;
138		negop->icversion_data[1].minor = 0;
139	} else {
140		negop->icversion_data[0].major = 1;
141		negop->icversion_data[0].minor = 0;
142		negop->icversion_data[1].major = 1;
143		negop->icversion_data[1].minor = 0;
144	}
145
146	negop->icframe_vercnt = 1;
147	negop->icmsg_vercnt = 1;
148}
149
150
151/**
152 * Set host time based on time sync message from host
153 */
154static void
155hv_set_host_time(void *context)
156{
157 	time_sync_data* time_msg = (time_sync_data*) context;
158	uint64_t hosttime = time_msg->data;
159	struct timespec guest_ts, host_ts;
160	uint64_t host_tns;
161	int64_t diff;
162	int error;
163
164	host_tns = (hosttime - HV_WLTIMEDELTA) * 100;
165	host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC);
166	host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC);
167
168	nanotime(&guest_ts);
169
170	diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec;
171
172	/*
173	 * If host differs by 5 seconds then make the guest catch up
174	 */
175	if (diff > 5 || diff < -5) {
176		error = kern_clock_settime(curthread, CLOCK_REALTIME,
177		    &host_ts);
178	}
179
180	/*
181	 * Free the hosttime that was allocated in hv_adj_guesttime()
182	 */
183	free(time_msg, M_DEVBUF);
184}
185
186/**
187 * @brief Synchronize time with host after reboot, restore, etc.
188 *
189 * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
190 * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
191 * message after the timesync channel is opened. Since the hv_utils module is
192 * loaded after hv_vmbus, the first message is usually missed. The other
193 * thing is, systime is automatically set to emulated hardware clock which may
194 * not be UTC time or in the same time zone. So, to override these effects, we
195 * use the first 50 time samples for initial system time setting.
196 */
197static inline
198void hv_adj_guesttime(uint64_t hosttime, uint8_t flags)
199{
200	time_sync_data* time_msg;
201
202	time_msg = malloc(sizeof(time_sync_data), M_DEVBUF, M_NOWAIT);
203
204	if (time_msg == NULL)
205		return;
206
207	time_msg->data = hosttime;
208
209	if ((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) {
210		hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue,
211		    hv_set_host_time, time_msg);
212	} else if ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0) {
213		hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue,
214		    hv_set_host_time, time_msg);
215	} else {
216		free(time_msg, M_DEVBUF);
217	}
218}
219
220/**
221 * Time Sync Channel message handler
222 */
223static void
224hv_timesync_cb(void *context)
225{
226	hv_vmbus_channel*	channel = context;
227	hv_vmbus_icmsg_hdr*	icmsghdrp;
228	uint32_t		recvlen;
229	uint64_t		requestId;
230	int			ret;
231	uint8_t*		time_buf;
232	struct hv_ictimesync_data* timedatap;
233
234	time_buf = receive_buffer[HV_TIME_SYNCH];
235
236	ret = hv_vmbus_channel_recv_packet(channel, time_buf,
237					    PAGE_SIZE, &recvlen, &requestId);
238
239	if ((ret == 0) && recvlen > 0) {
240	    icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[
241		sizeof(struct hv_vmbus_pipe_hdr)];
242
243	    if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
244		hv_negotiate_version(icmsghdrp, NULL, time_buf);
245	    } else {
246		timedatap = (struct hv_ictimesync_data *) &time_buf[
247		    sizeof(struct hv_vmbus_pipe_hdr) +
248			sizeof(struct hv_vmbus_icmsg_hdr)];
249		hv_adj_guesttime(timedatap->parenttime, timedatap->flags);
250	    }
251
252	    icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION
253		| HV_ICMSGHDRFLAG_RESPONSE;
254
255	    hv_vmbus_channel_send_packet(channel, time_buf,
256		recvlen, requestId,
257		HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
258	}
259}
260
261/**
262 * Shutdown
263 */
264static void
265hv_shutdown_cb(void *context)
266{
267	uint8_t*		buf;
268	hv_vmbus_channel*		channel = context;
269	uint8_t			execute_shutdown = 0;
270	hv_vmbus_icmsg_hdr*		icmsghdrp;
271	uint32_t		recv_len;
272	uint64_t		request_id;
273	int				ret;
274	hv_vmbus_shutdown_msg_data*	shutdown_msg;
275
276	buf = receive_buffer[HV_SHUT_DOWN];
277
278	ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE,
279					    &recv_len, &request_id);
280
281	if ((ret == 0) && recv_len > 0) {
282
283	    icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
284		&buf[sizeof(struct hv_vmbus_pipe_hdr)];
285
286	    if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
287		hv_negotiate_version(icmsghdrp, NULL, buf);
288
289	    } else {
290		shutdown_msg =
291		    (struct hv_vmbus_shutdown_msg_data *)
292		    &buf[sizeof(struct hv_vmbus_pipe_hdr) +
293			sizeof(struct hv_vmbus_icmsg_hdr)];
294
295		switch (shutdown_msg->flags) {
296		    case 0:
297		    case 1:
298			icmsghdrp->status = HV_S_OK;
299			execute_shutdown = 1;
300			if(bootverbose)
301			    printf("Shutdown request received -"
302				    " graceful shutdown initiated\n");
303			break;
304		    default:
305			icmsghdrp->status = HV_E_FAIL;
306			execute_shutdown = 0;
307			printf("Shutdown request received -"
308			    " Invalid request\n");
309			break;
310		    }
311	    }
312
313	    icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
314				 HV_ICMSGHDRFLAG_RESPONSE;
315
316	    hv_vmbus_channel_send_packet(channel, buf,
317					recv_len, request_id,
318					HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
319	}
320
321	if (execute_shutdown)
322	    shutdown_nice(RB_POWEROFF);
323}
324
325/**
326 * Process heartbeat message
327 */
328static void
329hv_heartbeat_cb(void *context)
330{
331	uint8_t*		buf;
332	hv_vmbus_channel*	channel = context;
333	uint32_t		recvlen;
334	uint64_t		requestid;
335	int			ret;
336
337	struct hv_vmbus_heartbeat_msg_data*	heartbeat_msg;
338	struct hv_vmbus_icmsg_hdr*		icmsghdrp;
339
340	buf = receive_buffer[HV_HEART_BEAT];
341
342	ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen,
343					    &requestid);
344
345	if ((ret == 0) && recvlen > 0) {
346
347	    icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
348		&buf[sizeof(struct hv_vmbus_pipe_hdr)];
349
350	    if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
351		hv_negotiate_version(icmsghdrp, NULL, buf);
352
353	    } else {
354		heartbeat_msg =
355		    (struct hv_vmbus_heartbeat_msg_data *)
356			&buf[sizeof(struct hv_vmbus_pipe_hdr) +
357			     sizeof(struct hv_vmbus_icmsg_hdr)];
358
359		heartbeat_msg->seq_num += 1;
360	    }
361
362	    icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
363				 HV_ICMSGHDRFLAG_RESPONSE;
364
365	    hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid,
366		HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
367	}
368}
369
370
371static int
372hv_util_probe(device_t dev)
373{
374	int i;
375	int rtn_value = ENXIO;
376
377	for (i = 0; i < HV_MAX_UTIL_SERVICES; i++) {
378	    const char *p = vmbus_get_type(dev);
379	    if (service_table[i].enabled && !memcmp(p, &service_table[i].guid, sizeof(hv_guid))) {
380		device_set_softc(dev, (void *) (&service_table[i]));
381		rtn_value = BUS_PROBE_DEFAULT;
382	    }
383	}
384
385	return rtn_value;
386}
387
388static int
389hv_util_attach(device_t dev)
390{
391	struct hv_device*		hv_dev;
392	struct hv_vmbus_service*	service;
393	int				ret;
394	size_t				receive_buffer_offset;
395
396	hv_dev = vmbus_get_devctx(dev);
397	service = device_get_softc(dev);
398	receive_buffer_offset = service - &service_table[0];
399	device_printf(dev, "Hyper-V Service attaching: %s\n", service->name);
400	receive_buffer[receive_buffer_offset] =
401		malloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
402
403	if (service->init != NULL) {
404	    ret = service->init(service);
405	    if (ret) {
406		ret = ENODEV;
407		goto error0;
408	    }
409	}
410
411	/*
412	 * These services are not performance critical and do not need
413	 * batched reading. Furthermore, some services such as KVP can
414	 * only handle one message from the host at a time.
415	 * Turn off batched reading for all util drivers before we open the
416	 * channel.
417	 */
418	hv_set_channel_read_state(hv_dev->channel, FALSE);
419
420	ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE,
421		    4 * PAGE_SIZE, NULL, 0,
422		    service->callback, hv_dev->channel);
423
424	if (ret)
425	    goto error0;
426
427	return (0);
428
429	error0:
430
431	    free(receive_buffer[receive_buffer_offset], M_DEVBUF);
432	    receive_buffer[receive_buffer_offset] = NULL;
433
434	return (ret);
435}
436
437static int
438hv_util_detach(device_t dev)
439{
440	struct hv_device*		hv_dev;
441	struct hv_vmbus_service*	service;
442	size_t				receive_buffer_offset;
443
444	if (!destroyed_kvp) {
445		hv_kvp_deinit();
446		destroyed_kvp = TRUE;
447	}
448
449	hv_dev = vmbus_get_devctx(dev);
450
451	hv_vmbus_channel_close(hv_dev->channel);
452	service = device_get_softc(dev);
453	receive_buffer_offset = service - &service_table[0];
454
455	if (service->work_queue != NULL)
456	    hv_work_queue_close(service->work_queue);
457
458	free(receive_buffer[receive_buffer_offset], M_DEVBUF);
459	receive_buffer[receive_buffer_offset] = NULL;
460	return (0);
461}
462
/*
 * Boot-time hook registered through SYSINIT at the end of this file.
 * It is intentionally empty: no global setup is performed here.
 */
static void
hv_util_init(void)
{
}
467
468static int
469hv_util_modevent(module_t mod, int event, void *arg)
470{
471	switch (event) {
472        case MOD_LOAD:
473                break;
474        case MOD_UNLOAD:
475		break;
476	default:
477		break;
478        }
479        return (0);
480}
481
482static device_method_t util_methods[] = {
483	/* Device interface */
484	DEVMETHOD(device_probe, hv_util_probe),
485	DEVMETHOD(device_attach, hv_util_attach),
486	DEVMETHOD(device_detach, hv_util_detach),
487	DEVMETHOD(device_shutdown, bus_generic_shutdown),
488	{ 0, 0 } }
489;
490
/*
 * Driver description: name, method table, and a third field of 0
 * (presumably the softc size -- TODO confirm). The softc pointer is not
 * allocated by the bus; hv_util_probe() installs a service_table[] entry
 * with device_set_softc().
 */
static driver_t util_driver = { "hyperv-utils", util_methods, 0 };

static devclass_t util_devclass;

/* Register the driver on the vmbus bus, with hv_util_modevent as its event handler. */
DRIVER_MODULE(hv_utils, vmbus, util_driver, util_devclass, hv_util_modevent, 0);
MODULE_VERSION(hv_utils, 1);
MODULE_DEPEND(hv_utils, vmbus, 1, 1, 1);

/* Registers hv_util_init(), which currently has an empty body. */
SYSINIT(hv_util_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1,
	hv_util_init, NULL);
501