1/*	$NetBSD: viomb.c,v 1.17 2023/03/25 11:04:34 mlelstv Exp $	*/
2
3/*
4 * Copyright (c) 2010 Minoura Makoto.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: viomb.c,v 1.17 2023/03/25 11:04:34 mlelstv Exp $");
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/kernel.h>
34#include <sys/bus.h>
35#include <sys/condvar.h>
36#include <sys/device.h>
37#include <sys/kthread.h>
38#include <sys/mutex.h>
39#include <sys/sysctl.h>
40#include <uvm/uvm_page.h>
41#include <sys/module.h>
42
43#include <dev/pci/virtioreg.h>
44#include <dev/pci/virtiovar.h>
45
46#include "ioconf.h"
47
/* Configuration registers (offsets into device-specific config space) */
#define VIRTIO_BALLOON_CONFIG_NUM_PAGES	0 /* 32bit */ /* host-requested balloon size, in 4K pages */
#define VIRTIO_BALLOON_CONFIG_ACTUAL	4 /* 32bit */ /* pages we have actually given back */

/* Feature bits */
#define VIRTIO_BALLOON_F_MUST_TELL_HOST (1<<0)	/* host must ack before guest reuses pages */
#define VIRTIO_BALLOON_F_STATS_VQ	(1<<1)	/* optional statistics virtqueue (not used here) */

/* snprintb(3) format string for the negotiated feature bits */
#define VIRTIO_BALLOON_FLAG_BITS		\
	VIRTIO_COMMON_FLAG_BITS			\
	"b\x01" "STATS_VQ\0"			\
	"b\x00" "MUST_TELL_HOST\0"

/* Max page-frame numbers carried by one inflate/deflate request */
#define PGS_PER_REQ		(256) /* 1MB, 4KB/page */
/* Indexes into sc_vq[] */
#define VQ_INFLATE	0
#define VQ_DEFLATE	1


/* This driver assumes the VM page size equals the virtio balloon page size. */
CTASSERT((PAGE_SIZE) == (VIRTIO_PAGE_SIZE)); /* XXX */

/*
 * One in-flight balloon request; only a single request (sc_req) is ever
 * outstanding at a time.
 */
struct balloon_req {
	bus_dmamap_t			bl_dmamap;	/* maps bl_pages[] for the device */
	struct pglist			bl_pglist;	/* the vm_pages backing this request */
	int				bl_nentries;	/* number of valid entries in bl_pages[] */
	uint32_t			bl_pages[PGS_PER_REQ];	/* little-endian PFNs handed to the host */
};

struct viomb_softc {
	device_t		sc_dev;

	struct virtio_softc	*sc_virtio;	/* parent virtio bus */
	struct virtqueue	sc_vq[2];	/* [VQ_INFLATE], [VQ_DEFLATE] */

	unsigned int		sc_npages;	/* target size (from config space) */
	unsigned int		sc_actual;	/* current size (mirrored to config space) */
	int			sc_inflight;	/* pages in a submitted, unfinished request */
	struct balloon_req	sc_req;		/* the single request buffer */
	struct pglist		sc_balloon_pages;	/* pages currently held by the balloon */

	/* completion flags set from interrupt context, consumed by the kthread */
	int			sc_inflate_done;
	int			sc_deflate_done;

	kcondvar_t		sc_wait;	/* kthread sleeps here */
	kmutex_t		sc_waitlock;	/* protects sc_wait and the done flags */
};

static int	balloon_initialized = 0; /* multiple balloon is not allowed */

static int	viomb_match(device_t, cfdata_t, void *);
static void	viomb_attach(device_t, device_t, void *);
static void	viomb_read_config(struct viomb_softc *);
static int	viomb_config_change(struct virtio_softc *);
static int	inflate(struct viomb_softc *);
static int	inflateq_done(struct virtqueue *);
static int	inflate_done(struct viomb_softc *);
static int	deflate(struct viomb_softc *);
static int	deflateq_done(struct virtqueue *);
static int	deflate_done(struct viomb_softc *);
static void	viomb_thread(void *);

CFATTACH_DECL_NEW(viomb, sizeof(struct viomb_softc),
    viomb_match, viomb_attach, NULL, NULL);
110
111static int
112viomb_match(device_t parent, cfdata_t match, void *aux)
113{
114	struct virtio_attach_args *va = aux;
115
116	if (va->sc_childdevid == VIRTIO_DEVICE_ID_BALLOON)
117		return 1;
118
119	return 0;
120}
121
122static void
123viomb_attach(device_t parent, device_t self, void *aux)
124{
125	struct viomb_softc *sc = device_private(self);
126	struct virtio_softc *vsc = device_private(parent);
127	const struct sysctlnode *node;
128	uint64_t features;
129
130	if (virtio_child(vsc) != NULL) {
131		aprint_normal(": child already attached for %s; "
132			      "something wrong...\n", device_xname(parent));
133		return;
134	}
135
136	if (balloon_initialized++) {
137		aprint_normal(": balloon already exists; something wrong...\n");
138		return;
139	}
140
141	/* fail on non-4K page size archs */
142	if (VIRTIO_PAGE_SIZE != PAGE_SIZE){
143		aprint_normal("non-4K page size arch found, needs %d, got %d\n",
144		    VIRTIO_PAGE_SIZE, PAGE_SIZE);
145		return;
146	}
147
148	sc->sc_dev = self;
149	sc->sc_virtio = vsc;
150
151	virtio_child_attach_start(vsc, self, IPL_VM,
152	    VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_FLAG_BITS);
153
154	features = virtio_features(vsc);
155	if (features == 0)
156		goto err_none;
157
158	viomb_read_config(sc);
159	sc->sc_inflight = 0;
160	TAILQ_INIT(&sc->sc_balloon_pages);
161
162	sc->sc_inflate_done = sc->sc_deflate_done = 0;
163	mutex_init(&sc->sc_waitlock, MUTEX_DEFAULT, IPL_VM); /* spin */
164	cv_init(&sc->sc_wait, "balloon");
165
166	virtio_init_vq_vqdone(vsc, &sc->sc_vq[VQ_INFLATE], VQ_INFLATE,
167	    inflateq_done);
168	virtio_init_vq_vqdone(vsc, &sc->sc_vq[VQ_DEFLATE], VQ_DEFLATE,
169	    deflateq_done);
170
171	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQ_INFLATE],
172			     sizeof(uint32_t)*PGS_PER_REQ, 1,
173			     "inflate") != 0)
174		goto err_mutex;
175	if (virtio_alloc_vq(vsc, &sc->sc_vq[VQ_DEFLATE],
176			     sizeof(uint32_t)*PGS_PER_REQ, 1,
177			     "deflate") != 0)
178		goto err_vq0;
179
180	if (bus_dmamap_create(virtio_dmat(vsc), sizeof(uint32_t)*PGS_PER_REQ,
181			      1, sizeof(uint32_t)*PGS_PER_REQ, 0,
182			      BUS_DMA_NOWAIT, &sc->sc_req.bl_dmamap)) {
183		aprint_error_dev(sc->sc_dev, "dmamap creation failed.\n");
184		goto err_vq;
185	}
186	if (bus_dmamap_load(virtio_dmat(vsc), sc->sc_req.bl_dmamap,
187			    &sc->sc_req.bl_pages[0],
188			    sizeof(uint32_t) * PGS_PER_REQ,
189			    NULL, BUS_DMA_NOWAIT)) {
190		aprint_error_dev(sc->sc_dev, "dmamap load failed.\n");
191		goto err_dmamap;
192	}
193
194	if (virtio_child_attach_finish(vsc, sc->sc_vq, __arraycount(sc->sc_vq),
195	    viomb_config_change, VIRTIO_F_INTR_MPSAFE) != 0)
196		goto err_out;
197
198	if (kthread_create(PRI_IDLE, KTHREAD_MPSAFE, NULL,
199			   viomb_thread, sc, NULL, "viomb")) {
200		aprint_error_dev(sc->sc_dev, "cannot create kthread.\n");
201		goto err_out;
202	}
203
204	sysctl_createv(NULL, 0, NULL, &node, 0, CTLTYPE_NODE,
205		       "viomb", SYSCTL_DESCR("VirtIO Balloon status"),
206		       NULL, 0, NULL, 0,
207		       CTL_HW, CTL_CREATE, CTL_EOL);
208	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
209		       "npages", SYSCTL_DESCR("VirtIO Balloon npages value"),
210		       NULL, 0, &sc->sc_npages, 0,
211		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
212	sysctl_createv(NULL, 0, NULL, NULL, 0, CTLTYPE_INT,
213		       "actual", SYSCTL_DESCR("VirtIO Balloon actual value"),
214		       NULL, 0, &sc->sc_actual, 0,
215		       CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL);
216	return;
217
218err_out:
219err_dmamap:
220	bus_dmamap_destroy(virtio_dmat(vsc), sc->sc_req.bl_dmamap);
221err_vq:
222	virtio_free_vq(vsc, &sc->sc_vq[VQ_DEFLATE]);
223err_vq0:
224	virtio_free_vq(vsc, &sc->sc_vq[VQ_INFLATE]);
225err_mutex:
226	cv_destroy(&sc->sc_wait);
227	mutex_destroy(&sc->sc_waitlock);
228err_none:
229	virtio_child_attach_failed(vsc);
230	return;
231}
232
233static void
234viomb_read_config(struct viomb_softc *sc)
235{
236	/* these values are explicitly specified as little-endian */
237	sc->sc_npages = virtio_read_device_config_le_4(sc->sc_virtio,
238		  VIRTIO_BALLOON_CONFIG_NUM_PAGES);
239
240	sc->sc_actual = virtio_read_device_config_le_4(sc->sc_virtio,
241		  VIRTIO_BALLOON_CONFIG_ACTUAL);
242}
243
244/*
245 * Config change callback: wakeup the kthread.
246 */
247static int
248viomb_config_change(struct virtio_softc *vsc)
249{
250	struct viomb_softc *sc = device_private(virtio_child(vsc));
251	unsigned int old;
252
253	old = sc->sc_npages;
254	viomb_read_config(sc);
255	mutex_enter(&sc->sc_waitlock);
256	cv_signal(&sc->sc_wait);
257	mutex_exit(&sc->sc_waitlock);
258	if (sc->sc_npages > old)
259		printf("%s: inflating balloon from %u to %u.\n",
260		       device_xname(sc->sc_dev), old, sc->sc_npages);
261	else if  (sc->sc_npages < old)
262		printf("%s: deflating balloon from %u to %u.\n",
263		       device_xname(sc->sc_dev), old, sc->sc_npages);
264
265	return 1;
266}
267
/*
 * Inflate: consume some amount of physical memory.
 *
 * Allocates up to PGS_PER_REQ pages from UVM, fills the request's PFN
 * array, and submits it on the inflate virtqueue.  Completion is
 * reported asynchronously via inflateq_done()/inflate_done().
 *
 * Returns 1 to ask the caller (viomb_thread) to sleep longer on a
 * transient allocation failure, 0 otherwise.
 */
static int
inflate(struct viomb_softc *sc)
{
	struct virtio_softc *vsc = sc->sc_virtio;
	int i, slot;
	uint64_t nvpages, nhpages;
	struct balloon_req *b;
	struct vm_page *p;
	struct virtqueue *vq = &sc->sc_vq[VQ_INFLATE];

	/* only one request may be outstanding at a time */
	if (sc->sc_inflight)
		return 0;
	/* pages still needed to reach the host's target, clamped per request */
	nvpages = sc->sc_npages - sc->sc_actual;
	if (nvpages > PGS_PER_REQ)
		nvpages = PGS_PER_REQ;
	/* identical here since PAGE_SIZE == VIRTIO_PAGE_SIZE (CTASSERT above) */
	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;

	b = &sc->sc_req;
	/* physical addresses must fit the device's 32-bit PFN format */
	if (uvm_pglistalloc(nhpages*PAGE_SIZE, 0, UINT32_MAX*(paddr_t)PAGE_SIZE,
			    0, 0, &b->bl_pglist, nhpages, 0)) {
		printf("%s: %" PRIu64 " pages of physical memory "
		       "could not be allocated, retrying...\n",
		       device_xname(sc->sc_dev), nhpages);
		return 1;	/* sleep longer */
	}

	b->bl_nentries = nvpages;
	i = 0;
	/* hand the host each page as a little-endian page frame number */
	TAILQ_FOREACH(p, &b->bl_pglist, pageq.queue) {
		b->bl_pages[i++] =
			htole32(VM_PAGE_TO_PHYS(p) / VIRTIO_PAGE_SIZE);
	}
	KASSERT(i == nvpages);

	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
		printf("%s: inflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		uvm_pglistfree(&b->bl_pglist);
		return 0;
	}
	/*
	 * NOTE(review): if virtio_enqueue_reserve() fails, the slot from
	 * virtio_enqueue_prep() is not explicitly released here — confirm
	 * that the reserve failure path frees it internally.
	 */
	if (virtio_enqueue_reserve(vsc, vq, slot, 1)) {
		printf("%s: inflate enqueue failed.\n",
		       device_xname(sc->sc_dev));
		uvm_pglistfree(&b->bl_pglist);
		return 0;
	}
	/* flush the PFN array to the device before it reads it */
	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap, 0,
	    sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
	virtio_enqueue_commit(vsc, vq, slot, true);
	sc->sc_inflight += nvpages;

	return 0;
}
325
326static int
327inflateq_done(struct virtqueue *vq)
328{
329	struct virtio_softc *vsc = vq->vq_owner;
330	struct viomb_softc *sc = device_private(virtio_child(vsc));
331
332	mutex_enter(&sc->sc_waitlock);
333	sc->sc_inflate_done = 1;
334	cv_signal(&sc->sc_wait);
335	mutex_exit(&sc->sc_waitlock);
336
337	return 1;
338}
339
340static int
341inflate_done(struct viomb_softc *sc)
342{
343	struct virtio_softc *vsc = sc->sc_virtio;
344	struct virtqueue *vq = &sc->sc_vq[VQ_INFLATE];
345	struct balloon_req *b;
346	int r, slot;
347	uint64_t nvpages;
348	struct vm_page *p;
349
350	r = virtio_dequeue(vsc, vq, &slot, NULL);
351	if (r != 0) {
352		printf("%s: inflate dequeue failed, errno %d.\n",
353		       device_xname(sc->sc_dev), r);
354		return 1;
355	}
356	virtio_dequeue_commit(vsc, vq, slot);
357
358	b = &sc->sc_req;
359	nvpages = b->bl_nentries;
360	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap,
361			0,
362			sizeof(uint32_t)*nvpages,
363			BUS_DMASYNC_POSTWRITE);
364	while (!TAILQ_EMPTY(&b->bl_pglist)) {
365		p = TAILQ_FIRST(&b->bl_pglist);
366		TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
367		TAILQ_INSERT_TAIL(&sc->sc_balloon_pages, p, pageq.queue);
368	}
369
370	sc->sc_inflight -= nvpages;
371	virtio_write_device_config_le_4(vsc,
372		     VIRTIO_BALLOON_CONFIG_ACTUAL,
373		     sc->sc_actual + nvpages);
374	viomb_read_config(sc);
375
376	return 1;
377}
378
379/*
380 * Deflate: free previously allocated memory.
381 */
382static int
383deflate(struct viomb_softc *sc)
384{
385	struct virtio_softc *vsc = sc->sc_virtio;
386	int i, slot;
387	uint64_t nvpages, nhpages;
388	struct balloon_req *b;
389	struct vm_page *p;
390	struct virtqueue *vq = &sc->sc_vq[VQ_DEFLATE];
391
392	nvpages = (sc->sc_actual + sc->sc_inflight) - sc->sc_npages;
393	if (nvpages > PGS_PER_REQ)
394		nvpages = PGS_PER_REQ;
395	nhpages = nvpages * VIRTIO_PAGE_SIZE / PAGE_SIZE;
396
397	b = &sc->sc_req;
398
399	b->bl_nentries = nvpages;
400	TAILQ_INIT(&b->bl_pglist);
401	for (i = 0; i < nhpages; i++) {
402		p = TAILQ_FIRST(&sc->sc_balloon_pages);
403		if (p == NULL)
404			break;
405		TAILQ_REMOVE(&sc->sc_balloon_pages, p, pageq.queue);
406		TAILQ_INSERT_TAIL(&b->bl_pglist, p, pageq.queue);
407		b->bl_pages[i] =
408			htole32(VM_PAGE_TO_PHYS(p) / VIRTIO_PAGE_SIZE);
409	}
410
411	if (virtio_enqueue_prep(vsc, vq, &slot) != 0) {
412		printf("%s: deflate enqueue failed.\n",
413		       device_xname(sc->sc_dev));
414		TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
415			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
416			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
417			    pageq.queue);
418		}
419		return 0;
420	}
421	if (virtio_enqueue_reserve(vsc, vq, slot, 1) != 0) {
422		printf("%s: deflate enqueue failed.\n",
423		       device_xname(sc->sc_dev));
424		TAILQ_FOREACH_REVERSE(p, &b->bl_pglist, pglist, pageq.queue) {
425			TAILQ_REMOVE(&b->bl_pglist, p, pageq.queue);
426			TAILQ_INSERT_HEAD(&sc->sc_balloon_pages, p,
427			    pageq.queue);
428		}
429		return 0;
430	}
431	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap, 0,
432	    sizeof(uint32_t)*nvpages, BUS_DMASYNC_PREWRITE);
433	virtio_enqueue(vsc, vq, slot, b->bl_dmamap, true);
434	virtio_enqueue_commit(vsc, vq, slot, true);
435	sc->sc_inflight -= nvpages;
436
437	if (!(virtio_features(vsc) & VIRTIO_BALLOON_F_MUST_TELL_HOST))
438		uvm_pglistfree(&b->bl_pglist);
439
440	return 0;
441}
442
443static int
444deflateq_done(struct virtqueue *vq)
445{
446	struct virtio_softc *vsc = vq->vq_owner;
447	struct viomb_softc *sc = device_private(virtio_child(vsc));
448
449	mutex_enter(&sc->sc_waitlock);
450	sc->sc_deflate_done = 1;
451	cv_signal(&sc->sc_wait);
452	mutex_exit(&sc->sc_waitlock);
453
454	return 1;
455}
456
457static int
458deflate_done(struct viomb_softc *sc)
459{
460	struct virtio_softc *vsc = sc->sc_virtio;
461	struct virtqueue *vq = &sc->sc_vq[VQ_DEFLATE];
462	struct balloon_req *b;
463	int r, slot;
464	uint64_t nvpages;
465
466	r = virtio_dequeue(vsc, vq, &slot, NULL);
467	if (r != 0) {
468		printf("%s: deflate dequeue failed, errno %d\n",
469		       device_xname(sc->sc_dev), r);
470		return 1;
471	}
472	virtio_dequeue_commit(vsc, vq, slot);
473
474	b = &sc->sc_req;
475	nvpages = b->bl_nentries;
476	bus_dmamap_sync(virtio_dmat(vsc), b->bl_dmamap,
477			0,
478			sizeof(uint32_t)*nvpages,
479			BUS_DMASYNC_POSTWRITE);
480
481	if (virtio_features(vsc) & VIRTIO_BALLOON_F_MUST_TELL_HOST)
482		uvm_pglistfree(&b->bl_pglist);
483
484	sc->sc_inflight += nvpages;
485	virtio_write_device_config_le_4(vsc,
486		     VIRTIO_BALLOON_CONFIG_ACTUAL,
487		     sc->sc_actual - nvpages);
488	viomb_read_config(sc);
489
490	return 1;
491}
492
493/*
494 * Kthread: sleeps, eventually inflate and deflate.
495 */
496static void
497viomb_thread(void *arg)
498{
499	struct viomb_softc *sc = arg;
500	int sleeptime, r;
501
502	for ( ; ; ) {
503		sleeptime = 30000;
504		if (sc->sc_npages > sc->sc_actual + sc->sc_inflight) {
505			if (sc->sc_inflight == 0) {
506				r = inflate(sc);
507				if (r != 0)
508					sleeptime = 10000;
509				else
510					sleeptime = 100;
511			} else
512				sleeptime = 20;
513		} else if (sc->sc_npages < sc->sc_actual + sc->sc_inflight) {
514			if (sc->sc_inflight == 0)
515				r = deflate(sc);
516			sleeptime = 100;
517		}
518
519	again:
520		mutex_enter(&sc->sc_waitlock);
521		if (sc->sc_inflate_done) {
522			sc->sc_inflate_done = 0;
523			mutex_exit(&sc->sc_waitlock);
524			inflate_done(sc);
525			goto again;
526		}
527		if (sc->sc_deflate_done) {
528			sc->sc_deflate_done = 0;
529			mutex_exit(&sc->sc_waitlock);
530			deflate_done(sc);
531			goto again;
532		}
533		cv_timedwait(&sc->sc_wait, &sc->sc_waitlock,
534			     mstohz(sleeptime));
535		mutex_exit(&sc->sc_waitlock);
536	}
537}
538
/* Loadable-module glue; depends on the "virtio" module. */
MODULE(MODULE_CLASS_DRIVER, viomb, "virtio");

#ifdef _MODULE
#include "ioconf.c"
#endif

/*
 * Module control entry point: wire the autoconf tables in/out when
 * built as a module.  In a built-in kernel this is a no-op that
 * reports success.
 */
static int
viomb_modcmd(modcmd_t cmd, void *opaque)
{
	int error = 0;

#ifdef _MODULE
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = config_init_component(cfdriver_ioconf_viomb,
		    cfattach_ioconf_viomb, cfdata_ioconf_viomb);
		break;
	case MODULE_CMD_FINI:
		error = config_fini_component(cfdriver_ioconf_viomb,
		    cfattach_ioconf_viomb, cfdata_ioconf_viomb);
		break;
	default:
		error = ENOTTY;
		break;
	}
#endif

	return error;
}
568