// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
#include <linux/sched/isolation.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel);

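/*
 * Table of known VMBus device classes, indexed by dev_type.  perf_device
 * marks performance-critical channels whose interrupts init_vp_index()
 * spreads across CPUs; allowed_in_isolated marks device types that are
 * accepted when the guest runs as an isolated (Confidential) VM.
 */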
const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Unknown GUID */
	{ .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},
};

static const struct {
	guid_t guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID	},
	{ HV_IMC_GUID	},
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
 */
static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
{
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	channel->rescind = true;
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {

		if (msginfo->waiting_channel == channel) {
			complete(&msginfo->waitevent);
			break;
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

static bool is_unsupported_vmbus_devs(const guid_t *guid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
		if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
			return true;
	return false;
}

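/*
 * Map the interface-type GUID in a channel's offer to an index into
 * vmbus_devs[].  hvsock channels and explicitly unsupported device types
 * map to HV_UNKNOWN.
 */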
static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
	const guid_t *guid = &channel->offermsg.offer.if_type;
	u16 i;

	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
		return HV_UNKNOWN;

	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @buf: Raw buffer channel data
 * @buflen: Length of the raw buffer channel data.
 * @fw_version: The framework versions we can support.
 * @fw_vercnt: The size of @fw_version.
 * @srv_version: The service versions we can support.
 * @srv_vercnt: The size of @srv_version.
 * @nego_fw_version: The selected framework version.
 * @nego_srv_version: The selected service version.
 *
 * Note: Versions are given in decreasing order.
 *
 * Set up and fill in default negotiate response message.
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
				u32 buflen, const int *fw_version, int fw_vercnt,
				const int *srv_version, int srv_vercnt,
				int *nego_fw_version, int *nego_srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i, j;
	bool found_match = false;
	struct icmsg_negotiate *negop;

	/* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
	if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
		pr_err_ratelimited("Invalid icmsg negotiate\n");
		return false;
	}

	icmsghdrp->icmsgsize = 0x10;
	negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/* Validate negop packet */
	if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
		pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
				   icframe_major, icmsg_major);
		goto fw_error;
	}

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < fw_vercnt; i++) {
		fw_major = (fw_version[i] >> 16);
		fw_minor = (fw_version[i] & 0xFFFF);

		for (j = 0; j < negop->icframe_vercnt; j++) {
			if ((negop->icversion_data[j].major == fw_major) &&
			    (negop->icversion_data[j].minor == fw_minor)) {
				icframe_major = negop->icversion_data[j].major;
				icframe_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = 0; i < srv_vercnt; i++) {
		srv_major = (srv_version[i] >> 16);
		srv_minor = (srv_version[i] & 0xFFFF);

		for (j = negop->icframe_vercnt;
			(j < negop->icframe_vercnt + negop->icmsg_vercnt);
			j++) {

			if ((negop->icversion_data[j].major == srv_major) &&
				(negop->icversion_data[j].minor == srv_minor)) {

				icmsg_major = negop->icversion_data[j].major;
				icmsg_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	if (nego_fw_version)
		*nego_fw_version = (icframe_major << 16) | icframe_minor;

	if (nego_srv_version)
		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	spin_lock_init(&channel->sched_lock);
	init_completion(&channel->rescind_event);

	INIT_LIST_HEAD(&channel->sc_list);

	tasklet_init(&channel->callback_event,
		     vmbus_on_event, (unsigned long)channel);

	hv_ringbuffer_pre_init(channel);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	tasklet_kill(&channel->callback_event);
	vmbus_remove_channel_attr_group(channel);

	kobject_put(&channel->kobj);
}

void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	/*
	 * The mapping of the channel's relid is visible from the CPUs that
	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
	 * execute:
	 *
	 *  (a) In the "normal (i.e., not resuming from hibernation)" path,
	 *      the full barrier in virt_store_mb() guarantees that the store
	 *      is propagated to all CPUs before the add_channel_work work
	 *      is queued.  In turn, add_channel_work is queued before the
	 *      channel's ring buffer is allocated/initialized and the
	 *      OPENCHANNEL message for the channel is sent in vmbus_open().
	 *      Hyper-V won't start sending the interrupts for the channel
	 *      before the OPENCHANNEL message is acked.  The memory barrier
	 *      in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
	 *      that vmbus_chan_sched() must find the channel's relid in
	 *      recv_int_page before retrieving the channel pointer from the
	 *      array of channels.
	 *
	 *  (b) In the "resuming from hibernation" path, the virt_store_mb()
	 *      guarantees that the store is propagated to all CPUs before
	 *      the VMBus connection is marked as ready for the resume event
	 *      (cf. check_ready_for_resume_event()).  The interrupt handler
	 *      of the VMBus driver and vmbus_chan_sched() can not run before
	 *      vmbus_bus_resume() has completed execution (cf. resume_noirq).
	 */
	virt_store_mb(
		vmbus_connection.channels[channel->offermsg.child_relid],
		channel);
}

void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	WRITE_ONCE(
		vmbus_connection.channels[channel->offermsg.child_relid],
		NULL);
}

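/*
 * Notify the host that the guest is done with the given relid, so the host
 * is free to reuse it for a future offer.
 */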
static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;
	int ret;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
			     true);

	trace_vmbus_release_relid(&msg, ret);
}

void hv_process_channel_removal(struct vmbus_channel *channel)
{
	lockdep_assert_held(&vmbus_connection.channel_mutex);
	BUG_ON(!channel->rescind);

	/*
	 * hv_process_channel_removal() could find INVALID_RELID only for
	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
	 */
	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
		!is_hvsock_channel(channel));

	/*
	 * Upon suspend, an in-use hv_sock channel is removed from the array of
	 * channels and the relid is invalidated.  After hibernation, when the
	 * user-space application destroys the channel, it's unnecessary and
	 * unsafe to remove the channel from the array of channels.  See also
	 * the inline comments before the call of vmbus_release_relid() below.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_channel_unmap_relid(channel);

	if (channel->primary_channel == NULL)
		list_del(&channel->listentry);
	else
		list_del(&channel->sc_list);

	/*
	 * If this is a "perf" channel, update the hv_numa_map[] masks so that
	 * init_vp_index() can (re-)use the CPU.
	 */
	if (hv_is_perf_channel(channel))
		hv_clear_allocated_cpu(channel->target_cpu);

	/*
	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
	 * the relid is invalidated; after hibernation, when the user-space app
	 * destroys the channel, the relid is INVALID_RELID, and in this case
	 * it's unnecessary and unsafe to release the old relid, since the same
	 * relid can refer to a completely different channel now.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_release_relid(channel->offermsg.child_relid);

	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/* Note: the function can run concurrently for primary/sub channels. */
static void vmbus_add_channel_work(struct work_struct *work)
{
	struct vmbus_channel *newchannel =
		container_of(work, struct vmbus_channel, add_channel_work);
	struct vmbus_channel *primary_channel = newchannel->primary_channel;
	int ret;

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can clean up properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (primary_channel != NULL) {
		/* newchannel is a sub-channel. */
		struct hv_device *dev = primary_channel->device_obj;

		if (vmbus_add_channel_kobj(dev, newchannel))
			goto err_deq_chan;

		if (primary_channel->sc_creation_callback != NULL)
			primary_channel->sc_creation_callback(newchannel);

		newchannel->probe_done = true;
		return;
	}

	/*
	 * Start the process of binding the primary channel to the driver
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = newchannel->device_id;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	ret = vmbus_device_register(newchannel->device_obj);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}

	newchannel->probe_done = true;
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * We need to set the flag, otherwise
	 * vmbus_onoffer_rescind() can be blocked.
	 */
	newchannel->probe_done = true;

	if (primary_channel == NULL)
		list_del(&newchannel->listentry);
	else
		list_del(&newchannel->sc_list);

	/* vmbus_process_offer() has mapped the channel. */
	vmbus_channel_unmap_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_release_relid(newchannel->offermsg.child_relid);

	free_channel(newchannel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	struct workqueue_struct *wq;
	bool fnew = true;

	/*
	 * Synchronize vmbus_process_offer() and CPU hotplugging:
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_process_offer()]	[Hot removal of the CPU]
	 *
	 * CPU_READ_LOCK		CPUS_WRITE_LOCK
	 * LOAD cpu_online_mask		SEARCH chn_list
	 * STORE target_cpu		LOAD target_cpu
	 * INSERT chn_list		STORE cpu_online_mask
	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
	 *
	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
	 *              CPU2's SEARCH from *not* seeing CPU1's INSERT
	 *
	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
	 *              CPU2's LOAD from *not* seeing CPU1's STORE
	 */
	cpus_read_lock();

	/*
	 * Serializes the modifications of the chn_list list as well as
	 * the accesses to next_numa_node_id in init_vp_index().
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (guid_equal(&channel->offermsg.offer.if_type,
			       &newchannel->offermsg.offer.if_type) &&
		    guid_equal(&channel->offermsg.offer.if_instance,
			       &newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			newchannel->primary_channel = channel;
			break;
		}
	}

	init_vp_index(newchannel);

	/* Remember the channels that should be cleaned up upon suspend. */
	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);

	/*
	 * Now that we have acquired the channel_mutex,
	 * we can release the potentially racing rescind thread.
	 */
	atomic_dec(&vmbus_connection.offer_in_progress);

	if (fnew) {
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);
	} else {
		/*
		 * Check to see if this is a valid sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index == 0) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			cpus_read_unlock();
			/*
			 * Don't call free_channel(), because newchannel->kobj
			 * is not initialized yet.
			 */
			kfree(newchannel);
			WARN_ON_ONCE(1);
			return;
		}
		/*
		 * Process the sub-channel.
		 */
		list_add_tail(&newchannel->sc_list, &channel->sc_list);
	}

	vmbus_channel_map_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();

	/*
	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
	 * directly for sub-channels, because sc_creation_callback() ->
	 * vmbus_open() may never get the host's response to the
	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
	 *
	 * The above is also true for primary channels, if the related device
	 * drivers use sync probing mode by default.
	 *
	 * And, usually the handling of primary channels and sub-channels can
	 * depend on each other, so we should offload them to different
	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
	 * rtnl_lock(), and cause a deadlock: the former gets the rtnl_lock
	 * and waits for all the sub-channels to appear, but the latter
	 * can't get the rtnl_lock and this blocks the handling of
	 * sub-channels.
	 */
	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
		    vmbus_connection.handle_sub_chan_wq;
	queue_work(wq, &newchannel->add_channel_work);
}

/*
 * Check whether the given CPU is already used by another channel of the same
 * device.  This should only be called by init_vp_index().
 */
static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
{
	struct vmbus_channel *primary = chn->primary_channel;
	struct vmbus_channel *sc;

	lockdep_assert_held(&vmbus_connection.channel_mutex);

	if (!primary)
		return false;

	if (primary->target_cpu == cpu)
		return true;

	list_for_each_entry(sc, &primary->sc_list, sc_list)
		if (sc != chn && sc->target_cpu == cpu)
			return true;

	return false;
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * We can statically distribute the incoming channel interrupt load
 * by binding a channel to VCPU.
 *
 * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
 * Performance critical channels will be distributed evenly among all
 * the available NUMA nodes.  Once the node is assigned, we will assign
 * the CPU based on a simple round robin scheme.
 */
static void init_vp_index(struct vmbus_channel *channel)
{
	bool perf_chn = hv_is_perf_channel(channel);
	u32 i, ncpu = num_online_cpus();
	cpumask_var_t available_mask;
	struct cpumask *allocated_mask;
	const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
	u32 target_cpu;
	int numa_node;

	if (!perf_chn ||
	    !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
	    cpumask_empty(hk_mask)) {
		/*
		 * If the channel is not a performance critical
		 * channel, bind it to VMBUS_CONNECT_CPU.
		 * In case alloc_cpumask_var() fails, bind it to
		 * VMBUS_CONNECT_CPU.
		 * If all the cpus are isolated, bind it to
		 * VMBUS_CONNECT_CPU.
		 */
		channel->target_cpu = VMBUS_CONNECT_CPU;
		if (perf_chn)
			hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
		return;
	}

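	/*
	 * Pick the next non-empty NUMA node in round-robin order and take the
	 * first housekeeping CPU of that node that is not yet in the node's
	 * allocated mask.  Prefer a CPU that is not already used by another
	 * channel of the same device; after ncpu attempts (or if the
	 * sub-channel index is >= ncpu), accept the candidate anyway.
	 */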
	for (i = 1; i <= ncpu + 1; i++) {
		while (true) {
			numa_node = next_numa_node_id++;
			if (numa_node == nr_node_ids) {
				next_numa_node_id = 0;
				continue;
			}
			if (cpumask_empty(cpumask_of_node(numa_node)))
				continue;
			break;
		}
		allocated_mask = &hv_context.hv_numa_map[numa_node];

retry:
		cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
		cpumask_and(available_mask, available_mask, hk_mask);

		if (cpumask_empty(available_mask)) {
			/*
			 * We have cycled through all the CPUs in the node;
			 * reset the allocated map.
			 */
			cpumask_clear(allocated_mask);
			goto retry;
		}

		target_cpu = cpumask_first(available_mask);
		cpumask_set_cpu(target_cpu, allocated_mask);

		if (channel->offermsg.offer.sub_channel_index >= ncpu ||
		    i > ncpu || !hv_cpuself_used(target_cpu, channel))
			break;
	}

	channel->target_cpu = target_cpu;

	free_cpumask_var(available_mask);
}

#define UNLOAD_DELAY_UNIT_MS	10		/* 10 milliseconds */
#define UNLOAD_WAIT_MS		(100*1000)	/* 100 seconds */
#define UNLOAD_WAIT_LOOPS	(UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
#define UNLOAD_MSG_MS		(5*1000)	/* Every 5 seconds */
#define UNLOAD_MSG_LOOPS	(UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type, i;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version. When
	 * we're crashing on a different CPU let's hope that IRQ handler on
	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event. If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 *
	 * Wait up to 100 seconds since an Azure host must write back any dirty
	 * data in its disk cache before the VMbus UNLOAD request will
	 * complete. This flushing has been empirically observed to take up
	 * to 50 seconds in cases with a lot of dirty data, so allow additional
	 * leeway and for inaccuracies in mdelay(). But eventually time out so
	 * that the panic path can't get hung forever in case the response
	 * message isn't seen.
	 */
	for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
		if (completion_done(&vmbus_connection.unload_event))
			goto completed;

		for_each_online_cpu(cpu) {
			struct hv_per_cpu_context *hv_cpu
				= per_cpu_ptr(hv_context.cpu_context, cpu);

			page_addr = hv_cpu->synic_message_page;
			msg = (struct hv_message *)page_addr
				+ VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		/*
		 * Give a notice periodically so someone watching the
		 * serial output won't think it is completely hung.
		 */
		if (!(i % UNLOAD_MSG_LOOPS))
			pr_notice("Waiting for VMBus UNLOAD to complete\n");

		mdelay(UNLOAD_DELAY_UNIT_MS);
	}
	pr_err("Continuing even though VMBus UNLOAD did not complete\n");

completed:
	/*
	 * We're crashing and already got the UNLOAD_RESPONSE; clean up any
	 * pending messages on all CPUs so that new messages can be received
	 * after we reconnect.
	 */
	for_each_online_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		page_addr = hv_cpu->synic_message_page;
		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wake up the waiting thread.
	 * Once we successfully unload, we can clean up the monitor state.
	 *
	 * NB.  A malicious or compromised Hyper-V could send a spurious
	 * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
	 * of the complete() below.  Make sure that unload_event has been
	 * initialized by the time this complete() is executed.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
		return;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	reinit_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
		       !crash);

	/*
	 * vmbus_initiate_unload() is also called on crash and the crash can be
	 * happening in an interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

static void check_ready_for_resume_event(void)
{
	/*
	 * If all the old primary channels have been fixed up, then it's safe
	 * to resume.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
		complete(&vmbus_connection.ready_for_resume_event);
}

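/*
 * Save the offer message in the channel and derive the signalling, monitor
 * and device-type state from it.
 */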
static void vmbus_setup_channel_state(struct vmbus_channel *channel,
				      struct vmbus_channel_offer_channel *offer)
{
	/*
	 * Setup state for signalling the host.
	 */
	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;

	channel->is_dedicated_interrupt =
			(offer->is_dedicated_interrupt != 0);
	channel->sig_event = offer->connection_id;

	memcpy(&channel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	channel->monitor_grp = (u8)offer->monitorid / 32;
	channel->monitor_bit = (u8)offer->monitorid % 32;
	channel->device_id = hv_get_dev_type(channel);
}

/*
 * find_primary_channel_by_offer - Get the channel object given the new offer.
 * This is only used in the resume path of hibernation.
 */
static struct vmbus_channel *
find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
{
	struct vmbus_channel *channel = NULL, *iter;
	const guid_t *inst1, *inst2;

	/* Ignore sub-channel offers. */
	if (offer->offer.sub_channel_index != 0)
		return NULL;

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
		inst1 = &iter->offermsg.offer.if_instance;
		inst2 = &offer->offer.if_instance;

		if (guid_equal(inst1, inst2)) {
			channel = iter;
			break;
		}
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return channel;
}

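/*
 * On a non-isolated VM every offer is acceptable.  On an isolated
 * (Confidential) VM, only hv_sock offers and device types marked
 * allowed_in_isolated in vmbus_devs[] are accepted.
 */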
static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer)
{
	const guid_t *guid = &offer->offer.if_type;
	u16 i;

	if (!hv_is_isolation_supported())
		return true;

	if (is_hvsock_offer(offer))
		return true;

	for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return vmbus_devs[i].allowed_in_isolated;
	}
	return false;
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *oldchannel, *newchannel;
	size_t offer_sz;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	trace_vmbus_onoffer(offer);

	if (!vmbus_is_valid_offer(offer)) {
		pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
				   offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		return;
	}

	oldchannel = find_primary_channel_by_offer(offer);

	if (oldchannel != NULL) {
		/*
		 * We're resuming from hibernation: all the sub-channel and
		 * hv_sock channels we had before the hibernation should have
		 * been cleaned up, and now we must be seeing a re-offered
		 * primary channel that we had before the hibernation.
		 */

		/*
		 * { Initially: channel relid = INVALID_RELID,
		 *		channels[valid_relid] = NULL }
		 *
		 * CPU1					CPU2
		 *
		 * [vmbus_onoffer()]			[vmbus_device_release()]
		 *
		 * LOCK channel_mutex			LOCK channel_mutex
		 * STORE channel relid = valid_relid	LOAD r1 = channel relid
		 * MAP_RELID channel			if (r1 != INVALID_RELID)
		 * UNLOCK channel_mutex			  UNMAP_RELID channel
		 *					UNLOCK channel_mutex
		 *
		 * Forbids: r1 == valid_relid &&
		 *              channels[valid_relid] == channel
		 *
		 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
		 * None of the hv_sock channels which were present before the
		 * suspend are re-offered upon the resume.  See the WARN_ON()
		 * in hv_process_channel_removal().
		 */
		mutex_lock(&vmbus_connection.channel_mutex);

		atomic_dec(&vmbus_connection.offer_in_progress);

		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
		/* Fix up the relid. */
		oldchannel->offermsg.child_relid = offer->child_relid;

		offer_sz = sizeof(*offer);
		if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
			/*
			 * This is not an error, since the host can also change
			 * the other field(s) of the offer, e.g. on WS RS5
			 * (Build 17763), the offer->connection_id of the
			 * Mellanox VF vmbus device can change when the host
			 * reoffers the device upon resume.
			 */
			pr_debug("vmbus offer changed: relid=%d\n",
				 offer->child_relid);

			print_hex_dump_debug("Old vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     &oldchannel->offermsg, offer_sz,
					     false);
			print_hex_dump_debug("New vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     offer, offer_sz, false);

			/* Fix up the old channel. */
			vmbus_setup_channel_state(oldchannel, offer);
		}

		/* Add the channel back to the array of channels. */
		vmbus_channel_map_relid(oldchannel);
		check_ready_for_resume_event();

		mutex_unlock(&vmbus_connection.channel_mutex);
		return;
	}

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		vmbus_release_relid(offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		pr_err("Unable to allocate channel object\n");
		return;
	}

	vmbus_setup_channel_state(newchannel, offer);

	vmbus_process_offer(newchannel);
}

static void check_ready_for_suspend_event(void)
{
	/*
	 * If all the sub-channels or hv_sock channels have been cleaned up,
	 * then it's safe to suspend.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
		complete(&vmbus_connection.ready_for_suspend_event);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	struct device *dev;
	bool clean_up_chan_for_suspend;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	trace_vmbus_onoffer_rescind(rescind);

	/*
	 * The offer msg and the corresponding rescind msg
	 * from the host are guaranteed to be ordered -
	 * the offer comes in first and then the rescind.
	 * Since we process these events in work elements,
	 * and with preemption, we may end up processing
	 * the events out of order.  We rely on the synchronization
	 * provided by offer_in_progress and by channel_mutex for
	 * ordering these events:
	 *
	 * { Initially: offer_in_progress = 1 }
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_onoffer()]		[vmbus_onoffer_rescind()]
	 *
	 * LOCK channel_mutex		WAIT_ON offer_in_progress == 0
	 * DECREMENT offer_in_progress	LOCK channel_mutex
	 * STORE channels[]		LOAD channels[]
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
	 */

	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
		/*
		 * Wait until any channel offer currently being processed
		 * has completed.
		 */
		msleep(1);
	}

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);
	if (channel != NULL) {
		/*
		 * Guarantee that no other instance of vmbus_onoffer_rescind()
		 * has got a reference to the channel object.  Synchronize on
		 * &vmbus_connection.channel_mutex.
		 */
		if (channel->rescind_ref) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			return;
		}
		channel->rescind_ref = true;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel == NULL) {
		/*
		 * We failed in processing the offer message;
		 * we would have cleaned up the relid in that
		 * failure path.
		 */
		return;
	}

	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
				    is_sub_channel(channel);
	/*
	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
	 * should make sure the channel callback is not running any more.
	 */
	vmbus_reset_channel_cb(channel);

	/*
	 * Now wait for offer handling to complete.
	 */
	vmbus_rescind_cleanup(channel);
	while (READ_ONCE(channel->probe_done) == false) {
		/*
		 * Wait until the offer handling (see
		 * vmbus_add_channel_work()) has set probe_done.
		 */
		msleep(1);
	}

	/*
	 * At this point, the rescind handling can proceed safely.
	 */

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);

			if (clean_up_chan_for_suspend)
				check_ready_for_suspend_event();

			return;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else if (channel->primary_channel != NULL) {
		/*
		 * Sub-channel is being rescinded. Following is the channel
		 * close sequence when initiated from the driver (refer to
		 * vmbus_close() for details):
		 * 1. Close all sub-channels first
		 * 2. Then close the primary channel.
		 */
		mutex_lock(&vmbus_connection.channel_mutex);
		if (channel->state == CHANNEL_OPEN_STATE) {
			/*
			 * The channel is currently not open;
			 * it is safe for us to clean up the channel.
			 */
			hv_process_channel_removal(channel);
		} else {
			complete(&channel->rescind_event);
		}
		mutex_unlock(&vmbus_connection.channel_mutex);
	}

	/* The "channel" may have been freed. Do not access it any longer. */

	if (clean_up_chan_for_suspend)
		check_ready_for_suspend_event();
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	BUG_ON(!is_hvsock_channel(channel));

	/* We always get a rescind msg when a connection is closed. */
	while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
		msleep(1);

	vmbus_device_unregister(channel->device_obj);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	trace_vmbus_onopen_result(result);

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	trace_vmbus_ongpadl_created(gpadlcreated);

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onmodifychannel_response - Modify Channel response handler.
 *
 * This is invoked when we receive a response to our channel modify request.
 * Find the matching request, copy the response and signal the requesting thread.
 */
static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_modifychannel_response *response;
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;

	response = (struct vmbus_channel_modifychannel_response *)hdr;

	trace_vmbus_onmodifychannel_response(response);

	/*
	 * Find the modify msg, copy the response and signal/unblock the wait event.
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
		struct vmbus_channel_message_header *responseheader =
				(struct vmbus_channel_message_header *)msginfo->msg;

		if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
			struct vmbus_channel_modifychannel *modifymsg;

			modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
			if (modifymsg->child_relid == response->child_relid) {
				memcpy(&msginfo->response.modify_response, response,
				       sizeof(*response));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	trace_vmbus_ongpadl_torndown(gpadl_torndown);

	/*
	 * Find the teardown msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;

	trace_vmbus_onversion_response(version_response);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			      version_response,
			      sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
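/*
 * Each entry lists the message type, a handler-type flag, the handler
 * (NULL for messages the guest only sends or ignores), and the minimum
 * payload length expected for that message.
 */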
const struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
	{ CHANNELMSG_INVALID,			0, NULL, 0},
	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer,
		sizeof(struct vmbus_channel_offer_channel)},
	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind,
		sizeof(struct vmbus_channel_rescind_offer) },
	{ CHANNELMSG_REQUESTOFFERS,		0, NULL, 0},
	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered, 0},
	{ CHANNELMSG_OPENCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result,
		sizeof(struct vmbus_channel_open_result)},
	{ CHANNELMSG_CLOSECHANNEL,		0, NULL, 0},
	{ CHANNELMSG_GPADL_HEADER,		0, NULL, 0},
	{ CHANNELMSG_GPADL_BODY,		0, NULL, 0},
	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created,
		sizeof(struct vmbus_channel_gpadl_created)},
	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL, 0},
	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown,
		sizeof(struct vmbus_channel_gpadl_torndown) },
	{ CHANNELMSG_RELID_RELEASED,		0, NULL, 0},
	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL, 0},
	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response,
		sizeof(struct vmbus_channel_version_response)},
	{ CHANNELMSG_UNLOAD,			0, NULL, 0},
	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response, 0},
	{ CHANNELMSG_18,			0, NULL, 0},
	{ CHANNELMSG_19,			0, NULL, 0},
	{ CHANNELMSG_20,			0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_RESULT,		0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL_RESPONSE,	1, vmbus_onmodifychannel_response,
		sizeof(struct vmbus_channel_modifychannel_response)},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
{
	trace_vmbus_on_message(hdr);

	/*
	 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go
	 * out of bound and the message_handler pointer can not be NULL.
	 */
	channel_message_table[hdr->msgtype].message_handler(hdr);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kzalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
			     true);

	trace_vmbus_request_offers(ret);

	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);

		goto cleanup;
	}

cleanup:
	kfree(msginfo);

	return ret;
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
		void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
