// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/gunyah.h>
#include <linux/gunyah_vm_mgr.h>
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/wait.h>
#include "vm_mgr.h"
#include <uapi/linux/gunyah.h>
#define MAX_VCPU_NAME 20 /* gh-vcpu:u32_max+NUL */
struct gh_vcpu {
struct gh_vm_function_instance *f;
struct gh_resource *rsc; /* vCPU capability handed to us via the resource ticket */
struct mutex run_lock; /* serializes gh_vcpu_run() against (un)populate */
/* Track why vcpu_run exited the last time around. */
enum {
GH_VCPU_UNKNOWN = 0,
GH_VCPU_READY,
GH_VCPU_MMIO_READ, /* userspace must complete an MMIO read */
GH_VCPU_SYSTEM_DOWN,
} state;
u8 mmio_read_len; /* length of the outstanding MMIO read */
struct gh_vcpu_run *vcpu_run; /* page shared with userspace via mmap() */
struct completion ready; /* signaled when the vCPU can be scheduled again */
struct gh_vm *ghvm;
struct notifier_block nb; /* resource manager notifications (VM exited) */
struct gh_vm_resource_ticket ticket;
struct kref kref;
};
static void vcpu_release(struct kref *kref)
{
struct gh_vcpu *vcpu = container_of(kref, struct gh_vcpu, kref);
free_page((unsigned long)vcpu->vcpu_run);
kfree(vcpu);
}
/*
* When the hypervisor allows us to schedule the vCPU again, it raises this
* interrupt; the complete() unblocks the thread waiting in gh_vcpu_run().
*/
static irqreturn_t gh_vcpu_irq_handler(int irq, void *data)
{
struct gh_vcpu *vcpu = data;
complete(&vcpu->ready);
return IRQ_HANDLED;
}
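/*
* Handle an MMIO exit from the vCPU. Returns true if the access was handled
* entirely in the kernel (an MMIO write claimed by gh_vm_mmio_write());
* returns false if userspace must handle it, in which case the exit is
* described in vcpu->vcpu_run->mmio.
*/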
static bool gh_handle_mmio(struct gh_vcpu *vcpu,
struct gh_hypercall_vcpu_run_resp *vcpu_run_resp)
{
int ret = 0;
u64 addr = vcpu_run_resp->state_data[0],
len = vcpu_run_resp->state_data[1],
data = vcpu_run_resp->state_data[2];
if (WARN_ON(len > sizeof(u64)))
len = sizeof(u64);
if (vcpu_run_resp->state == GH_VCPU_ADDRSPACE_VMMIO_READ) {
vcpu->vcpu_run->mmio.is_write = 0;
/* Record that we must hand the vCPU the user-supplied value on the next gh_vcpu_run() */
vcpu->state = GH_VCPU_MMIO_READ;
vcpu->mmio_read_len = len;
} else { /* GH_VCPU_ADDRSPACE_VMMIO_WRITE */
/* Try internal handlers first */
ret = gh_vm_mmio_write(vcpu->f->ghvm, addr, len, data);
if (!ret)
return true;
/* Give userspace the info */
vcpu->vcpu_run->mmio.is_write = 1;
memcpy(vcpu->vcpu_run->mmio.data, &data, len);
}
vcpu->vcpu_run->mmio.phys_addr = addr;
vcpu->vcpu_run->mmio.len = len;
vcpu->vcpu_run->exit_reason = GH_VCPU_EXIT_MMIO;
return false;
}
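/*
* Resource manager notifier: when our VM exits, wake any thread blocked in
* gh_vcpu_run() so it can observe the new VM status.
*/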
static int gh_vcpu_rm_notification(struct notifier_block *nb, unsigned long action, void *data)
{
struct gh_vcpu *vcpu = container_of(nb, struct gh_vcpu, nb);
struct gh_rm_vm_exited_payload *exit_payload = data;
if (action == GH_RM_NOTIFICATION_VM_EXITED &&
le16_to_cpu(exit_payload->vmid) == vcpu->ghvm->vmid)
complete(&vcpu->ready);
return NOTIFY_OK;
}
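/* Map the resource manager's view of the VM status onto the userspace-visible values. */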
static inline enum gh_vm_status remap_vm_status(enum gh_rm_vm_status rm_status)
{
switch (rm_status) {
case GH_RM_VM_STATUS_INIT_FAILED:
return GH_VM_STATUS_LOAD_FAILED;
case GH_RM_VM_STATUS_EXITED:
return GH_VM_STATUS_EXITED;
default:
return GH_VM_STATUS_CRASHED;
}
}
/**
* gh_vcpu_check_system() - Check whether the VM as a whole is running
* @vcpu: Pointer to gh_vcpu
*
* Returns true if the VM is alive.
* Returns false if the VM is not alive (which can only mean it is shutting down).
*/
static bool gh_vcpu_check_system(struct gh_vcpu *vcpu)
__must_hold(&vcpu->run_lock)
{
bool ret = true;
down_read(&vcpu->ghvm->status_lock);
if (likely(vcpu->ghvm->vm_status == GH_RM_VM_STATUS_RUNNING))
goto out;
vcpu->vcpu_run->status.status = remap_vm_status(vcpu->ghvm->vm_status);
vcpu->vcpu_run->status.exit_info = vcpu->ghvm->exit_info;
vcpu->vcpu_run->exit_reason = GH_VCPU_EXIT_STATUS;
vcpu->state = GH_VCPU_SYSTEM_DOWN;
ret = false;
out:
up_read(&vcpu->ghvm->status_lock);
return ret;
}
/**
* gh_vcpu_run() - Request Gunyah to begin scheduling this vCPU.
* @vcpu: The client descriptor that was obtained via gh_vcpu_bind()
*
* Returns 0 when the vCPU has exited with information for userspace in
* @vcpu->vcpu_run, or a negative errno.
*/
static int gh_vcpu_run(struct gh_vcpu *vcpu)
{
struct gh_hypercall_vcpu_run_resp vcpu_run_resp;
u64 state_data[3] = { 0 };
enum gh_error gh_error;
int ret = 0;
if (!vcpu->f)
return -ENODEV;
if (mutex_lock_interruptible(&vcpu->run_lock))
return -ERESTARTSYS;
if (!vcpu->rsc) {
ret = -ENODEV;
goto out;
}
switch (vcpu->state) {
case GH_VCPU_UNKNOWN:
if (vcpu->ghvm->vm_status != GH_RM_VM_STATUS_RUNNING) {
/* Check that the VM is up. If the VM is still starting, this blocks
* until the VM is fully up, since the starting thread holds
* status_lock for write.
*/
if (!gh_vcpu_check_system(vcpu))
goto out;
}
vcpu->state = GH_VCPU_READY;
break;
case GH_VCPU_MMIO_READ:
if (unlikely(vcpu->mmio_read_len > sizeof(state_data[0])))
vcpu->mmio_read_len = sizeof(state_data[0]);
memcpy(&state_data[0], vcpu->vcpu_run->mmio.data, vcpu->mmio_read_len);
vcpu->state = GH_VCPU_READY;
break;
case GH_VCPU_SYSTEM_DOWN:
goto out;
default:
break;
}
while (!ret && !signal_pending(current)) {
if (vcpu->vcpu_run->immediate_exit) {
ret = -EINTR;
goto out;
}
gh_error = gh_hypercall_vcpu_run(vcpu->rsc->capid, state_data, &vcpu_run_resp);
memset(state_data, 0, sizeof(state_data));
if (gh_error == GH_ERROR_OK) {
switch (vcpu_run_resp.state) {
case GH_VCPU_STATE_READY:
if (need_resched())
schedule();
break;
case GH_VCPU_STATE_POWERED_OFF:
/* The vCPU might be off because the VM has shut down.
* If so, it will never run again: exit back to userspace.
*/
if (!gh_vcpu_check_system(vcpu))
goto out;
/* Otherwise, another vCPU will turn this one on (e.g. via PSCI)
* and the hypervisor sends an interrupt to wake Linux up.
*/
fallthrough;
case GH_VCPU_STATE_EXPECTS_WAKEUP:
ret = wait_for_completion_interruptible(&vcpu->ready);
/* Reinitialize the completion before the next hypercall. If we
* reinitialized after the hypercall, the wake-up interrupt could
* arrive before the reinit and we would then wait for an event
* that has already happened.
*/
reinit_completion(&vcpu->ready);
/* Check system status again: the completion might have come
* from gh_vcpu_rm_notification().
*/
if (!ret && !gh_vcpu_check_system(vcpu))
goto out;
break;
case GH_VCPU_STATE_BLOCKED:
schedule();
break;
case GH_VCPU_ADDRSPACE_VMMIO_READ:
case GH_VCPU_ADDRSPACE_VMMIO_WRITE:
if (!gh_handle_mmio(vcpu, &vcpu_run_resp))
goto out;
break;
default:
pr_warn_ratelimited("Unknown vCPU state: %llx\n",
vcpu_run_resp.sized_state);
schedule();
break;
}
} else if (gh_error == GH_ERROR_RETRY) {
schedule();
} else {
ret = gh_error_remap(gh_error);
}
}
out:
mutex_unlock(&vcpu->run_lock);
if (signal_pending(current))
return -ERESTARTSYS;
return ret;
}
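/*
* Userspace drives the vCPU through the fd created by the vcpu function. A
* minimal sketch of the run loop (a vcpu fd obtained from the VM manager is
* assumed; emulate_read(), emulate_write() and handle_vm_status() are
* hypothetical VMM helpers; error handling is omitted):
*
*	struct gh_vcpu_run *run;
*	long sz = ioctl(vcpu_fd, GH_VCPU_MMAP_SIZE, 0);
*
*	run = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_fd, 0);
*	while (!ioctl(vcpu_fd, GH_VCPU_RUN, 0)) {
*		switch (run->exit_reason) {
*		case GH_VCPU_EXIT_MMIO:
*			if (run->mmio.is_write)
*				emulate_write(run->mmio.phys_addr,
*					      run->mmio.data, run->mmio.len);
*			else
*				emulate_read(run->mmio.phys_addr,
*					     run->mmio.data, run->mmio.len);
*			break;
*		case GH_VCPU_EXIT_STATUS:
*			handle_vm_status(&run->status);
*			return;
*		}
*	}
*
* For a read, emulate_read() stores the value into run->mmio.data and the
* kernel hands it to the vCPU on the next GH_VCPU_RUN (see the
* GH_VCPU_MMIO_READ handling in gh_vcpu_run() above).
*/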
static long gh_vcpu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct gh_vcpu *vcpu = filp->private_data;
long ret = -EINVAL;
switch (cmd) {
case GH_VCPU_RUN:
ret = gh_vcpu_run(vcpu);
break;
case GH_VCPU_MMAP_SIZE:
ret = PAGE_SIZE;
break;
default:
break;
}
return ret;
}
static int gh_vcpu_release(struct inode *inode, struct file *filp)
{
struct gh_vcpu *vcpu = filp->private_data;
gh_vm_put(vcpu->ghvm);
kref_put(&vcpu->kref, vcpu_release);
return 0;
}
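/* Back mmap() of the shared gh_vcpu_run page; only page offset 0 exists. */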
static vm_fault_t gh_vcpu_fault(struct vm_fault *vmf)
{
struct gh_vcpu *vcpu = vmf->vma->vm_file->private_data;
struct page *page;
if (vmf->pgoff != 0)
return VM_FAULT_SIGBUS;
page = virt_to_page(vcpu->vcpu_run);
get_page(page);
vmf->page = page;
return 0;
}
static const struct vm_operations_struct gh_vcpu_ops = {
.fault = gh_vcpu_fault,
};
static int gh_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
{
vma->vm_ops = &gh_vcpu_ops;
return 0;
}
static const struct file_operations gh_vcpu_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = gh_vcpu_ioctl,
.release = gh_vcpu_release,
.llseek = noop_llseek,
.mmap = gh_vcpu_mmap,
};
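/*
* A vCPU resource matching our label has shown up: stash it and request its
* wake-up interrupt. Called through the ticket registered by gh_vcpu_bind().
*/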
static bool gh_vcpu_populate(struct gh_vm_resource_ticket *ticket, struct gh_resource *ghrsc)
{
struct gh_vcpu *vcpu = container_of(ticket, struct gh_vcpu, ticket);
int ret;
mutex_lock(&vcpu->run_lock);
if (vcpu->rsc) {
pr_warn("vcpu%d already got a Gunyah resource. Check if multiple resources with same label were configured.\n",
vcpu->ticket.label);
ret = -EEXIST;
goto out;
}
vcpu->rsc = ghrsc;
init_completion(&vcpu->ready);
ret = request_irq(vcpu->rsc->irq, gh_vcpu_irq_handler, IRQF_TRIGGER_RISING, "gh_vcpu",
vcpu);
if (ret) {
pr_warn("Failed to request vcpu irq %d: %d\n", vcpu->rsc->irq, ret);
/* Don't run the vCPU without its wake-up interrupt. */
vcpu->rsc = NULL;
goto out;
}
enable_irq_wake(vcpu->rsc->irq);
out:
mutex_unlock(&vcpu->run_lock);
return !ret;
}
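/*
* Counterpart of gh_vcpu_populate(): immediate_exit and complete_all() are
* set before taking run_lock so a thread blocked in gh_vcpu_run() bails out
* and drops the lock; only then can the interrupt be freed.
*/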
static void gh_vcpu_unpopulate(struct gh_vm_resource_ticket *ticket,
struct gh_resource *ghrsc)
{
struct gh_vcpu *vcpu = container_of(ticket, struct gh_vcpu, ticket);
vcpu->vcpu_run->immediate_exit = true;
complete_all(&vcpu->ready);
mutex_lock(&vcpu->run_lock);
free_irq(vcpu->rsc->irq, vcpu);
vcpu->rsc = NULL;
mutex_unlock(&vcpu->run_lock);
}
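/*
* Instantiate the vcpu function: allocate the vcpu and its shared run page,
* register a resource ticket and resource-manager notifier, and hand
* userspace an anonymous-inode fd used to run and mmap the vCPU.
*/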
static long gh_vcpu_bind(struct gh_vm_function_instance *f)
{
struct gh_fn_vcpu_arg *arg = f->argp;
struct gh_vcpu *vcpu;
char name[MAX_VCPU_NAME];
struct file *file;
struct page *page;
int fd;
long r;
if (f->arg_size != sizeof(*arg))
return -EINVAL;
vcpu = kzalloc(sizeof(*vcpu), GFP_KERNEL);
if (!vcpu)
return -ENOMEM;
vcpu->f = f;
f->data = vcpu;
mutex_init(&vcpu->run_lock);
kref_init(&vcpu->kref);
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
goto err_destroy_vcpu;
}
vcpu->vcpu_run = page_address(page);
vcpu->ticket.resource_type = GH_RESOURCE_TYPE_VCPU;
vcpu->ticket.label = arg->id;
vcpu->ticket.owner = THIS_MODULE;
vcpu->ticket.populate = gh_vcpu_populate;
vcpu->ticket.unpopulate = gh_vcpu_unpopulate;
r = gh_vm_add_resource_ticket(f->ghvm, &vcpu->ticket);
if (r)
goto err_destroy_page;
if (!gh_vm_get(f->ghvm)) {
r = -ENODEV;
goto err_remove_resource_ticket;
}
vcpu->ghvm = f->ghvm;
vcpu->nb.notifier_call = gh_vcpu_rm_notification;
/* Ensure we run after the vm_mgr has handled the notification and
* made any necessary state changes, so that we wake up and observe
* the new state.
*/
vcpu->nb.priority = -1;
r = gh_rm_notifier_register(f->rm, &vcpu->nb);
if (r)
goto err_put_gh_vm;
kref_get(&vcpu->kref);
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0) {
r = fd;
goto err_notifier;
}
snprintf(name, sizeof(name), "gh-vcpu:%u", vcpu->ticket.label);
file = anon_inode_getfile(name, &gh_vcpu_fops, vcpu, O_RDWR);
if (IS_ERR(file)) {
r = PTR_ERR(file);
goto err_put_fd;
}
fd_install(fd, file);
return fd;
err_put_fd:
put_unused_fd(fd);
err_notifier:
gh_rm_notifier_unregister(f->rm, &vcpu->nb);
err_put_gh_vm:
gh_vm_put(vcpu->ghvm);
err_remove_resource_ticket:
gh_vm_remove_resource_ticket(f->ghvm, &vcpu->ticket);
err_destroy_page:
free_page((unsigned long)vcpu->vcpu_run);
err_destroy_vcpu:
kfree(vcpu);
return r;
}
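/* Undo gh_vcpu_bind(); an open vcpu fd holds the final reference until release. */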
static void gh_vcpu_unbind(struct gh_vm_function_instance *f)
{
struct gh_vcpu *vcpu = f->data;
gh_rm_notifier_unregister(f->rm, &vcpu->nb);
gh_vm_remove_resource_ticket(vcpu->f->ghvm, &vcpu->ticket);
vcpu->f = NULL;
kref_put(&vcpu->kref, vcpu_release);
}
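/* Two vcpu function instances are the same if their ids match. */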
static bool gh_vcpu_compare(const struct gh_vm_function_instance *f,
const void *arg, size_t size)
{
const struct gh_fn_vcpu_arg *instance = f->argp,
*other = arg;
if (sizeof(*other) != size)
return false;
return instance->id == other->id;
}
DECLARE_GH_VM_FUNCTION_INIT(vcpu, GH_FN_VCPU, 1, gh_vcpu_bind, gh_vcpu_unbind, gh_vcpu_compare);
MODULE_DESCRIPTION("Gunyah vCPU Function");
MODULE_LICENSE("GPL");