goldfish_pipe: An implementation of more parallel pipe

This is a driver code for a redesigned android pipe.
Currently it works for x86 and x64 emulators with the following
performance results:
  ADB push to /dev/null,
  Ubuntu,
  400 MB file,
  times are for (1 / 10 / 100) parallel adb commands
x86 adb push: (4.4s / 11.5s / 2m10s) -> (2.8s / 6s / 51s)
x64 adb push: (7s / 15s / (too long, 6m+) -> (2.7s / 6.2s / 52s)

ADB pull and push to /data/ have the same %% of speedup
More importantly, I don't see any signs of slowdowns when
run in parallel with Antutu benchmark, so it is definitely
making much better job at multithreading.

Signed-off-by: Yurii Zubrytskyi <[email protected]>
Signed-off-by: Jin Qian <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 2b21033..9804e9e 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (C) 2011 Google, Inc.
  * Copyright (C) 2012 Intel, Inc.
  * Copyright (C) 2013 Intel, Inc.
  * Copyright (C) 2014 Linaro Limited
+ * Copyright (C) 2011-2016 Google, Inc.
  *
  * This software is licensed under the terms of the GNU General Public
  * License version 2, as published by the Free Software Foundation, and
@@ -46,6 +46,7 @@
  * exchange is properly mapped during a transfer.
  */
 
+
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
@@ -63,122 +64,232 @@
 #include <linux/acpi.h>
 
 /*
+ * Update this when something changes in the driver's behavior so the host
+ * can benefit from knowing it
+ */
+enum {
+	PIPE_DRIVER_VERSION = 2,
+	PIPE_CURRENT_DEVICE_VERSION = 2
+};
+
+/*
  * IMPORTANT: The following constants must match the ones used and defined
  * in external/qemu/hw/goldfish_pipe.c in the Android source tree.
  */
 
-/* pipe device registers */
-#define PIPE_REG_COMMAND		0x00  /* write: value = command */
-#define PIPE_REG_STATUS			0x04  /* read */
-#define PIPE_REG_CHANNEL		0x08  /* read/write: channel id */
-#define PIPE_REG_CHANNEL_HIGH	        0x30  /* read/write: channel id */
-#define PIPE_REG_SIZE			0x0c  /* read/write: buffer size */
-#define PIPE_REG_ADDRESS		0x10  /* write: physical address */
-#define PIPE_REG_ADDRESS_HIGH	        0x34  /* write: physical address */
-#define PIPE_REG_WAKES			0x14  /* read: wake flags */
-#define PIPE_REG_PARAMS_ADDR_LOW	0x18  /* read/write: batch data address */
-#define PIPE_REG_PARAMS_ADDR_HIGH	0x1c  /* read/write: batch data address */
-#define PIPE_REG_ACCESS_PARAMS		0x20  /* write: batch access */
-#define PIPE_REG_VERSION		0x24  /* read: device version */
-
-/* list of commands for PIPE_REG_COMMAND */
-#define CMD_OPEN			1  /* open new channel */
-#define CMD_CLOSE			2  /* close channel (from guest) */
-#define CMD_POLL			3  /* poll read/write status */
-
 /* List of bitflags returned in status of CMD_POLL command */
-#define PIPE_POLL_IN			(1 << 0)
-#define PIPE_POLL_OUT			(1 << 1)
-#define PIPE_POLL_HUP			(1 << 2)
-
-/* The following commands are related to write operations */
-#define CMD_WRITE_BUFFER	4  /* send a user buffer to the emulator */
-#define CMD_WAKE_ON_WRITE	5  /* tell the emulator to wake us when writing
-				     is possible */
-#define CMD_READ_BUFFER        6  /* receive a user buffer from the emulator */
-#define CMD_WAKE_ON_READ       7  /* tell the emulator to wake us when reading
-				   * is possible */
+enum PipePollFlags {
+	PIPE_POLL_IN	= 1 << 0,
+	PIPE_POLL_OUT	= 1 << 1,
+	PIPE_POLL_HUP	= 1 << 2
+};
 
 /* Possible status values used to signal errors - see goldfish_pipe_error_convert */
-#define PIPE_ERROR_INVAL       -1
-#define PIPE_ERROR_AGAIN       -2
-#define PIPE_ERROR_NOMEM       -3
-#define PIPE_ERROR_IO          -4
+enum PipeErrors {
+	PIPE_ERROR_INVAL  = -1,
+	PIPE_ERROR_AGAIN  = -2,
+	PIPE_ERROR_NOMEM  = -3,
+	PIPE_ERROR_IO     = -4
+};
 
 /* Bit-flags used to signal events from the emulator */
-#define PIPE_WAKE_CLOSED       (1 << 0)  /* emulator closed pipe */
-#define PIPE_WAKE_READ         (1 << 1)  /* pipe can now be read from */
-#define PIPE_WAKE_WRITE        (1 << 2)  /* pipe can now be written to */
+enum PipeWakeFlags {
+	PIPE_WAKE_CLOSED = 1 << 0,  /* emulator closed pipe */
+	PIPE_WAKE_READ   = 1 << 1,  /* pipe can now be read from */
+	PIPE_WAKE_WRITE  = 1 << 2  /* pipe can now be written to */
+};
 
-struct access_params {
-	unsigned long channel;
-	u32 size;
-	unsigned long address;
-	u32 cmd;
-	u32 result;
-	/* reserved for future extension */
+/* Bit flags for the 'flags' field */
+enum PipeFlagsBits {
+	BIT_CLOSED_ON_HOST = 0,  /* pipe closed by host */
+	BIT_WAKE_ON_WRITE  = 1,  /* want to be woken on writes */
+	BIT_WAKE_ON_READ   = 2,  /* want to be woken on reads */
+};
+
+enum PipeRegs {
+	PIPE_REG_CMD = 0,
+
+	PIPE_REG_SIGNAL_BUFFER_HIGH = 4,
+	PIPE_REG_SIGNAL_BUFFER = 8,
+	PIPE_REG_SIGNAL_BUFFER_COUNT = 12,
+
+	PIPE_REG_OPEN_BUFFER_HIGH = 20,
+	PIPE_REG_OPEN_BUFFER = 24,
+
+	PIPE_REG_VERSION = 36,
+
+	PIPE_REG_GET_SIGNALLED = 48,
+};
+
+enum PipeCmdCode {
+	PIPE_CMD_OPEN = 1,	/* to be used by the pipe device itself */
+	PIPE_CMD_CLOSE,
+	PIPE_CMD_POLL,
+	PIPE_CMD_WRITE,
+	PIPE_CMD_WAKE_ON_WRITE,
+	PIPE_CMD_READ,
+	PIPE_CMD_WAKE_ON_READ,
+
+	/*
+	 * TODO(zyy): implement a deferred read/write execution to allow
+	 * parallel processing of pipe operations on the host.
+	 */
+	PIPE_CMD_WAKE_ON_DONE_IO,
+};
+
+enum {
+	MAX_BUFFERS_PER_COMMAND = 336,
+	MAX_SIGNALLED_PIPES = 64,
+	INITIAL_PIPES_CAPACITY = 64
+};
+
+struct goldfish_pipe_dev;
+struct goldfish_pipe;
+struct goldfish_pipe_command;
+
+/* A per-pipe command structure, shared with the host */
+struct goldfish_pipe_command {
+	s32 cmd;		/* PipeCmdCode, guest -> host */
+	s32 id;			/* pipe id, guest -> host */
+	s32 status;		/* command execution status, host -> guest */
+	s32 reserved;	/* to pad to 64-bit boundary */
+	union {
+		/* Parameters for PIPE_CMD_{READ,WRITE} */
+		struct {
+			/* number of buffers, guest -> host */
+			u32 buffers_count;
+			/* number of consumed bytes, host -> guest */
+			s32 consumed_size;
+			/* buffer pointers, guest -> host */
+			u64 ptrs[MAX_BUFFERS_PER_COMMAND];
+			/* buffer sizes, guest -> host */
+			u32 sizes[MAX_BUFFERS_PER_COMMAND];
+		} rw_params;
+	};
+};
+
+/* A single signalled pipe information */
+struct signalled_pipe_buffer {
+	u32 id;
 	u32 flags;
 };
 
+/* Parameters for the PIPE_CMD_OPEN command */
+struct open_command_param {
+	u64 command_buffer_ptr;
+	u32 rw_params_max_count;
+};
+
+/* Device-level set of buffers shared with the host */
+struct goldfish_pipe_dev_buffers {
+	struct open_command_param open_command_params;
+	struct signalled_pipe_buffer signalled_pipe_buffers[
+		MAX_SIGNALLED_PIPES];
+};
+
+/* This data type models a given pipe instance */
+struct goldfish_pipe {
+	/* pipe ID - index into goldfish_pipe_dev::pipes array */
+	u32 id;
+	/* The wake flags pipe is waiting for
+	 * Note: not protected with any lock, uses atomic operations
+	 *  and barriers to make it thread-safe.
+	 */
+	unsigned long flags;
+	/* wake flags host have signalled,
+	 *  - protected by goldfish_pipe_dev::lock
+	 */
+	unsigned long signalled_flags;
+
+	/* A pointer to command buffer */
+	struct goldfish_pipe_command *command_buffer;
+
+	/* doubly linked list of signalled pipes, protected by
+	 * goldfish_pipe_dev::lock
+	 */
+	struct goldfish_pipe *prev_signalled;
+	struct goldfish_pipe *next_signalled;
+
+	/*
+	 * A pipe's own lock. Protects the following:
+	 *  - *command_buffer - makes sure a command can safely write its
+	 *    parameters to the host and read the results back.
+	 */
+	struct mutex lock;
+
+	/* A wake queue for sleeping until host signals an event */
+	wait_queue_head_t wake_queue;
+	/* Pointer to the parent goldfish_pipe_dev instance */
+	struct goldfish_pipe_dev *dev;
+};
+
 /* The global driver data. Holds a reference to the i/o page used to
  * communicate with the emulator, and a wake queue for blocked tasks
  * waiting to be awoken.
  */
 struct goldfish_pipe_dev {
+	/*
+	 * Global device spinlock. Protects the following members:
+	 *  - pipes, pipes_capacity
+	 *  - [*pipes, *pipes + pipes_capacity) - array data
+	 *  - first_signalled_pipe,
+	 *      goldfish_pipe::prev_signalled,
+	 *      goldfish_pipe::next_signalled,
+	 *      goldfish_pipe::signalled_flags - all singnalled-related fields,
+	 *                                       in all allocated pipes
+	 *  - open_command_params - PIPE_CMD_OPEN-related buffers
+	 *
+	 * It looks like a lot of different fields, but the trick is that
+	 * the only operation that happens often is the signalled pipes array
+	 * manipulation. That's why it's OK for now to keep the rest of the
+	 * fields under the same lock. If we notice too much contention because
+	 * of PIPE_CMD_OPEN, then we should add a separate lock there.
+	 */
 	spinlock_t lock;
-	unsigned char __iomem *base;
-	struct access_params *aps;
+
+	/*
+	 * Array of the pipes of |pipes_capacity| elements,
+	 * indexed by goldfish_pipe::id
+	 */
+	struct goldfish_pipe **pipes;
+	u32 pipes_capacity;
+
+	/* Pointers to the buffers host uses for interaction with this driver */
+	struct goldfish_pipe_dev_buffers *buffers;
+
+	/* Head of a doubly linked list of signalled pipes */
+	struct goldfish_pipe *first_signalled_pipe;
+
+	/* Some device-specific data */
 	int irq;
-	u32 version;
+	int version;
+	unsigned char __iomem *base;
 };
 
-static struct goldfish_pipe_dev   pipe_dev[1];
+struct goldfish_pipe_dev pipe_dev[1] = {};
 
-/* This data type models a given pipe instance */
-struct goldfish_pipe {
-	struct goldfish_pipe_dev *dev;
-	struct mutex lock;
-	unsigned long flags;
-	wait_queue_head_t wake_queue;
-};
-
-
-/* Bit flags for the 'flags' field */
-enum {
-	BIT_CLOSED_ON_HOST = 0,  /* pipe closed by host */
-	BIT_WAKE_ON_WRITE  = 1,  /* want to be woken on writes */
-	BIT_WAKE_ON_READ   = 2,  /* want to be woken on reads */
-};
-
-
-static u32 goldfish_cmd_status(struct goldfish_pipe *pipe, u32 cmd)
+static int goldfish_cmd_locked(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
 {
-	unsigned long flags;
-	u32 status;
-	struct goldfish_pipe_dev *dev = pipe->dev;
+	pipe->command_buffer->cmd = cmd;
+	/* failure by default */
+	pipe->command_buffer->status = PIPE_ERROR_INVAL;
+	writel(pipe->id, pipe->dev->base + PIPE_REG_CMD);
+	return pipe->command_buffer->status;
+}
 
-	spin_lock_irqsave(&dev->lock, flags);
-	gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL,
-		     dev->base + PIPE_REG_CHANNEL_HIGH);
-	writel(cmd, dev->base + PIPE_REG_COMMAND);
-	status = readl(dev->base + PIPE_REG_STATUS);
-	spin_unlock_irqrestore(&dev->lock, flags);
+static int goldfish_cmd(struct goldfish_pipe *pipe, enum PipeCmdCode cmd)
+{
+	int status;
+
+	if (mutex_lock_interruptible(&pipe->lock))
+		return PIPE_ERROR_IO;
+	status = goldfish_cmd_locked(pipe, cmd);
+	mutex_unlock(&pipe->lock);
 	return status;
 }
 
-static void goldfish_cmd(struct goldfish_pipe *pipe, u32 cmd)
-{
-	unsigned long flags;
-	struct goldfish_pipe_dev *dev = pipe->dev;
-
-	spin_lock_irqsave(&dev->lock, flags);
-	gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL,
-		     dev->base + PIPE_REG_CHANNEL_HIGH);
-	writel(cmd, dev->base + PIPE_REG_COMMAND);
-	spin_unlock_irqrestore(&dev->lock, flags);
-}
-
-/* This function converts an error code returned by the emulator through
+/*
+ * This function converts an error code returned by the emulator through
  * the PIPE_REG_STATUS i/o register into a valid negative errno value.
  */
 static int goldfish_pipe_error_convert(int status)
@@ -195,184 +306,202 @@ static int goldfish_pipe_error_convert(int status)
 	}
 }
 
-/*
- * Notice: QEMU will return 0 for un-known register access, indicating
- * param_acess is supported or not
- */
-static int valid_batchbuffer_addr(struct goldfish_pipe_dev *dev,
-				  struct access_params *aps)
+static int pin_user_pages(unsigned long first_page, unsigned long last_page,
+	unsigned int last_page_size, int is_write,
+	struct page *pages[MAX_BUFFERS_PER_COMMAND],
+	unsigned int *iter_last_page_size)
 {
-	u32 aph, apl;
-	u64 paddr;
-	aph = readl(dev->base + PIPE_REG_PARAMS_ADDR_HIGH);
-	apl = readl(dev->base + PIPE_REG_PARAMS_ADDR_LOW);
+	int ret;
+	int requested_pages = ((last_page - first_page) >> PAGE_SHIFT) + 1;
 
-	paddr = ((u64)aph << 32) | apl;
-	if (paddr != (__pa(aps)))
-		return 0;
-	return 1;
-}
+	if (requested_pages > MAX_BUFFERS_PER_COMMAND) {
+		requested_pages = MAX_BUFFERS_PER_COMMAND;
+		*iter_last_page_size = PAGE_SIZE;
+	} else {
+		*iter_last_page_size = last_page_size;
+	}
 
-/* 0 on success */
-static int setup_access_params_addr(struct platform_device *pdev,
-					struct goldfish_pipe_dev *dev)
-{
-	dma_addr_t dma_handle;
-	struct access_params *aps;
-
-	aps = dmam_alloc_coherent(&pdev->dev, sizeof(struct access_params),
-				  &dma_handle, GFP_KERNEL);
-	if (!aps)
-		return -ENOMEM;
-
-	writel(upper_32_bits(dma_handle), dev->base + PIPE_REG_PARAMS_ADDR_HIGH);
-	writel(lower_32_bits(dma_handle), dev->base + PIPE_REG_PARAMS_ADDR_LOW);
-
-	if (valid_batchbuffer_addr(dev, aps)) {
-		dev->aps = aps;
-		return 0;
-	} else
-		return -1;
-}
-
-/* A value that will not be set by qemu emulator */
-#define INITIAL_BATCH_RESULT (0xdeadbeaf)
-static int access_with_param(struct goldfish_pipe_dev *dev, const int cmd,
-				unsigned long address, unsigned long avail,
-				struct goldfish_pipe *pipe, int *status)
-{
-	struct access_params *aps = dev->aps;
-
-	if (aps == NULL)
-		return -1;
-
-	aps->result = INITIAL_BATCH_RESULT;
-	aps->channel = (unsigned long)pipe;
-	aps->size = avail;
-	aps->address = address;
-	aps->cmd = cmd;
-	writel(cmd, dev->base + PIPE_REG_ACCESS_PARAMS);
-	/*
-	 * If the aps->result has not changed, that means
-	 * that the batch command failed
-	 */
-	if (aps->result == INITIAL_BATCH_RESULT)
-		return -1;
-	*status = aps->result;
-	return 0;
-}
-
-static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
-				       size_t bufflen, int is_write)
-{
-	unsigned long irq_flags;
-	struct goldfish_pipe *pipe = filp->private_data;
-	struct goldfish_pipe_dev *dev = pipe->dev;
-	unsigned long address, address_end;
-	int count = 0, ret = -EINVAL;
-
-	/* If the emulator already closed the pipe, no need to go further */
-	if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
-		return -EIO;
-
-	/* Null reads or writes succeeds */
-	if (unlikely(bufflen == 0))
-		return 0;
-
-	/* Check the buffer range for access */
-	if (!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
-			buffer, bufflen))
+	ret = get_user_pages_fast(
+			first_page, requested_pages, !is_write, pages);
+	if (ret <= 0)
 		return -EFAULT;
+	if (ret < requested_pages)
+		*iter_last_page_size = PAGE_SIZE;
+	return ret;
 
-	/* Serialize access to the pipe */
+}
+
+static void release_user_pages(struct page **pages, int pages_count,
+	int is_write, s32 consumed_size)
+{
+	int i;
+
+	for (i = 0; i < pages_count; i++) {
+		if (!is_write && consumed_size > 0)
+			set_page_dirty(pages[i]);
+		put_page(pages[i]);
+	}
+}
+
+/* Populate the call parameters, merging adjacent pages together */
+static void populate_rw_params(
+	struct page **pages, int pages_count,
+	unsigned long address, unsigned long address_end,
+	unsigned long first_page, unsigned long last_page,
+	unsigned int iter_last_page_size, int is_write,
+	struct goldfish_pipe_command *command)
+{
+	/*
+	 * Process the first page separately - it's the only page that
+	 * needs special handling for its start address.
+	 */
+	unsigned long xaddr = page_to_phys(pages[0]);
+	unsigned long xaddr_prev = xaddr;
+	int buffer_idx = 0;
+	int i = 1;
+	int size_on_page = first_page == last_page
+			? (int)(address_end - address)
+			: (PAGE_SIZE - (address & ~PAGE_MASK));
+	command->rw_params.ptrs[0] = (u64)(xaddr | (address & ~PAGE_MASK));
+	command->rw_params.sizes[0] = size_on_page;
+	for (; i < pages_count; ++i) {
+		xaddr = page_to_phys(pages[i]);
+		size_on_page = (i == pages_count - 1) ?
+			iter_last_page_size : PAGE_SIZE;
+		if (xaddr == xaddr_prev + PAGE_SIZE) {
+			command->rw_params.sizes[buffer_idx] += size_on_page;
+		} else {
+			++buffer_idx;
+			command->rw_params.ptrs[buffer_idx] = (u64)xaddr;
+			command->rw_params.sizes[buffer_idx] = size_on_page;
+		}
+		xaddr_prev = xaddr;
+	}
+	command->rw_params.buffers_count = buffer_idx + 1;
+}
+
+static int transfer_max_buffers(struct goldfish_pipe *pipe,
+	unsigned long address, unsigned long address_end, int is_write,
+	unsigned long last_page, unsigned int last_page_size,
+	s32 *consumed_size, int *status)
+{
+	struct page *pages[MAX_BUFFERS_PER_COMMAND];
+	unsigned long first_page = address & PAGE_MASK;
+	unsigned int iter_last_page_size;
+	int pages_count = pin_user_pages(first_page, last_page,
+			last_page_size, is_write,
+			pages, &iter_last_page_size);
+
+	if (pages_count < 0)
+		return pages_count;
+
+	/* Serialize access to the pipe command buffers */
 	if (mutex_lock_interruptible(&pipe->lock))
 		return -ERESTARTSYS;
 
-	address = (unsigned long)(void *)buffer;
+	populate_rw_params(pages, pages_count, address, address_end,
+		first_page, last_page, iter_last_page_size, is_write,
+		pipe->command_buffer);
+
+	/* Transfer the data */
+	*status = goldfish_cmd_locked(pipe,
+				is_write ? PIPE_CMD_WRITE : PIPE_CMD_READ);
+
+	*consumed_size = pipe->command_buffer->rw_params.consumed_size;
+
+	mutex_unlock(&pipe->lock);
+
+	release_user_pages(pages, pages_count, is_write, *consumed_size);
+
+	return 0;
+}
+
+static int wait_for_host_signal(struct goldfish_pipe *pipe, int is_write)
+{
+	u32 wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
+
+	set_bit(wakeBit, &pipe->flags);
+
+	/* Tell the emulator we're going to wait for a wake event */
+	(void)goldfish_cmd(pipe,
+		is_write ? PIPE_CMD_WAKE_ON_WRITE : PIPE_CMD_WAKE_ON_READ);
+
+	while (test_bit(wakeBit, &pipe->flags)) {
+		if (wait_event_interruptible(
+				pipe->wake_queue,
+				!test_bit(wakeBit, &pipe->flags)))
+			return -ERESTARTSYS;
+
+		if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+static ssize_t goldfish_pipe_read_write(struct file *filp,
+	char __user *buffer, size_t bufflen, int is_write)
+{
+	struct goldfish_pipe *pipe = filp->private_data;
+	int count = 0, ret = -EINVAL;
+	unsigned long address, address_end, last_page;
+	unsigned int last_page_size;
+
+	/* If the emulator already closed the pipe, no need to go further */
+	if (unlikely(test_bit(BIT_CLOSED_ON_HOST, &pipe->flags)))
+		return -EIO;
+	/* Null reads or writes succeeds */
+	if (unlikely(bufflen == 0))
+		return 0;
+	/* Check the buffer range for access */
+	if (unlikely(!access_ok(is_write ? VERIFY_WRITE : VERIFY_READ,
+			buffer, bufflen)))
+		return -EFAULT;
+
+	address = (unsigned long)buffer;
 	address_end = address + bufflen;
+	last_page = (address_end - 1) & PAGE_MASK;
+	last_page_size = ((address_end - 1) & ~PAGE_MASK) + 1;
 
 	while (address < address_end) {
-		unsigned long page_end = (address & PAGE_MASK) + PAGE_SIZE;
-		unsigned long next     = page_end < address_end ? page_end
-								: address_end;
-		unsigned long avail    = next - address;
-		int status, wakeBit;
-		struct page *page;
+		s32 consumed_size;
+		int status;
 
-		/* Either vaddr or paddr depending on the device version */
-		unsigned long xaddr;
-
-		/*
-		 * We grab the pages on a page-by-page basis in case user
-		 * space gives us a potentially huge buffer but the read only
-		 * returns a small amount, then there's no need to pin that
-		 * much memory to the process.
-		 */
-		ret = get_user_pages_unlocked(address, 1, &page,
-				is_write ? 0 : FOLL_WRITE);
+		ret = transfer_max_buffers(pipe, address, address_end, is_write,
+				last_page, last_page_size, &consumed_size,
+				&status);
 		if (ret < 0)
 			break;
 
-		if (dev->version) {
-			/* Device version 1 or newer (qemu-android) expects the
-			 * physical address.
+		if (consumed_size > 0) {
+			/* No matter what's the status, we've transferred
+			 * something.
 			 */
-			xaddr = page_to_phys(page) | (address & ~PAGE_MASK);
-		} else {
-			/* Device version 0 (classic emulator) expects the
-			 * virtual address.
-			 */
-			xaddr = address;
+			count += consumed_size;
+			address += consumed_size;
 		}
-
-		/* Now, try to transfer the bytes in the current page */
-		spin_lock_irqsave(&dev->lock, irq_flags);
-		if (access_with_param(dev,
-				is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER,
-				xaddr, avail, pipe, &status)) {
-			gf_write_ptr(pipe, dev->base + PIPE_REG_CHANNEL,
-				     dev->base + PIPE_REG_CHANNEL_HIGH);
-			writel(avail, dev->base + PIPE_REG_SIZE);
-			gf_write_ptr((void *)xaddr,
-				     dev->base + PIPE_REG_ADDRESS,
-				     dev->base + PIPE_REG_ADDRESS_HIGH);
-			writel(is_write ? CMD_WRITE_BUFFER : CMD_READ_BUFFER,
-					dev->base + PIPE_REG_COMMAND);
-			status = readl(dev->base + PIPE_REG_STATUS);
-		}
-		spin_unlock_irqrestore(&dev->lock, irq_flags);
-
-		if (status > 0 && !is_write)
-			set_page_dirty(page);
-		put_page(page);
-
-		if (status > 0) { /* Correct transfer */
-			count += status;
-			address += status;
+		if (status > 0)
 			continue;
-		} else if (status == 0) { /* EOF */
+		if (status == 0) {
+			/* EOF */
 			ret = 0;
 			break;
-		} else if (status < 0 && count > 0) {
+		}
+		if (count > 0) {
 			/*
-			 * An error occurred and we already transferred
-			 * something on one of the previous pages.
+			 * An error occurred, but we already transferred
+			 * something on one of the previous iterations.
 			 * Just return what we already copied and log this
 			 * err.
-			 *
-			 * Note: This seems like an incorrect approach but
-			 * cannot change it until we check if any user space
-			 * ABI relies on this behavior.
 			 */
 			if (status != PIPE_ERROR_AGAIN)
-				pr_info_ratelimited("goldfish_pipe: backend returned error %d on %s\n",
+				pr_info_ratelimited("goldfish_pipe: backend error %d on %s\n",
 					status, is_write ? "write" : "read");
-			ret = 0;
 			break;
 		}
 
 		/*
-		 * If the error is not PIPE_ERROR_AGAIN, or if we are not in
+		 * If the error is not PIPE_ERROR_AGAIN, or if we are in
 		 * non-blocking mode, just return the error code.
 		 */
 		if (status != PIPE_ERROR_AGAIN ||
@@ -381,139 +510,214 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
 			break;
 		}
 
-		/*
-		 * The backend blocked the read/write, wait until the backend
-		 * tells us it's ready to process more data.
-		 */
-		wakeBit = is_write ? BIT_WAKE_ON_WRITE : BIT_WAKE_ON_READ;
-		set_bit(wakeBit, &pipe->flags);
-
-		/* Tell the emulator we're going to wait for a wake event */
-		goldfish_cmd(pipe,
-			is_write ? CMD_WAKE_ON_WRITE : CMD_WAKE_ON_READ);
-
-		/* Unlock the pipe, then wait for the wake signal */
-		mutex_unlock(&pipe->lock);
-
-		while (test_bit(wakeBit, &pipe->flags)) {
-			if (wait_event_interruptible(
-					pipe->wake_queue,
-					!test_bit(wakeBit, &pipe->flags)))
-				return -ERESTARTSYS;
-
-			if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
-				return -EIO;
-		}
-
-		/* Try to re-acquire the lock */
-		if (mutex_lock_interruptible(&pipe->lock))
-			return -ERESTARTSYS;
+		status = wait_for_host_signal(pipe, is_write);
+		if (status < 0)
+			return status;
 	}
-	mutex_unlock(&pipe->lock);
 
-	if (ret < 0)
-		return ret;
-	else
+	if (count > 0)
 		return count;
+	return ret;
 }
 
 static ssize_t goldfish_pipe_read(struct file *filp, char __user *buffer,
-			      size_t bufflen, loff_t *ppos)
+				size_t bufflen, loff_t *ppos)
 {
-	return goldfish_pipe_read_write(filp, buffer, bufflen, 0);
+	return goldfish_pipe_read_write(filp, buffer, bufflen,
+			/* is_write */ 0);
 }
 
 static ssize_t goldfish_pipe_write(struct file *filp,
 				const char __user *buffer, size_t bufflen,
 				loff_t *ppos)
 {
-	return goldfish_pipe_read_write(filp, (char __user *)buffer,
-								bufflen, 1);
+	return goldfish_pipe_read_write(filp,
+			/* cast away the const */(char __user *)buffer, bufflen,
+			/* is_write */ 1);
 }
 
-
 static unsigned int goldfish_pipe_poll(struct file *filp, poll_table *wait)
 {
 	struct goldfish_pipe *pipe = filp->private_data;
 	unsigned int mask = 0;
 	int status;
 
-	mutex_lock(&pipe->lock);
-
 	poll_wait(filp, &pipe->wake_queue, wait);
 
-	status = goldfish_cmd_status(pipe, CMD_POLL);
-
-	mutex_unlock(&pipe->lock);
+	status = goldfish_cmd(pipe, PIPE_CMD_POLL);
+	if (status < 0)
+		return -ERESTARTSYS;
 
 	if (status & PIPE_POLL_IN)
 		mask |= POLLIN | POLLRDNORM;
-
 	if (status & PIPE_POLL_OUT)
 		mask |= POLLOUT | POLLWRNORM;
-
 	if (status & PIPE_POLL_HUP)
 		mask |= POLLHUP;
-
 	if (test_bit(BIT_CLOSED_ON_HOST, &pipe->flags))
 		mask |= POLLERR;
 
 	return mask;
 }
 
+static void signalled_pipes_add_locked(struct goldfish_pipe_dev *dev,
+	u32 id, u32 flags)
+{
+	struct goldfish_pipe *pipe;
+
+	if (WARN_ON(id >= dev->pipes_capacity))
+		return;
+
+	pipe = dev->pipes[id];
+	if (!pipe)
+		return;
+	pipe->signalled_flags |= flags;
+
+	if (pipe->prev_signalled || pipe->next_signalled
+		|| dev->first_signalled_pipe == pipe)
+		return;	/* already in the list */
+	pipe->next_signalled = dev->first_signalled_pipe;
+	if (dev->first_signalled_pipe)
+		dev->first_signalled_pipe->prev_signalled = pipe;
+	dev->first_signalled_pipe = pipe;
+}
+
+static void signalled_pipes_remove_locked(struct goldfish_pipe_dev *dev,
+	struct goldfish_pipe *pipe) {
+	if (pipe->prev_signalled)
+		pipe->prev_signalled->next_signalled = pipe->next_signalled;
+	if (pipe->next_signalled)
+		pipe->next_signalled->prev_signalled = pipe->prev_signalled;
+	if (pipe == dev->first_signalled_pipe)
+		dev->first_signalled_pipe = pipe->next_signalled;
+	pipe->prev_signalled = NULL;
+	pipe->next_signalled = NULL;
+}
+
+static struct goldfish_pipe *signalled_pipes_pop_front(
+		struct goldfish_pipe_dev *dev, int *wakes)
+{
+	struct goldfish_pipe *pipe;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->lock, flags);
+
+	pipe = dev->first_signalled_pipe;
+	if (pipe) {
+		*wakes = pipe->signalled_flags;
+		pipe->signalled_flags = 0;
+		/*
+		 * This is an optimized version of
+		 * signalled_pipes_remove_locked()
+		 * - We want to make it as fast as possible to
+		 * wake the sleeping pipe operations faster.
+		 */
+		dev->first_signalled_pipe = pipe->next_signalled;
+		if (dev->first_signalled_pipe)
+			dev->first_signalled_pipe->prev_signalled = NULL;
+		pipe->next_signalled = NULL;
+	}
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return pipe;
+}
+
+static void goldfish_interrupt_task(unsigned long unused)
+{
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	/* Iterate over the signalled pipes and wake them one by one */
+	struct goldfish_pipe *pipe;
+	int wakes;
+
+	while ((pipe = signalled_pipes_pop_front(dev, &wakes)) != NULL) {
+		if (wakes & PIPE_WAKE_CLOSED) {
+			pipe->flags = 1 << BIT_CLOSED_ON_HOST;
+		} else {
+			if (wakes & PIPE_WAKE_READ)
+				clear_bit(BIT_WAKE_ON_READ, &pipe->flags);
+			if (wakes & PIPE_WAKE_WRITE)
+				clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags);
+		}
+		/*
+		 * wake_up_interruptible() implies a write barrier, so don't
+		 * explicitly add another one here.
+		 */
+		wake_up_interruptible(&pipe->wake_queue);
+	}
+}
+DECLARE_TASKLET(goldfish_interrupt_tasklet, goldfish_interrupt_task, 0);
+
+/*
+ * The general idea of the interrupt handling:
+ *
+ *  1. device raises an interrupt if there's at least one signalled pipe
+ *  2. IRQ handler reads the signalled pipes and their count from the device
+ *  3. device writes them into a shared buffer and returns the count
+ *      it only resets the IRQ if it has returned all signalled pipes,
+ *      otherwise it leaves it raised, so IRQ handler will be called
+ *      again for the next chunk
+ *  4. IRQ handler adds all returned pipes to the device's signalled pipes list
+ *  5. IRQ handler launches a tasklet to process the signalled pipes from the
+ *      list in a separate context
+ */
 static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
 {
+	u32 count;
+	u32 i;
+	unsigned long flags;
 	struct goldfish_pipe_dev *dev = dev_id;
-	unsigned long irq_flags;
-	int count = 0;
 
-	/*
-	 * We're going to read from the emulator a list of (channel,flags)
-	 * pairs corresponding to the wake events that occurred on each
-	 * blocked pipe (i.e. channel).
-	 */
-	spin_lock_irqsave(&dev->lock, irq_flags);
-	for (;;) {
-		/* First read the channel, 0 means the end of the list */
-		struct goldfish_pipe *pipe;
-		unsigned long wakes;
-		unsigned long channel = 0;
+	if (dev != pipe_dev)
+		return IRQ_NONE;
 
-#ifdef CONFIG_64BIT
-		channel = (u64)readl(dev->base + PIPE_REG_CHANNEL_HIGH) << 32;
+	/* Request the signalled pipes from the device */
+	spin_lock_irqsave(&dev->lock, flags);
 
-		if (channel == 0)
-			break;
-#endif
-		channel |= readl(dev->base + PIPE_REG_CHANNEL);
-
-		if (channel == 0)
-			break;
-
-		/* Convert channel to struct pipe pointer + read wake flags */
-		wakes = readl(dev->base + PIPE_REG_WAKES);
-		pipe  = (struct goldfish_pipe *)(ptrdiff_t)channel;
-
-		/* Did the emulator just closed a pipe? */
-		if (wakes & PIPE_WAKE_CLOSED) {
-			set_bit(BIT_CLOSED_ON_HOST, &pipe->flags);
-			wakes |= PIPE_WAKE_READ | PIPE_WAKE_WRITE;
-		}
-		if (wakes & PIPE_WAKE_READ)
-			clear_bit(BIT_WAKE_ON_READ, &pipe->flags);
-		if (wakes & PIPE_WAKE_WRITE)
-			clear_bit(BIT_WAKE_ON_WRITE, &pipe->flags);
-
-		wake_up_interruptible(&pipe->wake_queue);
-		count++;
+	count = readl(dev->base + PIPE_REG_GET_SIGNALLED);
+	if (count == 0) {
+		spin_unlock_irqrestore(&dev->lock, flags);
+		return IRQ_NONE;
 	}
-	spin_unlock_irqrestore(&dev->lock, irq_flags);
+	if (count > MAX_SIGNALLED_PIPES)
+		count = MAX_SIGNALLED_PIPES;
 
-	return (count == 0) ? IRQ_NONE : IRQ_HANDLED;
+	for (i = 0; i < count; ++i)
+		signalled_pipes_add_locked(dev,
+			dev->buffers->signalled_pipe_buffers[i].id,
+			dev->buffers->signalled_pipe_buffers[i].flags);
+
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	tasklet_schedule(&goldfish_interrupt_tasklet);
+	return IRQ_HANDLED;
+}
+
+static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
+{
+	int id;
+
+	for (id = 0; id < dev->pipes_capacity; ++id)
+		if (!dev->pipes[id])
+			return id;
+
+	{
+		/* Reallocate the array */
+		u32 new_capacity = 2 * dev->pipes_capacity;
+		struct goldfish_pipe **pipes =
+			kcalloc(new_capacity, sizeof(*pipes), GFP_KERNEL);
+		if (!pipes)
+			return -ENOMEM;
+		memcpy(pipes, dev->pipes, sizeof(*pipes) * dev->pipes_capacity);
+		kfree(dev->pipes);
+		dev->pipes = pipes;
+		id = dev->pipes_capacity;
+		dev->pipes_capacity = new_capacity;
+	}
+	return id;
 }
 
 /**
- *	goldfish_pipe_open	-	open a channel to the AVD
+ *	goldfish_pipe_open - open a channel to the AVD
  *	@inode: inode of device
  *	@file: file struct of opener
  *
@@ -525,12 +729,13 @@ static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
  */
 static int goldfish_pipe_open(struct inode *inode, struct file *file)
 {
-	struct goldfish_pipe *pipe;
 	struct goldfish_pipe_dev *dev = pipe_dev;
-	int32_t status;
+	unsigned long flags;
+	int id;
+	int status;
 
 	/* Allocate new pipe kernel object */
-	pipe = kzalloc(sizeof(*pipe), GFP_KERNEL);
+	struct goldfish_pipe *pipe = kzalloc(sizeof(*pipe), GFP_KERNEL);
 	if (pipe == NULL)
 		return -ENOMEM;
 
@@ -539,29 +744,69 @@ static int goldfish_pipe_open(struct inode *inode, struct file *file)
 	init_waitqueue_head(&pipe->wake_queue);
 
 	/*
-	 * Now, tell the emulator we're opening a new pipe. We use the
-	 * pipe object's address as the channel identifier for simplicity.
+	 * Command buffer needs to be allocated on its own page to make sure
+	 * it is physically contiguous in host's address space.
 	 */
-
-	status = goldfish_cmd_status(pipe, CMD_OPEN);
-	if (status < 0) {
-		kfree(pipe);
-		return status;
+	pipe->command_buffer =
+		(struct goldfish_pipe_command *)__get_free_page(GFP_KERNEL);
+	if (!pipe->command_buffer) {
+		status = -ENOMEM;
+		goto err_pipe;
 	}
 
+	spin_lock_irqsave(&dev->lock, flags);
+
+	id = get_free_pipe_id_locked(dev);
+	if (id < 0) {
+		status = id;
+		goto err_id_locked;
+	}
+
+	dev->pipes[id] = pipe;
+	pipe->id = id;
+	pipe->command_buffer->id = id;
+
+	/* Now tell the emulator we're opening a new pipe. */
+	dev->buffers->open_command_params.rw_params_max_count =
+			MAX_BUFFERS_PER_COMMAND;
+	dev->buffers->open_command_params.command_buffer_ptr =
+			(u64)(unsigned long)__pa(pipe->command_buffer);
+	status = goldfish_cmd_locked(pipe, PIPE_CMD_OPEN);
+	spin_unlock_irqrestore(&dev->lock, flags);
+	if (status < 0)
+		goto err_cmd;
 	/* All is done, save the pipe into the file's private data field */
 	file->private_data = pipe;
 	return 0;
+
+err_cmd:
+	spin_lock_irqsave(&dev->lock, flags);
+	dev->pipes[id] = NULL;
+err_id_locked:
+	spin_unlock_irqrestore(&dev->lock, flags);
+	free_page((unsigned long)pipe->command_buffer);
+err_pipe:
+	kfree(pipe);
+	return status;
 }
 
 static int goldfish_pipe_release(struct inode *inode, struct file *filp)
 {
+	unsigned long flags;
 	struct goldfish_pipe *pipe = filp->private_data;
+	struct goldfish_pipe_dev *dev = pipe->dev;
 
 	/* The guest is closing the channel, so tell the emulator right now */
-	goldfish_cmd(pipe, CMD_CLOSE);
-	kfree(pipe);
+	(void)goldfish_cmd(pipe, PIPE_CMD_CLOSE);
+
+	spin_lock_irqsave(&dev->lock, flags);
+	dev->pipes[pipe->id] = NULL;
+	signalled_pipes_remove_locked(dev, pipe);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
 	filp->private_data = NULL;
+	free_page((unsigned long)pipe->command_buffer);
+	kfree(pipe);
 	return 0;
 }
 
@@ -574,18 +819,91 @@ static const struct file_operations goldfish_pipe_fops = {
 	.release = goldfish_pipe_release,
 };
 
-static struct miscdevice goldfish_pipe_device = {
+static struct miscdevice goldfish_pipe_dev = {
 	.minor = MISC_DYNAMIC_MINOR,
 	.name = "goldfish_pipe",
 	.fops = &goldfish_pipe_fops,
 };
 
+static int goldfish_pipe_device_init(struct platform_device *pdev)
+{
+	char *page;
+	struct goldfish_pipe_dev *dev = pipe_dev;
+	int err = devm_request_irq(&pdev->dev, dev->irq,
+				goldfish_pipe_interrupt,
+				IRQF_SHARED, "goldfish_pipe", dev);
+	if (err) {
+		dev_err(&pdev->dev, "unable to allocate IRQ for v2\n");
+		return err;
+	}
+
+	err = misc_register(&goldfish_pipe_dev);
+	if (err) {
+		dev_err(&pdev->dev, "unable to register v2 device\n");
+		return err;
+	}
+
+	dev->first_signalled_pipe = NULL;
+	dev->pipes_capacity = INITIAL_PIPES_CAPACITY;
+	dev->pipes = kcalloc(dev->pipes_capacity, sizeof(*dev->pipes),
+					GFP_KERNEL);
+	if (!dev->pipes)
+		return -ENOMEM;
+
+	/*
+	 * We're going to pass two buffers, open_command_params and
+	 * signalled_pipe_buffers, to the host. This means each of those buffers
+	 * needs to be contained in a single physical page. The easiest choice
+	 * is to just allocate a page and place the buffers in it.
+	 */
+	if (WARN_ON(sizeof(*dev->buffers) > PAGE_SIZE))
+		return -ENOMEM;
+
+	page = (char *)__get_free_page(GFP_KERNEL);
+	if (!page) {
+		kfree(dev->pipes);
+		return -ENOMEM;
+	}
+	dev->buffers = (struct goldfish_pipe_dev_buffers *)page;
+
+	/* Send the buffer addresses to the host */
+	{
+		u64 paddr = __pa(&dev->buffers->signalled_pipe_buffers);
+
+		writel((u32)(unsigned long)(paddr >> 32),
+			dev->base + PIPE_REG_SIGNAL_BUFFER_HIGH);
+		writel((u32)(unsigned long)paddr,
+			dev->base + PIPE_REG_SIGNAL_BUFFER);
+		writel((u32)MAX_SIGNALLED_PIPES,
+			dev->base + PIPE_REG_SIGNAL_BUFFER_COUNT);
+
+		paddr = __pa(&dev->buffers->open_command_params);
+		writel((u32)(unsigned long)(paddr >> 32),
+			dev->base + PIPE_REG_OPEN_BUFFER_HIGH);
+		writel((u32)(unsigned long)paddr,
+			dev->base + PIPE_REG_OPEN_BUFFER);
+	}
+	return 0;
+}
+
+static void goldfish_pipe_device_deinit(struct platform_device *pdev)
+{
+	struct goldfish_pipe_dev *dev = pipe_dev;
+
+	misc_deregister(&goldfish_pipe_dev);
+	kfree(dev->pipes);
+	free_page((unsigned long)dev->buffers);
+}
+
 static int goldfish_pipe_probe(struct platform_device *pdev)
 {
 	int err;
 	struct resource *r;
 	struct goldfish_pipe_dev *dev = pipe_dev;
 
+	if (WARN_ON(sizeof(struct goldfish_pipe_command) > PAGE_SIZE))
+		return -ENOMEM;
+
 	/* not thread safe, but this should not happen */
 	WARN_ON(dev->base != NULL);
 
@@ -609,26 +927,21 @@ static int goldfish_pipe_probe(struct platform_device *pdev)
 	}
 	dev->irq = r->start;
 
-	err = devm_request_irq(&pdev->dev, dev->irq, goldfish_pipe_interrupt,
-				IRQF_SHARED, "goldfish_pipe", dev);
-	if (err) {
-		dev_err(&pdev->dev, "unable to allocate IRQ\n");
-		goto error;
-	}
-
-	err = misc_register(&goldfish_pipe_device);
-	if (err) {
-		dev_err(&pdev->dev, "unable to register device\n");
-		goto error;
-	}
-	setup_access_params_addr(pdev, dev);
-
-	/* Although the pipe device in the classic Android emulator does not
-	 * recognize the 'version' register, it won't treat this as an error
-	 * either and will simply return 0, which is fine.
+	/*
+	 * Exchange the versions with the host device
+	 *
+	 * Note: v1 driver used to not report its version, so we write it before
+	 *  reading device version back: this allows the host implementation to
+	 *  detect the old driver (if there was no version write before read).
 	 */
+	writel((u32)PIPE_DRIVER_VERSION, dev->base + PIPE_REG_VERSION);
 	dev->version = readl(dev->base + PIPE_REG_VERSION);
-	return 0;
+	if (WARN_ON(dev->version < PIPE_CURRENT_DEVICE_VERSION))
+		return -EINVAL;
+
+	err = goldfish_pipe_device_init(pdev);
+	if (!err)
+		return 0;
 
 error:
 	dev->base = NULL;
@@ -638,7 +951,7 @@ static int goldfish_pipe_probe(struct platform_device *pdev)
 static int goldfish_pipe_remove(struct platform_device *pdev)
 {
 	struct goldfish_pipe_dev *dev = pipe_dev;
-	misc_deregister(&goldfish_pipe_device);
+	goldfish_pipe_device_deinit(pdev);
 	dev->base = NULL;
 	return 0;
 }
@@ -655,17 +968,16 @@ static const struct of_device_id goldfish_pipe_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, goldfish_pipe_of_match);
 
-static struct platform_driver goldfish_pipe = {
+static struct platform_driver goldfish_pipe_driver = {
 	.probe = goldfish_pipe_probe,
 	.remove = goldfish_pipe_remove,
 	.driver = {
 		.name = "goldfish_pipe",
-		.owner = THIS_MODULE,
 		.of_match_table = goldfish_pipe_of_match,
 		.acpi_match_table = ACPI_PTR(goldfish_pipe_acpi_match),
 	}
 };
 
-module_platform_driver(goldfish_pipe);
+module_platform_driver(goldfish_pipe_driver);
 MODULE_AUTHOR("David Turner <[email protected]>");
 MODULE_LICENSE("GPL");