mm/z3fold.c: improve compression by extending search

The current z3fold implementation only searches this CPU's page lists for
a fitting page to put a new object into.  This patch adds quick search for
very well fitting pages (i.  e.  those having exactly the required number
of free space) on other CPUs too, before allocating a new page for that
object.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Vitaly Wool <[email protected]>
Cc: Bartlomiej Zolnierkiewicz <[email protected]>
Cc: Dan Streetman <[email protected]>
Cc: Krzysztof Kozlowski <[email protected]>
Cc: Oleksiy Avramchenko <[email protected]>
Cc: Uladzislau Rezki <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 7a59875..29a4f12 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -522,6 +522,42 @@ static inline struct z3fold_header *__z3fold_alloc(struct z3fold_pool *pool,
 	}
 	put_cpu_ptr(pool->unbuddied);
 
+	if (!zhdr) {
+		int cpu;
+
+		/* look for _exact_ match on other cpus' lists */
+		for_each_online_cpu(cpu) {
+			struct list_head *l;
+
+			unbuddied = per_cpu_ptr(pool->unbuddied, cpu);
+			spin_lock(&pool->lock);
+			l = &unbuddied[chunks];
+
+			zhdr = list_first_entry_or_null(READ_ONCE(l),
+						struct z3fold_header, buddy);
+
+			if (!zhdr || !z3fold_page_trylock(zhdr)) {
+				spin_unlock(&pool->lock);
+				zhdr = NULL;
+				continue;
+			}
+			list_del_init(&zhdr->buddy);
+			zhdr->cpu = -1;
+			spin_unlock(&pool->lock);
+
+			page = virt_to_page(zhdr);
+			if (test_bit(NEEDS_COMPACTING, &page->private)) {
+				z3fold_page_unlock(zhdr);
+				zhdr = NULL;
+				if (can_sleep)
+					cond_resched();
+				continue;
+			}
+			kref_get(&zhdr->refcount);
+			break;
+		}
+	}
+
 	return zhdr;
 }