bcache: Fix moving_gc deadlocking with a foreground write

Deadlock happened because a foreground write slept, waiting for a bucket
to be allocated. Normally the gc would mark buckets available for invalidation.
But the moving_gc was stuck waiting for outstanding writes to complete.
These writes used the bcache_wq, the same queue foreground writes used.

This fix gives moving_gc its own work queue, so it was still finish moving
even if foreground writes are stuck waiting for allocation. It also makes
work queue a parameter to the data_insert path, so moving_gc can use its
workqueue for writes.

Signed-off-by: Nicholas Swenson <[email protected]>
Signed-off-by: Kent Overstreet <[email protected]>
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index a4c7306..6d814f4 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -628,6 +628,8 @@
 	/* Number of moving GC bios in flight */
 	struct semaphore	moving_in_flight;
 
+	struct workqueue_struct	*moving_gc_wq;
+
 	struct btree		*root;
 
 #ifdef CONFIG_BCACHE_DEBUG
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 9eb60d1..8c72051 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -115,7 +115,7 @@
 		closure_call(&op->cl, bch_data_insert, NULL, cl);
 	}
 
-	continue_at(cl, write_moving_finish, system_wq);
+	continue_at(cl, write_moving_finish, op->wq);
 }
 
 static void read_moving_submit(struct closure *cl)
@@ -125,7 +125,7 @@
 
 	bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
 
-	continue_at(cl, write_moving, system_wq);
+	continue_at(cl, write_moving, io->op.wq);
 }
 
 static void read_moving(struct cache_set *c)
@@ -160,6 +160,7 @@
 		io->w		= w;
 		io->op.inode	= KEY_INODE(&w->key);
 		io->op.c	= c;
+		io->op.wq	= c->moving_gc_wq;
 
 		moving_init(io);
 		bio = &io->bio.bio;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index fc14ba3..3e880869 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -248,7 +248,7 @@
 		atomic_dec_bug(journal_ref);
 
 	if (!op->insert_data_done)
-		continue_at(cl, bch_data_insert_start, bcache_wq);
+		continue_at(cl, bch_data_insert_start, op->wq);
 
 	bch_keylist_free(&op->insert_keys);
 	closure_return(cl);
@@ -297,7 +297,7 @@
 	op->insert_data_done = true;
 	bio_put(bio);
 out:
-	continue_at(cl, bch_data_insert_keys, bcache_wq);
+	continue_at(cl, bch_data_insert_keys, op->wq);
 }
 
 static void bch_data_insert_error(struct closure *cl)
@@ -340,7 +340,7 @@
 		if (op->writeback)
 			op->error = error;
 		else if (!op->replace)
-			set_closure_fn(cl, bch_data_insert_error, bcache_wq);
+			set_closure_fn(cl, bch_data_insert_error, op->wq);
 		else
 			set_closure_fn(cl, NULL, NULL);
 	}
@@ -376,7 +376,7 @@
 		if (bch_keylist_realloc(&op->insert_keys,
 					3 + (op->csum ? 1 : 0),
 					op->c))
-			continue_at(cl, bch_data_insert_keys, bcache_wq);
+			continue_at(cl, bch_data_insert_keys, op->wq);
 
 		k = op->insert_keys.top;
 		bkey_init(k);
@@ -413,7 +413,7 @@
 	} while (n != bio);
 
 	op->insert_data_done = true;
-	continue_at(cl, bch_data_insert_keys, bcache_wq);
+	continue_at(cl, bch_data_insert_keys, op->wq);
 err:
 	/* bch_alloc_sectors() blocks if s->writeback = true */
 	BUG_ON(op->writeback);
@@ -442,7 +442,7 @@
 		bio_put(bio);
 
 		if (!bch_keylist_empty(&op->insert_keys))
-			continue_at(cl, bch_data_insert_keys, bcache_wq);
+			continue_at(cl, bch_data_insert_keys, op->wq);
 		else
 			closure_return(cl);
 	}
@@ -824,6 +824,7 @@
 	s->iop.error		= 0;
 	s->iop.flags		= 0;
 	s->iop.flush_journal	= (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
+	s->iop.wq		= bcache_wq;
 
 	return s;
 }
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 39f21db..c117c40 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -7,6 +7,7 @@
 	struct closure		cl;
 	struct cache_set	*c;
 	struct bio		*bio;
+	struct workqueue_struct *wq;
 
 	unsigned		inode;
 	uint16_t		write_point;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index fb34327..ddfde38 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1356,6 +1356,8 @@
 	bch_bset_sort_state_free(&c->sort);
 	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
 
+	if (c->moving_gc_wq)
+		destroy_workqueue(c->moving_gc_wq);
 	if (c->bio_split)
 		bioset_free(c->bio_split);
 	if (c->fill_iter)
@@ -1522,6 +1524,7 @@
 	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
 	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
+	    !(c->moving_gc_wq = create_workqueue("bcache_gc")) ||
 	    bch_journal_alloc(c) ||
 	    bch_btree_cache_alloc(c) ||
 	    bch_open_buckets_alloc(c) ||