Browse Source

Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block

Pull block IO core updates from Jens Axboe:
 "This is the pull request for the core changes in the block layer for
  3.13.  It contains:

   - The new blk-mq request interface.

     This is a new and more scalable queueing model that marries the
     best part of the request based interface we currently have (which
     is fully featured, but scales poorly) and the bio based "interface"
     which the new drivers for high IOPS devices end up using because
     it's much faster than the request based one.

     The bio interface has no block layer support, since it taps into
     the stack much earlier.  This means that drivers end up having to
     implement a lot of functionality on their own, like tagging,
     timeout handling, requeue, etc.  The blk-mq interface provides all
     these.  Some drivers even provide a switch to select bio or rq and
     have code to handle both, since things like merging only work in
     the rq model and hence is faster for some workloads.  This is a
     huge mess.  Conversion of these drivers nets us a substantial code
     reduction.  Initial results on converting SCSI to this model even
     show an 8x improvement on single queue devices.  So while the
     model was intended to work on the newer multiqueue devices, it has
     substantial improvements for "classic" hardware as well.  This code
     has gone through extensive testing and development, it's now ready
     to go.  A pull request to convert virtio-blk to this
     model will be coming as well, with more drivers scheduled
     for 3.14 conversion.

   - Two blktrace fixes from Jan and Chen Gang.

   - A plug merge fix from Alireza Haghdoost.

   - Conversion of __get_cpu_var() from Christoph Lameter.

   - Fix for sector_div() with 64-bit divider from Geert Uytterhoeven.

   - A fix for a race between request completion and the timeout
     handling from Jeff Moyer.  This is what caused the merge conflict
     with blk-mq/core, in case you are looking at that.

   - A dm stacking fix from Mike Snitzer.

   - A code consolidation fix and duplicated code removal from Kent
     Overstreet.

   - A handful of block bug fixes from Mikulas Patocka, fixing a loop
     crash and memory corruption on blk cg.

   - Elevator switch bug fix from Tomoki Sekiyama.

  A heads-up that I had to rebase this branch.  Initially the immutable
  bio_vecs had been queued up for inclusion, but a week later, it became
  clear that it wasn't fully cooked yet.  So the decision was made to
  pull this out and postpone it until 3.14.  It was a straightforward
  rebase, just pruning out the immutable series and the later fixes of
  problems with it.  The rest of the patches applied directly and no
  further changes were made"

* 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits)
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: Do not call sector_div() with a 64-bit divisor
  kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup()
  block: Consolidate duplicated bio_trim() implementations
  block: Use rw_copy_check_uvector()
  block: Enable sysfs nomerge control for I/O requests in the plug list
  block: properly stack underlying max_segment_size to DM device
  elevator: acquire q->sysfs_lock in elevator_change()
  elevator: Fix a race in elevator switching and md device initialization
  block: Replace __get_cpu_var uses
  bdi: test bdi_init failure
  block: fix a probe argument to blk_register_region
  loop: fix crash if blk_alloc_queue fails
  blk-core: Fix memory corruption if blkcg_init_queue fails
  block: fix race between request completion and timeout handling
  blktrace: Send BLK_TN_PROCESS events to all running traces
  blk-mq: don't disallow request merges for req->special being set
  blk-mq: mq plug list breakage
  blk-mq: fix for flush deadlock
  ...
master
Linus Torvalds 8 years ago
parent
commit
0910c0bdf7
  1. 5
      block/Makefile
  2. 175
      block/blk-core.c
  3. 14
      block/blk-exec.c
  4. 154
      block/blk-flush.c
  5. 6
      block/blk-iopoll.c
  6. 10
      block/blk-lib.c
  7. 17
      block/blk-merge.c
  8. 93
      block/blk-mq-cpu.c
  9. 108
      block/blk-mq-cpumap.c
  10. 384
      block/blk-mq-sysfs.c
  11. 204
      block/blk-mq-tag.c
  12. 27
      block/blk-mq-tag.h
  13. 1500
      block/blk-mq.c
  14. 52
      block/blk-mq.h
  15. 1
      block/blk-settings.c
  16. 8
      block/blk-softirq.c
  17. 13
      block/blk-sysfs.c
  18. 77
      block/blk-timeout.c
  19. 17
      block/blk.h
  20. 22
      block/elevator.c
  21. 2
      block/ioctl.c
  22. 39
      block/scsi_ioctl.c
  23. 3
      drivers/block/Kconfig
  24. 1
      drivers/block/Makefile
  25. 2
      drivers/block/brd.c
  26. 4
      drivers/block/floppy.c
  27. 6
      drivers/block/loop.c
  28. 635
      drivers/block/null_blk.c
  29. 53
      drivers/block/xen-blkfront.c
  30. 40
      drivers/md/md.c
  31. 1
      drivers/md/md.h
  32. 10
      drivers/md/raid1.c
  33. 18
      drivers/md/raid10.c
  34. 2
      drivers/scsi/sd.c
  35. 46
      fs/bio.c
  36. 3
      fs/char_dev.c
  37. 2
      fs/fscache/object.c
  38. 4
      include/linux/backing-dev.h
  39. 3
      include/linux/bio.h
  40. 183
      include/linux/blk-mq.h
  41. 68
      include/linux/blk_types.h
  42. 60
      include/linux/blkdev.h
  43. 4
      include/linux/blktrace_api.h
  44. 23
      include/linux/percpu_ida.h
  45. 7
      kernel/smp.c
  46. 36
      kernel/trace/blktrace.c
  47. 15
      lib/percpu_counter.c
  48. 89
      lib/percpu_ida.c
  49. 3
      mm/swap.c

5
block/Makefile

@ -5,8 +5,9 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \
partition-generic.o partitions/
blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o partitions/
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o

175
block/blk-core.c

@ -16,6 +16,7 @@
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
@ -48,7 +49,7 @@ DEFINE_IDA(blk_queue_ida);
/*
* For the allocated request tables
*/
static struct kmem_cache *request_cachep;
struct kmem_cache *request_cachep = NULL;
/*
* For queue allocation
@ -60,42 +61,6 @@ struct kmem_cache *blk_requestq_cachep;
*/
static struct workqueue_struct *kblockd_workqueue;
/*
 * Account @rq in the partition statistics.
 *
 * @rq:     request being accounted
 * @new_io: non-zero for a newly issued request, zero when accounting a
 *          merge into a request that already has rq->part set
 *
 * Does nothing when blk_do_io_stat() is false for @rq.
 */
static void drive_stat_acct(struct request *rq, int new_io)
{
struct hd_struct *part;
int rw = rq_data_dir(rq);
int cpu;
if (!blk_do_io_stat(rq))
return;
cpu = part_stat_lock();
if (!new_io) {
/* merge: only bump the merge counter of the recorded partition */
part = rq->part;
part_stat_inc(cpu, part, merges[rw]);
} else {
/* new request: look up the partition from the start sector */
part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
if (!hd_struct_try_get(part)) {
/*
 * The partition is already being removed,
 * the request will be accounted on the disk only
 *
 * We take a reference on disk->part0 although that
 * partition will never be deleted, so we can treat
 * it as any other partition.
 */
part = &rq->rq_disk->part0;
hd_struct_get(part);
}
part_round_stats(cpu, part);
part_inc_in_flight(part, rw);
/* remember the partition so later accounting uses the same one */
rq->part = part;
}
part_stat_unlock();
}
void blk_queue_congestion_threshold(struct request_queue *q)
{
int nr;
@ -145,7 +110,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->cmd = rq->__cmd;
rq->cmd_len = BLK_MAX_CDB;
rq->tag = -1;
rq->ref_count = 1;
rq->start_time = jiffies;
set_start_time_ns(rq);
rq->part = NULL;
@ -174,9 +138,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
{
int bit;
printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
rq->cmd_flags);
(unsigned long long) rq->cmd_flags);
printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
(unsigned long long)blk_rq_pos(rq),
@ -595,9 +559,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
if (!q)
return NULL;
if (percpu_counter_init(&q->mq_usage_counter, 0))
goto fail_q;
q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
if (q->id < 0)
goto fail_q;
goto fail_c;
q->backing_dev_info.ra_pages =
(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@ -644,13 +611,19 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
init_waitqueue_head(&q->mq_freeze_wq);
if (blkcg_init_queue(q))
goto fail_id;
goto fail_bdi;
return q;
fail_bdi:
bdi_destroy(&q->backing_dev_info);
fail_id:
ida_simple_remove(&blk_queue_ida, q->id);
fail_c:
percpu_counter_destroy(&q->mq_usage_counter);
fail_q:
kmem_cache_free(blk_requestq_cachep, q);
return NULL;
@ -739,9 +712,17 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
q->sg_reserved_size = INT_MAX;
/* Protect q->elevator from elevator_change */
mutex_lock(&q->sysfs_lock);
/* init elevator */
if (elevator_init(q, NULL))
if (elevator_init(q, NULL)) {
mutex_unlock(&q->sysfs_lock);
return NULL;
}
mutex_unlock(&q->sysfs_lock);
return q;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
@ -1109,7 +1090,8 @@ retry:
goto retry;
}
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
static struct request *blk_old_get_request(struct request_queue *q, int rw,
gfp_t gfp_mask)
{
struct request *rq;
@ -1126,6 +1108,14 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
return rq;
}
/*
 * Allocate a request for @q, dispatching on the queue type: queues with
 * q->mq_ops set go through blk_mq_alloc_request() (non-reserved), all
 * others through blk_old_get_request().
 */
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
	if (!q->mq_ops)
		return blk_old_get_request(q, rw, gfp_mask);

	return blk_mq_alloc_request(q, rw, gfp_mask, false);
}
EXPORT_SYMBOL(blk_get_request);
/**
@ -1211,7 +1201,7 @@ EXPORT_SYMBOL(blk_requeue_request);
static void add_acct_request(struct request_queue *q, struct request *rq,
int where)
{
drive_stat_acct(rq, 1);
blk_account_io_start(rq, true);
__elv_add_request(q, rq, where);
}
@ -1272,8 +1262,6 @@ void __blk_put_request(struct request_queue *q, struct request *req)
{
if (unlikely(!q))
return;
if (unlikely(--req->ref_count))
return;
blk_pm_put_request(req);
@ -1302,12 +1290,17 @@ EXPORT_SYMBOL_GPL(__blk_put_request);
void blk_put_request(struct request *req)
{
unsigned long flags;
struct request_queue *q = req->q;
spin_lock_irqsave(q->queue_lock, flags);
__blk_put_request(q, req);
spin_unlock_irqrestore(q->queue_lock, flags);
if (q->mq_ops)
blk_mq_free_request(req);
else {
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
__blk_put_request(q, req);
spin_unlock_irqrestore(q->queue_lock, flags);
}
}
EXPORT_SYMBOL(blk_put_request);
@ -1343,8 +1336,8 @@ void blk_add_request_payload(struct request *rq, struct page *page,
}
EXPORT_SYMBOL_GPL(blk_add_request_payload);
static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
struct bio *bio)
bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
@ -1361,12 +1354,12 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
req->__data_len += bio->bi_size;
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
drive_stat_acct(req, 0);
blk_account_io_start(req, false);
return true;
}
static bool bio_attempt_front_merge(struct request_queue *q,
struct request *req, struct bio *bio)
bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
@ -1391,12 +1384,12 @@ static bool bio_attempt_front_merge(struct request_queue *q,
req->__data_len += bio->bi_size;
req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
drive_stat_acct(req, 0);
blk_account_io_start(req, false);
return true;
}
/**
* attempt_plug_merge - try to merge with %current's plugged list
* blk_attempt_plug_merge - try to merge with %current's plugged list
* @q: request_queue new bio is being queued at
* @bio: new bio being queued
* @request_count: out parameter for number of traversed plugged requests
@ -1412,19 +1405,28 @@ static bool bio_attempt_front_merge(struct request_queue *q,
* reliable access to the elevator outside queue lock. Only check basic
* merging parameters without querying the elevator.
*/
static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count)
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int *request_count)
{
struct blk_plug *plug;
struct request *rq;
bool ret = false;
struct list_head *plug_list;
if (blk_queue_nomerges(q))
goto out;
plug = current->plug;
if (!plug)
goto out;
*request_count = 0;
list_for_each_entry_reverse(rq, &plug->list, queuelist) {
if (q->mq_ops)
plug_list = &plug->mq_list;
else
plug_list = &plug->list;
list_for_each_entry_reverse(rq, plug_list, queuelist) {
int el_ret;
if (rq->q == q)
@ -1492,7 +1494,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
* Check if we can merge with the plugged list before grabbing
* any locks.
*/
if (attempt_plug_merge(q, bio, &request_count))
if (blk_attempt_plug_merge(q, bio, &request_count))
return;
spin_lock_irq(q->queue_lock);
@ -1560,7 +1562,7 @@ get_rq:
}
}
list_add_tail(&req->queuelist, &plug->list);
drive_stat_acct(req, 1);
blk_account_io_start(req, true);
} else {
spin_lock_irq(q->queue_lock);
add_acct_request(q, req, where);
@ -2014,7 +2016,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
static void blk_account_io_completion(struct request *req, unsigned int bytes)
void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (blk_do_io_stat(req)) {
const int rw = rq_data_dir(req);
@ -2028,7 +2030,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
}
}
static void blk_account_io_done(struct request *req)
void blk_account_io_done(struct request *req)
{
/*
* Account IO completion. flush_rq isn't accounted as a
@ -2076,6 +2078,42 @@ static inline struct request *blk_pm_peek_request(struct request_queue *q,
}
#endif
/*
 * Account @rq in the partition statistics.
 *
 * @rq:     request being accounted
 * @new_io: true for a newly issued request, false when accounting a merge
 *          into a request that already has rq->part set
 *
 * Does nothing when blk_do_io_stat() is false for @rq.
 */
void blk_account_io_start(struct request *rq, bool new_io)
{
	int rw = rq_data_dir(rq);
	struct hd_struct *part;
	int cpu;

	if (!blk_do_io_stat(rq))
		return;

	cpu = part_stat_lock();

	if (new_io) {
		/* New request: find the partition from its start sector. */
		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
		if (!hd_struct_try_get(part)) {
			/*
			 * The partition is already being removed, so the
			 * request is accounted on the disk only.
			 *
			 * A reference is taken on disk->part0 even though
			 * that partition is never deleted, so it can be
			 * treated like any other partition.
			 */
			part = &rq->rq_disk->part0;
			hd_struct_get(part);
		}
		part_round_stats(cpu, part);
		part_inc_in_flight(part, rw);
		rq->part = part;
	} else {
		/* Merge: only bump the merge counter of the recorded partition. */
		part = rq->part;
		part_stat_inc(cpu, part, merges[rw]);
	}

	part_stat_unlock();
}
/**
* blk_peek_request - peek at the top of a request queue
* @q: request queue to peek at
@ -2227,6 +2265,7 @@ void blk_start_request(struct request *req)
if (unlikely(blk_bidi_rq(req)))
req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
blk_add_timer(req);
}
EXPORT_SYMBOL(blk_start_request);
@ -2451,7 +2490,6 @@ static void blk_finish_request(struct request *req, int error)
if (req->cmd_flags & REQ_DONTPREP)
blk_unprep_request(req);
blk_account_io_done(req);
if (req->end_io)
@ -2873,6 +2911,7 @@ void blk_start_plug(struct blk_plug *plug)
plug->magic = PLUG_MAGIC;
INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->mq_list);
INIT_LIST_HEAD(&plug->cb_list);
/*
@ -2970,6 +3009,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
BUG_ON(plug->magic != PLUG_MAGIC);
flush_plug_callbacks(plug, from_schedule);
if (!list_empty(&plug->mq_list))
blk_mq_flush_plug_list(plug, from_schedule);
if (list_empty(&plug->list))
return;

14
block/blk-exec.c

@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/sched/sysctl.h>
#include "blk.h"
@ -24,7 +25,6 @@ static void blk_end_sync_rq(struct request *rq, int error)
struct completion *waiting = rq->end_io_data;
rq->end_io_data = NULL;
__blk_put_request(rq->q, rq);
/*
* complete last, if this is a stack request the process (and thus
@ -59,6 +59,12 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
rq->rq_disk = bd_disk;
rq->end_io = done;
if (q->mq_ops) {
blk_mq_insert_request(q, rq, true);
return;
}
/*
* need to check this before __blk_run_queue(), because rq can
* be freed before that returns.
@ -103,12 +109,6 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
int err = 0;
unsigned long hang_check;
/*
* we need an extra reference to the request, so we can look at
* it after io completion
*/
rq->ref_count++;
if (!rq->sense) {
memset(sense, 0, sizeof(sense));
rq->sense = sense;

154
block/blk-flush.c

@ -69,8 +69,10 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
/* FLUSH/FUA sequences */
enum {
@ -124,6 +126,24 @@ static void blk_flush_restore_request(struct request *rq)
/* make @rq a normal request */
rq->cmd_flags &= ~REQ_FLUSH_SEQ;
rq->end_io = rq->flush.saved_end_io;
blk_clear_rq_complete(rq);
}
/*
 * Work handler bound to rq->mq_flush_data: recover the request from the
 * work item, zero rq->csd and hand the request to blk_mq_run_request().
 */
static void mq_flush_data_run(struct work_struct *work)
{
	struct request *rq = container_of(work, struct request, mq_flush_data);

	memset(&rq->csd, 0, sizeof(rq->csd));

	blk_mq_run_request(rq, true, false);
}
/*
 * Defer issuing @rq: set up rq->mq_flush_data to run mq_flush_data_run()
 * and schedule it with kblockd_schedule_work() on the request's queue.
 */
static void blk_mq_flush_data_insert(struct request *rq)
{
INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
kblockd_schedule_work(rq->q, &rq->mq_flush_data);
}
/**
@ -136,7 +156,7 @@ static void blk_flush_restore_request(struct request *rq)
* completion and trigger the next step.
*
* CONTEXT:
* spin_lock_irq(q->queue_lock)
* spin_lock_irq(q->queue_lock or q->mq_flush_lock)
*
* RETURNS:
* %true if requests were added to the dispatch queue, %false otherwise.
@ -146,7 +166,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
{
struct request_queue *q = rq->q;
struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
bool queued = false;
bool queued = false, kicked;
BUG_ON(rq->flush.seq & seq);
rq->flush.seq |= seq;
@ -167,8 +187,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
case REQ_FSEQ_DATA:
list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
list_add(&rq->queuelist, &q->queue_head);
queued = true;
if (q->mq_ops)
blk_mq_flush_data_insert(rq);
else {
list_add(&rq->queuelist, &q->queue_head);
queued = true;
}
break;
case REQ_FSEQ_DONE:
@ -181,28 +205,43 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
BUG_ON(!list_empty(&rq->queuelist));
list_del_init(&rq->flush.list);
blk_flush_restore_request(rq);
__blk_end_request_all(rq, error);
if (q->mq_ops)
blk_mq_end_io(rq, error);
else
__blk_end_request_all(rq, error);
break;
default:
BUG();
}
return blk_kick_flush(q) | queued;
kicked = blk_kick_flush(q);
/* blk_mq_run_flush will run queue */
if (q->mq_ops)
return queued;
return kicked | queued;
}
static void flush_end_io(struct request *flush_rq, int error)
{
struct request_queue *q = flush_rq->q;
struct list_head *running = &q->flush_queue[q->flush_running_idx];
struct list_head *running;
bool queued = false;
struct request *rq, *n;
unsigned long flags = 0;
if (q->mq_ops) {
blk_mq_free_request(flush_rq);
spin_lock_irqsave(&q->mq_flush_lock, flags);
}
running = &q->flush_queue[q->flush_running_idx];
BUG_ON(q->flush_pending_idx == q->flush_running_idx);
/* account completion of the flush request */
q->flush_running_idx ^= 1;
elv_completed_request(q, flush_rq);
if (!q->mq_ops)
elv_completed_request(q, flush_rq);
/* and push the waiting requests to the next stage */
list_for_each_entry_safe(rq, n, running, flush.list) {
@ -223,9 +262,48 @@ static void flush_end_io(struct request *flush_rq, int error)
* directly into request_fn may confuse the driver. Always use
* kblockd.
*/
if (queued || q->flush_queue_delayed)
blk_run_queue_async(q);
if (queued || q->flush_queue_delayed) {
if (!q->mq_ops)
blk_run_queue_async(q);
else
/*
* This can be optimized to only run queues with requests
* queued if necessary.
*/
blk_mq_run_queues(q, true);
}
q->flush_queue_delayed = 0;
if (q->mq_ops)
spin_unlock_irqrestore(&q->mq_flush_lock, flags);
}
/*
 * Work handler bound to q->mq_flush_work: allocate a reserved flush
 * request, set flush_end_io() as its completion handler and run it.
 *
 * NOTE(review): the result of blk_mq_alloc_request() is dereferenced
 * without a NULL check - confirm it cannot fail with these flags.
 */
static void mq_flush_work(struct work_struct *work)
{
	struct request_queue *q =
		container_of(work, struct request_queue, mq_flush_work);
	struct request *flush_rq;

	/* REQ_FLUSH_SEQ is not strictly needed; it is set for consistency */
	flush_rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
					__GFP_WAIT|GFP_ATOMIC, true);
	flush_rq->cmd_type = REQ_TYPE_FS;
	flush_rq->end_io = flush_end_io;

	blk_mq_run_request(flush_rq, true, false);
}
/*
 * q->flush_rq cannot be used directly for blk-mq: it has no tag and is not
 * in hctx->rqs[], so a new request must be allocated. We cannot sleep
 * here, so the work is offloaded to a workqueue via q->mq_flush_work.
 *
 * Note: we assume a flush request finished in any hardware queue will flush
 * the whole disk cache.
 */
static void mq_run_flush(struct request_queue *q)
{
kblockd_schedule_work(q, &q->mq_flush_work);
}
/**
@ -236,7 +314,7 @@ static void flush_end_io(struct request *flush_rq, int error)
* Please read the comment at the top of this file for more info.
*
* CONTEXT:
* spin_lock_irq(q->queue_lock)
* spin_lock_irq(q->queue_lock or q->mq_flush_lock)
*
* RETURNS:
* %true if flush was issued, %false otherwise.
@ -261,13 +339,18 @@ static bool blk_kick_flush(struct request_queue *q)
* Issue flush and toggle pending_idx. This makes pending_idx
* different from running_idx, which means flush is in flight.
*/
q->flush_pending_idx ^= 1;
if (q->mq_ops) {
mq_run_flush(q);
return true;
}
blk_rq_init(q, &q->flush_rq);
q->flush_rq.cmd_type = REQ_TYPE_FS;
q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
q->flush_rq.rq_disk = first_rq->rq_disk;
q->flush_rq.end_io = flush_end_io;
q->flush_pending_idx ^= 1;
list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
return true;
}
@ -284,16 +367,37 @@ static void flush_data_end_io(struct request *rq, int error)
blk_run_queue_async(q);
}
static void mq_flush_data_end_io(struct request *rq, int error)
{
struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
unsigned long flags;
ctx = rq->mq_ctx;
hctx = q->mq_ops->map_queue(q, ctx->cpu);
/*
* After populating an empty queue, kick it to avoid stall. Read
* the comment in flush_end_io().
*/
spin_lock_irqsave(&q->mq_flush_lock, flags);
if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error))
blk_mq_run_hw_queue(hctx, true);
spin_unlock_irqrestore(&q->mq_flush_lock, flags);
}
/**
* blk_insert_flush - insert a new FLUSH/FUA request
* @rq: request to insert
*
* To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions.
* or __blk_mq_run_hw_queue() to dispatch request.
* @rq is being submitted. Analyze what needs to be done and put it on the
* right queue.
*
* CONTEXT:
* spin_lock_irq(q->queue_lock)
* spin_lock_irq(q->queue_lock) in !mq case
*/
void blk_insert_flush(struct request *rq)
{
@ -316,7 +420,10 @@ void blk_insert_flush(struct request *rq)
* complete the request.
*/
if (!policy) {
__blk_end_bidi_request(rq, 0, 0, 0);
if (q->mq_ops)
blk_mq_end_io(rq, 0);
else
__blk_end_bidi_request(rq, 0, 0, 0);
return;
}
@ -329,7 +436,10 @@ void blk_insert_flush(struct request *rq)
*/
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
list_add_tail(&rq->queuelist, &q->queue_head);
if (q->mq_ops) {
blk_mq_run_request(rq, false, true);
} else
list_add_tail(&rq->queuelist, &q->queue_head);
return;
}
@ -341,6 +451,14 @@ void blk_insert_flush(struct request *rq)
INIT_LIST_HEAD(&rq->flush.list);
rq->cmd_flags |= REQ_FLUSH_SEQ;
rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
if (q->mq_ops) {
rq->end_io = mq_flush_data_end_io;
spin_lock_irq(&q->mq_flush_lock);
blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
spin_unlock_irq(&q->mq_flush_lock);
return;
}
rq->end_io = flush_data_end_io;
blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
@ -453,3 +571,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
/*
 * Initialise the blk-mq flush state of @q: the mq_flush_lock spinlock and
 * the mq_flush_work item, which is bound to mq_flush_work().
 */
void blk_mq_init_flush(struct request_queue *q)
{
spin_lock_init(&q->mq_flush_lock);
INIT_WORK(&q->mq_flush_work, mq_flush_work);
}

6
block/blk-iopoll.c

@ -35,7 +35,7 @@ void blk_iopoll_sched(struct blk_iopoll *iop)
unsigned long flags;
local_irq_save(flags);
list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
__raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
local_irq_restore(flags);
}
@ -79,7 +79,7 @@ EXPORT_SYMBOL(blk_iopoll_complete);
static void blk_iopoll_softirq(struct softirq_action *h)
{
struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
int rearm = 0, budget = blk_iopoll_budget;
unsigned long start_time = jiffies;
@ -201,7 +201,7 @@ static int blk_iopoll_cpu_notify(struct notifier_block *self,
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
&__get_cpu_var(blk_cpu_iopoll));
this_cpu_ptr(&blk_cpu_iopoll));
__raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
local_irq_enable();
}

10
block/blk-lib.c

@ -43,8 +43,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
int type = REQ_WRITE | REQ_DISCARD;
sector_t max_discard_sectors;
sector_t granularity, alignment;
unsigned int max_discard_sectors, granularity;
int alignment;
struct bio_batch bb;
struct bio *bio;
int ret = 0;
@ -58,16 +58,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
/* Zero-sector (unknown) and one-sector granularities are the same. */
granularity = max(q->limits.discard_granularity >> 9, 1U);
alignment = bdev_discard_alignment(bdev) >> 9;
alignment = sector_div(alignment, granularity);
alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
/*
* Ensure that max_discard_sectors is of the proper
* granularity, so that requests stay aligned after a split.
*/
max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
sector_div(max_discard_sectors, granularity);
max_discard_sectors *= granularity;
max_discard_sectors -= max_discard_sectors % granularity;
if (unlikely(!max_discard_sectors)) {
/* Avoid infinite loop below. Being cautious never hurts. */
return -EOPNOTSUPP;

17
block/blk-merge.c

@ -308,6 +308,17 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
return ll_new_hw_segment(q, req, bio);
}
/*
 * On blk-mq queues req->special carries the normal per-request driver
 * payload, so it does not mark a prepared command that must not be merged.
 * Returns true only for a non-mq request that has req->special set.
 */
static bool req_no_special_merge(struct request *req)
{
	if (req->q->mq_ops)
		return false;

	return req->special != NULL;
}
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
struct request *next)
{
@ -319,7 +330,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
* First check if the either of the requests are re-queued
* requests. Can't merge them if they are.
*/
if (req->special || next->special)
if (req_no_special_merge(req) || req_no_special_merge(next))
return 0;
/*
@ -416,7 +427,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (rq_data_dir(req) != rq_data_dir(next)
|| req->rq_disk != next->rq_disk
|| next->special)
|| req_no_special_merge(next))
return 0;
if (req->cmd_flags & REQ_WRITE_SAME &&
@ -515,7 +526,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
return false;
/* must be same device and not a special request */
if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
return false;
/* only merge integrity protected bio into ditto rq */

93
block/blk-mq-cpu.c

@ -0,0 +1,93 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/blk-mq.h>
#include "blk-mq.h"
/* Registered blk_mq_cpu_notifier entries, walked on every CPU hotplug event */
static LIST_HEAD(blk_mq_cpu_notify_list);
/* Protects blk_mq_cpu_notify_list */
static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
/*
 * CPU hotplug callback: forward @action for the CPU encoded in @hcpu to
 * every notifier on blk_mq_cpu_notify_list, holding
 * blk_mq_cpu_notify_lock for the walk.  Always returns NOTIFY_OK.
 */
static int __cpuinit blk_mq_main_cpu_notify(struct notifier_block *self,
					    unsigned long action, void *hcpu)
{
	struct blk_mq_cpu_notifier *entry;
	unsigned int cpu = (unsigned long) hcpu;

	spin_lock(&blk_mq_cpu_notify_lock);
	list_for_each_entry(entry, &blk_mq_cpu_notify_list, list) {
		entry->notify(entry->data, action, cpu);
	}
	spin_unlock(&blk_mq_cpu_notify_lock);

	return NOTIFY_OK;
}
/*
 * Hotplug handler for blk-mq itself.  On CPU_DEAD / CPU_DEAD_FROZEN, take
 * every request left on the dead CPU's ipi_lists and complete it with
 * __blk_mq_end_io(), with local interrupts disabled.  Other actions are
 * ignored.
 */
static void __cpuinit blk_mq_cpu_notify(void *data, unsigned long action,
					unsigned int cpu)
{
	struct llist_node *node, *next;
	struct request *rq;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return;

	/*
	 * If the CPU goes away, ensure that we run any pending
	 * completions.
	 */
	local_irq_disable();

	for (node = llist_del_all(&per_cpu(ipi_lists, cpu)); node; node = next) {
		/* fetch the successor before the request is completed */
		next = node->next;

		rq = llist_entry(node, struct request, ll_list);
		__blk_mq_end_io(rq, rq->errors);
	}

	local_irq_enable();
}
/* Notifier block registered by blk_mq_cpu_init(); calls blk_mq_main_cpu_notify() */
static struct notifier_block __cpuinitdata blk_mq_main_cpu_notifier = {
.notifier_call = blk_mq_main_cpu_notify,
};
/*
 * Append @notifier to blk_mq_cpu_notify_list under blk_mq_cpu_notify_lock.
 * The notifier must have its ->notify callback set (BUG otherwise).
 */
void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
{
BUG_ON(!notifier->notify);
spin_lock(&blk_mq_cpu_notify_lock);
list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
spin_unlock(&blk_mq_cpu_notify_lock);
}
/* Remove @notifier from blk_mq_cpu_notify_list under blk_mq_cpu_notify_lock. */
void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
{
spin_lock(&blk_mq_cpu_notify_lock);
list_del(&notifier->list);
spin_unlock(&blk_mq_cpu_notify_lock);
}
/*
 * Fill in @notifier with callback @fn and its opaque @data argument.
 * Only sets the two fields; it does not add the notifier to any list.
 */
void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
void (*fn)(void *, unsigned long, unsigned int),
void *data)
{
notifier->notify = fn;
notifier->data = data;
}
/* blk-mq's own notifier entry; registered by blk_mq_cpu_init() */
static struct blk_mq_cpu_notifier __cpuinitdata cpu_notifier = {
.notify = blk_mq_cpu_notify,
};
/*
 * Register blk_mq_main_cpu_notifier as a hotplug notifier and add
 * cpu_notifier to the blk-mq notifier list.
 */
void __init blk_mq_cpu_init(void)
{
register_hotcpu_notifier(&blk_mq_main_cpu_notifier);
blk_mq_register_cpu_notifier(&cpu_notifier);
}

108
block/blk-mq-cpumap.c

@ -0,0 +1,108 @@
#include <linux/kernel.h>
#include <linux/threads.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
/*
 * Log the CPU -> queue mapping in @map for every online CPU.
 *
 * @map: table indexed by CPU id holding the queue index of each CPU
 * @nr:  currently unused
 */
static void show_map(unsigned int *map, unsigned int nr)
{
	/* unsigned so the index matches the %u conversion used below */
	unsigned int i;

	pr_info("blk-mq: CPU -> queue map\n");
	for_each_online_cpu(i)
		pr_info(" CPU%2u -> Queue %u\n", i, map[i]);
}
/*
 * Map @cpu onto a queue index by dividing it by the number of CPUs each
 * queue serves, i.e. nr_cpus / nr_queues rounded up.
 */
static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
			      const int cpu)
{
	const unsigned int cpus_per_queue =
		(nr_cpus + nr_queues - 1) / nr_queues;

	return cpu / cpus_per_queue;
}
/*
 * Return the first CPU in topology_thread_cpumask(@cpu), or @cpu itself
 * when that lookup yields a value not below nr_cpu_ids.
 */
static int get_first_sibling(unsigned int cpu)
{
	unsigned int first = cpumask_first(topology_thread_cpumask(cpu));

	return first < nr_cpu_ids ? first : cpu;
}
/*
 * Fill @map, indexed by CPU id, with a queue index for every possible CPU.
 *
 * @map:       table to fill; written for each CPU of for_each_possible_cpu()
 * @nr_queues: number of queues to spread the online CPUs over
 *
 * Offline CPUs are mapped to queue 0.  Returns 0 on success and 1 when the
 * temporary cpumask cannot be allocated.
 */
int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
{
unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
cpumask_var_t cpus;
if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
return 1;
cpumask_clear(cpus);
nr_cpus = nr_uniq_cpus = 0;
/*
 * Count the online CPUs, and count as "unique" those whose first
 * sibling has not been recorded in the mask yet.
 */
for_each_online_cpu(i) {
nr_cpus++;
first_sibling = get_first_sibling(i);
if (!cpumask_test_cpu(first_sibling, cpus))
nr_uniq_cpus++;
cpumask_set_cpu(i, cpus);
}
/* running count of online CPUs that were given a position of their own */
queue = 0;
for_each_possible_cpu(i) {
if (!cpu_online(i)) {
map[i] = 0;
continue;
}
/*
 * Easy case - we have equal or more hardware queues. Or
 * there are no thread siblings to take into account. Do
 * 1:1 if enough, or sequential mapping if less.
 */
if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
queue++;
continue;
}
/*
 * Fewer than nr_cpus queues, and we have some number of
 * threads per core. Map sibling threads to the same
 * queue.
 */
first_sibling = get_first_sibling(i);
if (first_sibling == i) {
map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
queue);
queue++;
} else
map[i] = map[first_sibling];
}
show_map(map, nr_cpus);
free_cpumask_var(cpus);
return 0;
}
unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg)
{
unsigned int *map;
/* If cpus are offline, map them to first hctx */
map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
reg->numa_node);
if (!map)
return NULL;
if (!blk_mq_update_queue_map(map, reg->nr_hw_queues))
return map;
kfree(map);
return NULL;
}

384
block/blk-mq-sysfs.c

@ -0,0 +1,384 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/smp.h>
#include <linux/blk-mq.h>
#include "blk-mq.h"
#include "blk-mq-tag.h"
/*
 * kobject release callback shared by the blk-mq kobj_types in this file.
 * It does nothing.
 * NOTE(review): presumably the structures embedding these kobjects are
 * freed by the queue teardown path rather than by kobject refcounting -
 * confirm.
 */
static void blk_mq_sysfs_release(struct kobject *kobj)
{
}
/*
 * A sysfs attribute attached to a struct blk_mq_ctx. The generic show/store
 * dispatchers recover this wrapper from the embedded attribute with
 * container_of() and call the typed handlers below.
 */
struct blk_mq_ctx_sysfs_entry {
/* Generic sysfs attribute (name and mode). */
struct attribute attr;
/* Formats the value into the sysfs buffer; may be NULL. */
ssize_t (*show)(struct blk_mq_ctx *, char *);
/* Parses a value written by userspace; may be NULL. */
ssize_t (*store)(struct blk_mq_ctx *, const char *, size_t);
};
/*
 * A sysfs attribute attached to a struct blk_mq_hw_ctx. The generic hw
 * show/store dispatchers recover this wrapper from the embedded attribute
 * with container_of() and call the typed handlers below.
 */
struct blk_mq_hw_ctx_sysfs_entry {
/* Generic sysfs attribute (name and mode). */
struct attribute attr;
/* Formats the value into the sysfs buffer; may be NULL. */
ssize_t (*show)(struct blk_mq_hw_ctx *, char *);
/* Parses a value written by userspace; may be NULL. */
ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t);
};
/*
 * sysfs ->show dispatcher for blk_mq_ctx attributes.
 *
 * Recovers the attribute wrapper from @attr and the ctx from @kobj, then
 * calls the attribute's show handler while holding the queue's sysfs_lock.
 *
 * Returns the handler's result, -EIO if the attribute has no show handler,
 * or -ENOENT if the queue is dying.
 */
static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
				 char *page)
{
	struct blk_mq_ctx_sysfs_entry *entry =
		container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
	struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);
	struct request_queue *q = ctx->queue;
	ssize_t res = -ENOENT;

	if (!entry->show)
		return -EIO;

	mutex_lock(&q->sysfs_lock);
	if (!blk_queue_dying(q))
		res = entry->show(ctx, page);
	mutex_unlock(&q->sysfs_lock);

	return res;
}
/*
 * sysfs ->store dispatcher for blk_mq_ctx attributes.
 *
 * Recovers the attribute wrapper from @attr and the ctx from @kobj, then
 * passes the @length bytes at @page to the attribute's store handler while
 * holding the queue's sysfs_lock.
 *
 * Returns the handler's result, -EIO if the attribute has no store handler,
 * or -ENOENT if the queue is dying.
 */
static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
				  const char *page, size_t length)
{
	struct blk_mq_ctx_sysfs_entry *entry =
		container_of(attr, struct blk_mq_ctx_sysfs_entry, attr);
	struct blk_mq_ctx *ctx = container_of(kobj, struct blk_mq_ctx, kobj);
	struct request_queue *q = ctx->queue;
	ssize_t res = -ENOENT;

	if (!entry->store)
		return -EIO;

	mutex_lock(&q->sysfs_lock);
	if (!blk_queue_dying(q))
		res = entry->store(ctx, page, length);
	mutex_unlock(&q->sysfs_lock);

	return res;
}
/*
 * sysfs ->show dispatcher for blk_mq_hw_ctx attributes.
 *
 * Recovers the attribute wrapper from @attr and the hctx from @kobj, then
 * calls the attribute's show handler while holding the queue's sysfs_lock.
 *
 * Returns the handler's result, -EIO if the attribute has no show handler,
 * or -ENOENT if the queue is dying.
 */
static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
				    struct attribute *attr, char *page)
{
	struct blk_mq_hw_ctx_sysfs_entry *entry =
		container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
	struct blk_mq_hw_ctx *hctx =
		container_of(kobj, struct blk_mq_hw_ctx, kobj);
	struct request_queue *q = hctx->queue;
	ssize_t res = -ENOENT;

	if (!entry->show)
		return -EIO;

	mutex_lock(&q->sysfs_lock);
	if (!blk_queue_dying(q))
		res = entry->show(hctx, page);
	mutex_unlock(&q->sysfs_lock);

	return res;
}
/*
 * sysfs ->store dispatcher for blk_mq_hw_ctx attributes.
 *
 * Recovers the attribute wrapper from @attr and the hctx from @kobj, then
 * passes the @length bytes at @page to the attribute's store handler while
 * holding the queue's sysfs_lock.
 *
 * Returns the handler's result, -EIO if the attribute has no store handler,
 * or -ENOENT if the queue is dying.
 */
static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
				     struct attribute *attr, const char *page,
				     size_t length)
{
	struct blk_mq_hw_ctx_sysfs_entry *entry =
		container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
	struct blk_mq_hw_ctx *hctx =
		container_of(kobj, struct blk_mq_hw_ctx, kobj);
	struct request_queue *q = hctx->queue;
	ssize_t res = -ENOENT;

	if (!entry->store)
		return -EIO;

	mutex_lock(&q->sysfs_lock);
	if (!blk_queue_dying(q))
		res = entry->store(hctx, page, length);
	mutex_unlock(&q->sysfs_lock);

	return res;
}
/*
 * Show handler for the ctx "dispatched" attribute: prints
 * rq_dispatched[1] followed by rq_dispatched[0] on one line.
 * Returns the number of characters written.
 */
static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page)
{
	const int len = sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1],
				ctx->rq_dispatched[0]);

	return len;
}
/*
 * Show handler for the ctx "merged" attribute: prints rq_merged.
 * Returns the number of characters written.
 */
static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page)
{
	const int len = sprintf(page, "%lu\n", ctx->rq_merged);

	return len;
}
/*
 * Show handler for the ctx "completed" attribute: prints
 * rq_completed[1] followed by rq_completed[0] on one line.
 * Returns the number of characters written.
 */
static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page)
{
	const int len = sprintf(page, "%lu %lu\n", ctx->rq_completed[1],
				ctx->rq_completed[0]);

	return len;
}
static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg)
{
char *start_page = page;
struct request *rq;
page += sprintf(page, "%s:\n", msg);
list_for_each_entry(rq, list, queuelist)
page += sprintf(page, "\t%p\n", rq);
return page - start_page;
}
/*
 * Show handler for the ctx "rq_list" attribute: lists the requests on
 * ctx->rq_list under the heading "CTX pending", holding ctx->lock while
 * the list is walked. Returns the number of characters written.
 */
static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
{
	ssize_t len;

	spin_lock(&ctx->lock);
	len = sysfs_list_show(page, &ctx->rq_list, "CTX pending");
	spin_unlock(&ctx->lock);

	return len;
}
/*
 * Show handler for the hctx "queued" attribute: prints hctx->queued.
 * Returns the number of characters written.
 */
static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
					   char *page)
{
	const int len = sprintf(page, "%lu\n", hctx->queued);

	return len;
}
/*
 * Show handler for the hctx "run" attribute: prints hctx->run.
 * Returns the number of characters written.
 */
static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	const int len = sprintf(page, "%lu\n", hctx->run);

	return len;
}
/*
 * Show handler for the hctx "dispatched" attribute.
 *
 * Prints one "<label>\t<count>" line per slot of hctx->dispatched[]:
 * slot 0 is labelled 0, and slot i (i >= 1) is labelled 1 << (i - 1).
 * Returns the number of characters written.
 */
static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx,
					       char *page)
{
	ssize_t len;
	int order;

	len = sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]);

	for (order = 1; order < BLK_MQ_MAX_DISPATCH_ORDER; order++) {
		const unsigned long bucket = 1U << (order - 1);

		len += sprintf(page + len, "%8lu\t%lu\n", bucket,
			       hctx->dispatched[order]);
	}

	return len;
}
/*
 * Show handler for the hctx "pending" attribute: lists the requests on
 * hctx->dispatch under the heading "HCTX pending", holding hctx->lock
 * while the list is walked. Returns the number of characters written.
 */
static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
					    char *page)
{
	ssize_t len;

	spin_lock(&hctx->lock);
	len = sysfs_list_show(page, &hctx->dispatch, "HCTX pending");
	spin_unlock(&hctx->lock);

	return len;
}
/*
 * Show handler for the hctx "ipi_redirect" attribute: prints 1 if
 * BLK_MQ_F_SHOULD_IPI is set in hctx->flags and 0 otherwise. The flag is
 * read under hctx->lock. Returns the number of characters written.
 */
static ssize_t blk_mq_hw_sysfs_ipi_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	ssize_t len;

	spin_lock(&hctx->lock);
	len = sprintf(page, "%u\n",
		      (hctx->flags & BLK_MQ_F_SHOULD_IPI) ? 1 : 0);
	spin_unlock(&hctx->lock);

	return len;
}
/*
 * Store handler for the hctx "ipi_redirect" attribute.
 *
 * Parses @page as a base-10 unsigned long. A non-zero value sets
 * BLK_MQ_F_SHOULD_IPI in hctx->flags and zero clears it, under hctx->lock;
 * the same boolean is then copied into ipi_redirect of every ctx that
 * hctx_for_each_ctx() visits for this hctx.
 *
 * Returns @len on success, or -EINVAL (after logging the input) if the
 * value cannot be parsed.
 */
static ssize_t blk_mq_hw_sysfs_ipi_store(struct blk_mq_hw_ctx *hctx,
					 const char *page, size_t len)
{
	struct blk_mq_ctx *ctx;
	unsigned long val;
	unsigned int idx;

	if (kstrtoul(page, 10, &val)) {
		pr_err("blk-mq-sysfs: invalid input '%s'\n", page);
		return -EINVAL;
	}

	spin_lock(&hctx->lock);
	if (!val)
		hctx->flags &= ~BLK_MQ_F_SHOULD_IPI;
	else
		hctx->flags |= BLK_MQ_F_SHOULD_IPI;
	spin_unlock(&hctx->lock);

	hctx_for_each_ctx(hctx, ctx, idx)
		ctx->ipi_redirect = !!val;

	return len;
}
/*
 * Show handler for the hctx "tags" attribute: delegates formatting of
 * hctx->tags to blk_mq_tag_sysfs_show() and returns its result.
 */
static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	const ssize_t len = blk_mq_tag_sysfs_show(hctx->tags, page);

	return len;
}
/*
 * Read-only sysfs attributes exposed for each blk_mq_ctx. Each entry pairs
 * a file name and mode with the show handler that produces its contents.
 */
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = {
.attr = {.name = "merged", .mode = S_IRUGO },
.show = blk_mq_sysfs_merged_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = {
.attr = {.name = "completed", .mode = S_IRUGO },
.show = blk_mq_sysfs_completed_show,
};
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = {
.attr = {.name = "rq_list", .mode = S_IRUGO },
.show = blk_mq_sysfs_rq_list_show,
};
/* NULL-terminated attribute list used as blk_mq_ctx_ktype's default_attrs. */
static struct attribute *default_ctx_attrs[] = {
&blk_mq_sysfs_dispatched.attr,
&blk_mq_sysfs_merged.attr,
&blk_mq_sysfs_completed.attr,
&blk_mq_sysfs_rq_list.attr,
NULL,
};
/*
 * sysfs attributes exposed for each blk_mq_hw_ctx. All are read-only except
 * "ipi_redirect", which is also writable by the file owner.
 */
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = {
.attr = {.name = "queued", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_queued_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = {
.attr = {.name = "run", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_run_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_dispatched_show,
};
/* The "pending" file is backed by the hctx dispatch-list dump. */
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
.attr = {.name = "pending", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_rq_list_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_ipi = {
.attr = {.name = "ipi_redirect", .mode = S_IRUGO | S_IWUSR},
.show = blk_mq_hw_sysfs_ipi_show,
.store = blk_mq_hw_sysfs_ipi_store,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
.attr = {.name = "tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_tags_show,
};
/* NULL-terminated attribute list used as blk_mq_hw_ktype's default_attrs. */
static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
&blk_mq_hw_sysfs_run.attr,
&blk_mq_hw_sysfs_dispatched.attr,
&blk_mq_hw_sysfs_pending.attr,
&blk_mq_hw_sysfs_ipi.attr,
&blk_mq_hw_sysfs_tags.attr,
NULL,
};
/* sysfs entry points for kobjects embedded in a struct blk_mq_ctx. */
static const struct sysfs_ops blk_mq_sysfs_ops = {
.show = blk_mq_sysfs_show,
.store = blk_mq_sysfs_store,
};
/* sysfs entry points for kobjects embedded in a struct blk_mq_hw_ctx. */
static const struct sysfs_ops blk_mq_hw_sysfs_ops = {
.show = blk_mq_hw_sysfs_show,
.store = blk_mq_hw_sysfs_store,
};
/*
 * kobject type with no default attributes; blk_mq_register_disk() uses it
 * for the queue's "mq" kobject.
 * NOTE(review): it reuses the ctx sysfs_ops, whose handlers treat the
 * kobject as embedded in a blk_mq_ctx; that looks harmless only because no
 * attributes are attached to this type - confirm.
 */
static struct kobj_type blk_mq_ktype = {
.sysfs_ops = &blk_mq_sysfs_ops,
.release = blk_mq_sysfs_release,
};
/* kobject type for a struct blk_mq_ctx, with the ctx attribute set. */
static struct kobj_type blk_mq_ctx_ktype = {
.sysfs_ops = &blk_mq_sysfs_ops,
.default_attrs = default_ctx_attrs,
.release = blk_mq_sysfs_release,
};
/* kobject type for a struct blk_mq_hw_ctx, with the hctx attribute set. */
static struct kobj_type blk_mq_hw_ktype = {
.sysfs_ops = &blk_mq_hw_sysfs_ops,
.default_attrs = default_hw_ctx_attrs,
.release = blk_mq_sysfs_release,
};
/*
 * Remove the "mq" sysfs kobject of @disk's request queue.
 *
 * Sends a KOBJ_REMOVE uevent for the queue's mq_kobj, deletes it from
 * sysfs, and drops a reference on the disk's device kobject.
 *
 * NOTE(review): the per-hctx and per-ctx kobjects that
 * blk_mq_register_disk() adds beneath "mq" are not deleted here - confirm
 * they are torn down elsewhere.
 */
void blk_mq_unregister_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
/* Send the remove event before the kobject is deleted. */
kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
kobject_del(&q->mq_kobj);
/* Pairs with the kobject_get() on the device kobject in blk_mq_register_disk(). */
kobject_put(&disk_to_dev(disk)->kobj);
}
int blk_mq_register_disk(struct gendisk *disk)
{
struct device *dev = disk_to_dev(disk);
struct request_queue *q = disk->queue;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
int ret, i, j;
kobject_init(&q->mq_kobj, &blk_mq_ktype);
ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
if (ret < 0)
return ret;
kobject_uevent(&q->mq_kobj, KOBJ_ADD);
queue_for_each_hw_ctx(q, hctx, i) {
kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", i);
if (ret)
break;
if (!hctx->nr_ctx)
continue;
hctx_for_each_ctx(hctx, ctx, j) {
kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
if (ret)
break;
}
}