Linux Block Device Driver (IV) - Data Structure and Related Operations of Block Devices

Memory is a linear structure, and Linux divides it into pages. A page can be as large as 64KB, but on current mainstream systems pages are 4KB. Each page of data is wrapped in a segment, which is represented by a bio_vec. Multiple segments are stored as an array of bio_vec elements, which a bio references through its bi_io_vec field.
bi_io_vec is a pointer in struct bio. One or more bios make up a request descriptor (struct request). A request is either linked into the request queue (request_queue) or merged into a request that is already in the queue. Two requests can be merged when the sector ranges they cover are adjacent.

1. Block I/O Request (bio)
Moving data from memory to disk, or from disk to memory, is called an I/O operation. The kernel describes an I/O operation with a core data structure, struct bio. A bio contains an array of data segments (bi_io_vec), which is the data to be transferred.

/*
 * struct bio - main unit of I/O for the block layer and lower layers
 * (i.e. drivers and stacking drivers).
 *
 * Describes one block I/O operation: the block device it is for, the
 * starting sector, and the bio_vec array (bi_io_vec) holding the data
 * segments.
 */
struct bio {
    /* First sector to be transferred */
    sector_t        bi_sector;  /* device address in 512 byte
                           sectors */
    /* Next bio in the chain (a bio pointer, not a sector number) */
    struct bio      *bi_next;   /* request queue link */
    struct block_device *bi_bdev;   /* block device this bio is for */
    unsigned long       bi_flags;   /* status, command, etc */
    unsigned long       bi_rw;      /* bottom bits READ/WRITE,
                         * top bits priority
                         */

    unsigned short      bi_vcnt;    /* how many bio_vec's */
    unsigned short      bi_idx;     /* current index into bvl_vec */

    /* Number of segments in this BIO after
     * physical address coalescing is performed.
     */
    unsigned short      bi_phys_segments;

    /* Number of segments after physical and DMA remapping
     * hardware coalescing is performed.
     */
    unsigned short      bi_hw_segments;
    unsigned int        bi_size;    /* residual I/O count */
    /*
     * To keep track of the max hw size, we account for the
     * sizes of the first and last virtually mergeable segments
     * in this bio
     */
    unsigned int        bi_hw_front_size;
    unsigned int        bi_hw_back_size;
    unsigned int        bi_max_vecs;    /* max bvl_vecs we can hold */
    struct bio_vec      *bi_io_vec; /* the actual vec list */
    /* NOTE(review): presumably the I/O completion callback - confirm */
    bio_end_io_t        *bi_end_io;
    atomic_t        bi_cnt;     /* pin count */
    /* NOTE(review): presumably private data for the bio's owner - confirm */
    void            *bi_private;
    bio_destructor_t    *bi_destructor; /* destructor */
};

bio-related macros

/* Get a pointer to the current page */
bio_page(bio)
/* Get the offset within the current page */
bio_offset(bio)
/* Get the number of sectors in the current segment */
bio_cur_sectors(bio)

2. Request structure
Several consecutive pages form a bio structure, and several adjacent bio structures form a request structure. Because the bios in one request cover adjacent sectors, the disk head does not have to move far, which reduces the time spent on I/O.

/*
 * struct request - one block I/O request, made up of a chain of bios
 * (bio .. biotail) and linked into a request queue through queuelist.
 *
 * try to put the fields that are referenced together in the same cacheline
 */
struct request {
    struct list_head queuelist; /* link in the request queue's (request_queue) list */
    struct list_head donelist;

    request_queue_t *q;

    unsigned int cmd_flags;
    enum rq_cmd_type_bits cmd_type;

    /* Maintain bio traversal state for part by part I/O submission.
     * hard_* are block layer internals, no driver should touch them!
     */
    /* Submission position: the next sector to be transferred */
    sector_t sector;        /* next sector to submit */
    /* Completion position (block layer internal, see note above) */
    sector_t hard_sector;       /* next sector to complete */
    unsigned long nr_sectors;   /* no. of sectors left to submit */
    unsigned long hard_nr_sectors;  /* no. of sectors left to complete */
    /* no. of sectors left to submit in the current segment */
    unsigned int current_nr_sectors;

    /* no. of sectors left to complete in the current segment */
    unsigned int hard_cur_sectors;
    struct bio *bio; /* first bio of the request that is not yet finished */
    struct bio *biotail;/* last bio in the request's bio list */
    struct hlist_node hash; /* merge hash */
    /*
     * The rb_node is only used inside the io scheduler, requests
     * are pruned when moved to the dispatch queue. So let the
     * completion_data share space with the rb_node.
     */
    union {
        struct rb_node rb_node; /* sort/lookup */
        void *completion_data;
    };

    /*
     * two pointers are available for the IO schedulers, if they need
     * more they have to dynamically allocate it.
     */
    void *elevator_private;  /* first private data pointer for the I/O scheduler */
    void *elevator_private2;/* second private data pointer for the I/O scheduler */
    struct gendisk *rq_disk;  /* disk this request is directed to */
    unsigned long start_time;

    /* Number of scatter-gather DMA addr+len pairs after
     * physical address coalescing is performed.
     */
    unsigned short nr_phys_segments;/* number of physical segments in the request */

    /* Number of scatter-gather addr+len pairs after
     * physical and DMA remapping hardware coalescing is performed.
     * This is the number of scatter-gather entries the driver
     * will actually have to deal with after DMA mapping is done.
     */
    unsigned short nr_hw_segments;
    unsigned short ioprio;
    void *special;
    char *buffer;
    int tag;
    int errors;
    int ref_count;

    /*
     * when request is used as a packet command carrier
     */
    unsigned int cmd_len;
    unsigned char cmd[BLK_MAX_CDB];

    unsigned int data_len;
    unsigned int sense_len;
    void *data;
    void *sense;

    unsigned int timeout;
    int retries;

    /*
     * completion callback.
     */
    rq_end_io_fn *end_io;
    void *end_io_data;
};

3. Request queue
A request queue links together the request structures destined for the same device. It also holds device-related information such as the types of request the block device supports, the number of requests, the size of segments, and the hardware sector limits.

/*
 * struct request_queue - the request queue of a block device.
 *
 * The kernel implements the queue as a doubly linked list (queue_head)
 * that links the pending struct request entries. The structure also
 * carries the queue's callback function pointers and its settings/limits.
 */
struct request_queue
{
    /*
     * Together with queue_head for cacheline sharing
     */
    struct list_head    queue_head;/* list of request structures;
                                represents the pending requests */
    struct request      *last_merge;
    elevator_t      *elevator;  /* pointer to the elevator (I/O scheduling) algorithm */

    /*
     * the queue request freelist, one for reads and one for writes
     */
    struct request_list rq;/* data structure used for allocating request descriptors */

    /* Function the driver implements to process requests */
    request_fn_proc     *request_fn;
    /* Method that inserts a new request into the request queue */
    make_request_fn     *make_request_fn;
    prep_rq_fn      *prep_rq_fn;
    unplug_fn       *unplug_fn;
    merge_bvec_fn       *merge_bvec_fn;
    issue_flush_fn      *issue_flush_fn;
    prepare_flush_fn    *prepare_flush_fn;
    softirq_done_fn     *softirq_done_fn;

    /*
     * Dispatch queue sorting
     */
    sector_t        end_sector;
    struct request      *boundary_rq;

    /*
     * Auto-unplugging state
     */
    struct timer_list   unplug_timer;
    int         unplug_thresh;  /* After this many requests */
    unsigned long       unplug_delay;   /* After this many jiffies */
    struct work_struct  unplug_work;

    struct backing_dev_info backing_dev_info;

    /*
     * The queue owner gets to use this for whatever they like.
     * ll_rw_blk doesn't touch it.
     */
    void            *queuedata;/* pointer to the block device driver's private data */

    /*
     * queue needs bounce pages for pages above this limit
     */
    unsigned long       bounce_pfn;
    gfp_t           bounce_gfp;

    /*
     * various queue flags, see QUEUE_* below
     */
    unsigned long       queue_flags;

    /*
     * protects queue structures from reentrancy. ->__queue_lock should
     * _never_ be used directly, it is queue private. always use
     * ->queue_lock.
     */
    spinlock_t      __queue_lock;
    spinlock_t      *queue_lock;

    /*
     * queue kobject
     */
    struct kobject kobj;

    /*
     * queue settings
     */
    unsigned long       nr_requests;    /* Max # of requests */
    unsigned int        nr_congestion_on;
    unsigned int        nr_congestion_off;
    unsigned int        nr_batching;

    unsigned int        max_sectors;
    unsigned int        max_hw_sectors;
    unsigned short      max_phys_segments;
    unsigned short      max_hw_segments;
    unsigned short      hardsect_size;
    unsigned int        max_segment_size;

    unsigned long       seg_boundary_mask;
    unsigned int        dma_alignment;

    struct blk_queue_tag    *queue_tags;

    unsigned int        nr_sorted;
    unsigned int        in_flight;

    /*
     * sg stuff
     */
    unsigned int        sg_timeout;
    unsigned int        sg_reserved_size;
    int         node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
    struct blk_trace    *blk_trace;
#endif
    /*
     * reserved for flush operations
     */
    unsigned int        ordered, next_ordered, ordseq;
    int         orderr, ordcolor;
    struct request      pre_flush_rq, bar_rq, post_flush_rq;
    struct request      *orig_bar_rq;
    unsigned int        bi_size;

    struct mutex        sysfs_lock;
};

4. Summary
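Relationships among request queue, request structure, bio, etc.: a request_queue links the pending request structures for a device, each request links one or more bio structures, and each bio points to an array of bio_vec segments.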

Keywords: Linux IE

Added by scottjcampbell on Sun, 07 Jul 2019 04:40:02 +0300