Linux Device Drivers, 2nd Edition. June 2001, ISBN 0-59600-008-1, Order Number 0081, 586 pages, $39.95
Chapter 12
Loading Block Drivers
#include <linux/fs.h>
int register_blkdev(unsigned int major, const char *name,
struct block_device_operations *bdops);
int unregister_blkdev(unsigned int major, const char *name);
result = register_blkdev(sbull_major, "sbull", &sbull_bdops);
if (result < 0) {
printk(KERN_WARNING "sbull: can't get major %d\n",sbull_major);
return result;
}
if (sbull_major == 0) sbull_major = result; /* dynamic */
major = sbull_major; /* Use `major' later on to save typing */

struct block_device_operations {
int (*open) (struct inode *inode, struct file *filp);
int (*release) (struct inode *inode, struct file *filp);
int (*ioctl) (struct inode *inode, struct file *filp,
unsigned command, unsigned long argument);
int (*check_media_change) (kdev_t dev);
int (*revalidate) (kdev_t dev);
};
struct block_device_operations sbull_bdops = {
open: sbull_open,
release: sbull_release,
ioctl: sbull_ioctl,
check_media_change: sbull_check_change,
revalidate: sbull_revalidate,
};

#include <linux/blkdev.h>
blk_init_queue(request_queue_t *queue, request_fn_proc *request);
blk_cleanup_queue(request_queue_t *queue);

struct blk_dev_struct {
request_queue_t request_queue;
queue_proc *queue;
void *data;
};
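For a driver that uses a single queue, tying things together is just a matter of attaching the request function to the default queue for its major number at initialization time. A minimal sketch, using the BLK_DEFAULT_QUEUE macro that also appears in the cleanup code later in this section:

/* Attach the request function to the default queue for this major */
blk_init_queue(BLK_DEFAULT_QUEUE(major), sbull_request);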
read_ahead[major] = sbull_rahead;
result = -ENOMEM; /* for the possible errors */
sbull_sizes = kmalloc(sbull_devs * sizeof(int), GFP_KERNEL);
if (!sbull_sizes)
goto fail_malloc;
for (i=0; i < sbull_devs; i++) /* all the same size */
sbull_sizes[i] = sbull_size;
blk_size[major]=sbull_sizes;
sbull_blksizes = kmalloc(sbull_devs * sizeof(int), GFP_KERNEL);
if (!sbull_blksizes)
goto fail_malloc;
for (i=0; i < sbull_devs; i++) /* all the same blocksize */
sbull_blksizes[i] = sbull_blksize;
blksize_size[major]=sbull_blksizes;
sbull_hardsects = kmalloc(sbull_devs * sizeof(int), GFP_KERNEL);
if (!sbull_hardsects)
goto fail_malloc;
for (i=0; i < sbull_devs; i++) /* all the same hardsect */
sbull_hardsects[i] = sbull_hardsect;
hardsect_size[major]=sbull_hardsects;
for (i = 0; i < sbull_devs; i++)
register_disk(NULL, MKDEV(major, i), 1, &sbull_bdops,
sbull_size << 1);
for (i = 0; i < sbull_devs; i++)
    fsync_dev(MKDEV(sbull_major, i)); /* flush the devices */
unregister_blkdev(major, "sbull");
/*
* Fix up the request queue(s)
*/
blk_cleanup_queue(BLK_DEFAULT_QUEUE(major));
/* Clean up the global arrays */
read_ahead[major] = 0;
kfree(blk_size[major]);
blk_size[major] = NULL;
kfree(blksize_size[major]);
blksize_size[major] = NULL;
kfree(hardsect_size[major]);
hardsect_size[major] = NULL;
- DEVICE_NAME
The name of the device being created. This string is used in printing error messages.
- DEVICE_NR(kdev_t device)
- DEVICE_ON(kdev_t device)
- DEVICE_OFF(kdev_t device)
#define MAJOR_NR sbull_major /* force definitions on in blk.h */
static int sbull_major; /* must be declared before including blk.h */
#define DEVICE_NR(device) MINOR(device) /* has no partition bits */
#define DEVICE_NAME "sbull" /* name for messaging */
#define DEVICE_INTR sbull_intrptr /* pointer to bottom half */
#define DEVICE_NO_RANDOM /* no entropy to contribute */
#define DEVICE_REQUEST sbull_request
#define DEVICE_OFF(d) /* do-nothing */
#include <linux/blk.h>
#include "sbull.h" /* local definitions */
void sbull_request(request_queue_t *q)
{
while(1) {
INIT_REQUEST;
printk("<1>request %p: cmd %i sec %li (nr. %li)\n", CURRENT,
CURRENT->cmd,
CURRENT->sector,
CURRENT->current_nr_sectors);
end_request(1); /* success */
}
}
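The CURRENT and INIT_REQUEST macros used above hide the queue manipulation. Roughly, the 2.4 version of <linux/blk.h> defines them along these lines (paraphrased; the exact definitions vary between kernel versions):

#define QUEUE        (BLK_DEFAULT_QUEUE(MAJOR_NR))
#define QUEUE_EMPTY  list_empty(&QUEUE->queue_head)
#define CURRENT      blkdev_entry_next_request(&QUEUE->queue_head)
#define INIT_REQUEST                            \
    if (QUEUE_EMPTY)                            \
        return;                                 \
    if (MAJOR(CURRENT->rq_dev) != MAJOR_NR)     \
        panic(DEVICE_NAME ": request list destroyed");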
void sbull_request(request_queue_t *q)
{
Sbull_Dev *device;
int status;
while(1) {
INIT_REQUEST; /* returns when queue is empty */
/* Which "device" are we using? */
device = sbull_locate_device (CURRENT);
if (device == NULL) {
end_request(0);
continue;
}
/* Perform the transfer and clean up. */
spin_lock(&device->lock);
status = sbull_transfer(device, CURRENT);
spin_unlock(&device->lock);
end_request(status);
}
}
static Sbull_Dev *sbull_locate_device(const struct request *req)
{
int devno;
Sbull_Dev *device;
/* Check if the minor number is in range */
devno = DEVICE_NR(req->rq_dev);
if (devno >= sbull_devs) {
static int count = 0;
if (count++ < 5) /* print the message at most five times */
printk(KERN_WARNING "sbull: request for unknown device\n");
return NULL;
}
device = sbull_devices + devno; /* Pick it out of device array */
return device;
}
static int sbull_transfer(Sbull_Dev *device, const struct request *req)
{
int size;
u8 *ptr;
ptr = device->data + req->sector * sbull_hardsect;
size = req->current_nr_sectors * sbull_hardsect;
/* Make sure that the transfer fits within the device. */
if (ptr + size > device->data + sbull_blksize*sbull_size) {
static int count = 0;
if (count++ < 5)
printk(KERN_WARNING "sbull: request past end of device\n");
return 0;
}
/* Looks good, do the transfer. */
switch(req->cmd) {
case READ:
memcpy(req->buffer, ptr, size); /* from sbull to buffer */
return 1;
case WRITE:
memcpy(ptr, req->buffer, size); /* from buffer to sbull */
return 1;
default:
/* can't happen */
return 0;
}
}

The sbull driver as described earlier works very well. In simple situations (as with sbull), the macros from <linux/blk.h> can be used to easily set up a request function and get a working driver. As has already been mentioned, however, block drivers are often a performance-critical part of the kernel. Drivers based on the simple code shown earlier will likely not perform very well in many situations, and can also be a drag on the system as a whole. In this section we get into the details of how the I/O request queue works, with an eye toward writing a faster, more efficient driver.

The I/O Request Queue
- struct request *blkdev_entry_next_request(struct list_head *head);
- struct request *blkdev_next_request(struct request *req);
- struct request *blkdev_prev_request(struct request *req);
- blkdev_dequeue_request(struct request *req);
- blkdev_release_request(struct request *req);
All of these functions require that the io_request_lock be held, which we will discuss next. As a simple example, a driver that handles a whole request in one step can pull it off the queue and complete it like this:
struct request *req = CURRENT;
blkdev_dequeue_request(req);
end_that_request_last(req);
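The kernel takes io_request_lock before calling the request function, so code there may touch the queue directly. Code running anywhere else must acquire the lock itself first. A minimal sketch, assuming a queue obtained via BLK_DEFAULT_QUEUE:

request_queue_t *q = BLK_DEFAULT_QUEUE(MAJOR_NR);
unsigned long flags;

spin_lock_irqsave(&io_request_lock, flags);
if (!list_empty(&q->queue_head)) {
    struct request *req = blkdev_entry_next_request(&q->queue_head);
    blkdev_dequeue_request(req);   /* safe only while the lock is held */
    /* ... work with req ... */
}
spin_unlock_irqrestore(&io_request_lock, flags);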
for (i = 0; i < sbull_devs; i++) {
blk_init_queue(&sbull_devices[i].queue, sbull_request);
blk_queue_headactive(&sbull_devices[i].queue, 0);
}
blk_dev[major].queue = sbull_find_queue;
request_queue_t *sbull_find_queue(kdev_t device)
{
int devno = DEVICE_NR(device);
if (devno >= sbull_devs) {
static int count = 0;
if (count++ < 5) /* print the message at most five times */
printk(KERN_WARNING "sbull: request for unknown device\n");
return NULL;
}
return &sbull_devices[devno].queue;
}
void sbull_request(request_queue_t *q)
{
Sbull_Dev *device;
struct request *req;
int status;
/* Find our device */
device = sbull_locate_device (blkdev_entry_next_request(&q->queue_head));
if (device->busy) /* no race here - io_request_lock held */
return;
device->busy = 1;
/* Process requests in the queue */
while(! list_empty(&q->queue_head)) {
/* Pull the next request off the list. */
req = blkdev_entry_next_request(&q->queue_head);
blkdev_dequeue_request(req);
spin_unlock_irq (&io_request_lock);
spin_lock(&device->lock);
/* Process all of the buffers in this (possibly clustered) request. */
do {
status = sbull_transfer(device, req);
} while (end_that_request_first(req, status, DEVICE_NAME));
spin_unlock(&device->lock);
spin_lock_irq (&io_request_lock);
end_that_request_last(req);
}
device->busy = 0;
}
for (i = 0; i < sbull_devs; i++)
blk_cleanup_queue(&sbull_devices[i].queue);
blk_dev[major].queue = NULL;
int sbull_make_request(request_queue_t *queue, int rw,
struct buffer_head *bh)
{
u8 *ptr;
/* Figure out what we are doing */
Sbull_Dev *device = sbull_devices + MINOR(bh->b_rdev);
ptr = device->data + bh->b_rsector * sbull_hardsect;
/* Paranoid check; this apparently can really happen */
if (ptr + bh->b_size > device->data + sbull_blksize*sbull_size) {
static int count = 0;
if (count++ < 5)
printk(KERN_WARNING "sbull: request past end of device\n");
bh->b_end_io(bh, 0);
return 0;
}
/* This could be a high-memory buffer; shift it down */
#if CONFIG_HIGHMEM
bh = create_bounce(rw, bh);
#endif
/* Do the transfer */
switch(rw) {
case READ:
case READA: /* Read ahead */
memcpy(bh->b_data, ptr, bh->b_size); /* from sbull to buffer */
bh->b_end_io(bh, 1);
break;
case WRITE:
refile_buffer(bh);
memcpy(ptr, bh->b_data, bh->b_size); /* from buffer to sbull */
mark_buffer_uptodate(bh, 1);
bh->b_end_io(bh, 1);
break;
default:
/* can't happen */
bh->b_end_io(bh, 0);
break;
}
/* A zero return means the buffer has been handled; a nonzero
   return would tell the block layer to resubmit it (after the
   driver remapped b_rdev, as RAID drivers do). */
return 0;
}
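To have the kernel call sbull_make_request directly instead of queueing requests, the driver must register it at initialization time. A sketch of that call (2.4 API, using the default queue as elsewhere in this chapter):

/* Bypass request accumulation: deliver buffer heads straight to
   sbull_make_request. */
blk_queue_make_request(BLK_DEFAULT_QUEUE(major), sbull_make_request);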
int sbull_release (struct inode *inode, struct file *filp)
{
Sbull_Dev *dev = sbull_devices + MINOR(inode->i_rdev);
spin_lock(&dev->lock);
dev->usage--;
MOD_DEC_USE_COUNT;
spin_unlock(&dev->lock);
return 0;
}
- BLKFRAGET, BLKFRASET
Get and set the filesystem-level read-ahead value (the one stored in max_readahead) for this device.
- BLKROSET, BLKROGET
These commands are used to change and check the read-only flag for the device.
int sbull_ioctl (struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
int err;
long size;
struct hd_geometry geo;
PDEBUG("ioctl 0x%x 0x%lx\n", cmd, arg);
switch(cmd) {
case BLKGETSIZE:
/* Return the device size, expressed in sectors */
if (!arg) return -EINVAL; /* NULL pointer: not valid */
err = ! access_ok (VERIFY_WRITE, arg, sizeof(long));
if (err) return -EFAULT;
size = blksize*sbull_sizes[MINOR(inode->i_rdev)]
/ sbull_hardsects[MINOR(inode->i_rdev)];
if (copy_to_user((long *) arg, &size, sizeof (long)))
return -EFAULT;
return 0;
case BLKRRPART: /* reread partition table: can't do it */
return -ENOTTY;
case HDIO_GETGEO:
/*
* Get geometry: since we are a virtual device, we have to make
* up something plausible. So we claim 16 sectors, four heads,
* and calculate the corresponding number of cylinders. We set
* the start of data at sector four.
*/
err = ! access_ok(VERIFY_WRITE, arg, sizeof(geo));
if (err) return -EFAULT;
size = sbull_size * blksize / sbull_hardsect;
geo.cylinders = (size & ~0x3f) >> 6;
geo.heads = 4;
geo.sectors = 16;
geo.start = 4;
if (copy_to_user((void *) arg, &geo, sizeof(geo)))
return -EFAULT;
return 0;
default:
/*
* For ioctls we don't understand, let the block layer
* handle them.
*/
return blk_ioctl(inode->i_rdev, cmd, arg);
}
return -ENOTTY; /* unknown command */
}
int sbull_check_change(kdev_t i_rdev)
{
int minor = MINOR(i_rdev);
Sbull_Dev *dev = sbull_devices + minor;
PDEBUG("check_change for dev %i\n",minor);
if (dev->data)
return 0; /* still valid */
return 1; /* expired */
}
int sbull_revalidate(kdev_t i_rdev)
{
Sbull_Dev *dev = sbull_devices + MINOR(i_rdev);
PDEBUG("revalidate for dev %i\n",MINOR(i_rdev));
if (dev->data)
return 0;
dev->data = vmalloc(dev->size);
if (!dev->data)
return -ENOMEM;
return 0;
}int check_disk_change(kdev_t dev);
int sbull_open (struct inode *inode, struct file *filp)
{
Sbull_Dev *dev; /* device information */
int num = MINOR(inode->i_rdev);
if (num >= sbull_devs) return -ENODEV;
dev = sbull_devices + num;
spin_lock(&dev->lock);
/* revalidate on first open and fail if no data is there */
if (!dev->usage) {
check_disk_change(inode->i_rdev);
if (!dev->data)
{
spin_unlock (&dev->lock);
return -ENOMEM;
}
}
dev->usage++;
spin_unlock(&dev->lock);
MOD_INC_USE_COUNT;
return 0; /* success */
}
#define MAJOR_NR spull_major /* force definitions on in blk.h */
int spull_major; /* must be declared before including blk.h */
#define SPULL_SHIFT 4 /* max 16 partitions */
#define SPULL_MAXNRDEV 4 /* max 4 device units */
#define DEVICE_NR(device) (MINOR(device)>>SPULL_SHIFT)
#define DEVICE_NAME "pd" /* name for messaging */
- int major
The major number for the device that the structure refers to.
- void *real_devices
A private pointer that the driver can use internally.
- struct block_device_operations *fops;
A pointer to the block operations structure for this device.
struct gendisk spull_gendisk = {
major: 0, /* Major number assigned later */
major_name: "pd", /* Name of the major device */
minor_shift: SPULL_SHIFT, /* Shift to get device number */
max_p: 1 << SPULL_SHIFT, /* Number of partitions */
fops: &spull_bdops, /* Block dev operations */
/* everything else is dynamic */
};
spull_sizes = kmalloc( (spull_devs << SPULL_SHIFT) * sizeof(int),
GFP_KERNEL);
if (!spull_sizes)
goto fail_malloc;
/* Start with zero-sized partitions, and correctly sized units */
memset(spull_sizes, 0, (spull_devs << SPULL_SHIFT) * sizeof(int));
for (i = 0; i < spull_devs; i++)
    spull_sizes[i << SPULL_SHIFT] = spull_size;
blk_size[MAJOR_NR] = spull_gendisk.sizes = spull_sizes;
/* Allocate the partitions array. */
spull_partitions = kmalloc( (spull_devs << SPULL_SHIFT) *
sizeof(struct hd_struct), GFP_KERNEL);
if (!spull_partitions)
goto fail_malloc;
memset(spull_partitions, 0, (spull_devs << SPULL_SHIFT) *
sizeof(struct hd_struct));
/* fill in whole-disk entries */
for (i=0; i < spull_devs; i++)
spull_partitions[i << SPULL_SHIFT].nr_sects =
spull_size*(blksize/SPULL_HARDSECT);
spull_gendisk.part = spull_partitions;
spull_gendisk.nr_real = spull_devs;

register_disk(struct gendisk *gd, int drive, unsigned minors,
struct block_device_operations *ops, long size);
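A typical use is at initialization time, once for each drive unit. A sketch along the lines of the spull sample, assuming the gendisk fields set up above (MKDEV selects the first minor of each unit):

for (i = 0; i < spull_devs; i++)
    register_disk(&spull_gendisk, MKDEV(major, i << SPULL_SHIFT),
                  SPULL_MAXNRDEV, &spull_bdops, spull_size << 1);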
int spull_revalidate(kdev_t i_rdev)
{
/* first partition, # of partitions */
int part1 = (DEVICE_NR(i_rdev) << SPULL_SHIFT) + 1;
int npart = (1 << SPULL_SHIFT) -1;
/* first clear old partition information */
memset(spull_gendisk.sizes+part1, 0, npart*sizeof(int));
memset(spull_gendisk.part +part1, 0, npart*sizeof(struct hd_struct));
spull_gendisk.part[DEVICE_NR(i_rdev) << SPULL_SHIFT].nr_sects =
spull_size << 1;
/* then fill new info */
printk(KERN_INFO "Spull partition check: (%d) ", DEVICE_NR(i_rdev));
register_disk(&spull_gendisk, i_rdev, SPULL_MAXNRDEV, &spull_bdops,
spull_size << 1);
return 0;
}
for (i = 0; i < (spull_devs << SPULL_SHIFT); i++)
fsync_dev(MKDEV(spull_major, i)); /* flush the devices */
blk_cleanup_queue(BLK_DEFAULT_QUEUE(major));
read_ahead[major] = 0;
kfree(blk_size[major]); /* which is gendisk->sizes as well */
blk_size[major] = NULL;
kfree(spull_gendisk.part);
kfree(blksize_size[major]);
blksize_size[major] = NULL;
Spull_Dev *dev = spull_devices + DEVICE_NR(inode->i_rdev);
case BLKGETSIZE:
/* Return the device size, expressed in sectors */
err = ! access_ok (VERIFY_WRITE, arg, sizeof(long));
if (err) return -EFAULT;
size = spull_gendisk.part[MINOR(inode->i_rdev)].nr_sects;
if (copy_to_user((long *) arg, &size, sizeof (long)))
return -EFAULT;
return 0;
case BLKRRPART: /* re-read partition table */
return spull_revalidate(inode->i_rdev);
ptr = device->data +
(spull_partitions[minor].start_sect + req->sector)*SPULL_HARDSECT;
size = req->current_nr_sectors*SPULL_HARDSECT;
/*
* Make sure that the transfer fits within the device.
*/
if (req->sector + req->current_nr_sectors >
spull_partitions[minor].nr_sects) {
static int count = 0;
if (count++ < 5)
printk(KERN_WARNING "spull: request past end of partition\n");
return 0;
}
void spull_irqdriven_request(request_queue_t *q)
{
Spull_Dev *device;
int status;
unsigned long flags;
/* If we are already processing requests, don't do any more now. */
if (spull_busy)
return;
while(1) {
INIT_REQUEST; /* returns when queue is empty */
/* Which "device" are we using? */
device = spull_locate_device (CURRENT);
if (device == NULL) {
end_request(0);
continue;
}
spin_lock_irqsave(&device->lock, flags);
/* Perform the transfer and clean up. */
status = spull_transfer(device, CURRENT);
spin_unlock_irqrestore(&device->lock, flags);
/* ... and wait for the timer to expire -- no end_request(1) */
spull_timer.expires = jiffies + spull_irq;
add_timer(&spull_timer);
spull_busy = 1;
return;
}
}

(Under the 2.2 kernel, which had no request queues, the request function was instead installed directly in the global blk_dev array:)

blk_dev[major].request_fn = sbull_request;
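Returning to the interrupt-driven spull request function above: note that it never calls end_request; completion happens later, when the timer fires. A sketch of such a timer handler, modeled on the spull example (the name spull_interrupt is assumed here; spull_timer, spull_busy, and spull_irqdriven_request are as above, and QUEUE_EMPTY comes from <linux/blk.h>):

/* Invoked when the (simulated-interrupt) timer expires */
void spull_interrupt(unsigned long unused)
{
    unsigned long flags;

    spin_lock_irqsave(&io_request_lock, flags);
    end_request(1);                     /* the outstanding request is done */
    spull_busy = 0;                     /* no request in flight any more */
    if (!QUEUE_EMPTY)                   /* more requests waiting? */
        spull_irqdriven_request(NULL);  /* start the next transfer */
    spin_unlock_irqrestore(&io_request_lock, flags);
}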
#ifdef RO_IOCTLS
static inline int blk_ioctl(kdev_t dev, unsigned int cmd,
unsigned long arg)
{
int err;
switch (cmd) {
case BLKRAGET: /* return the read-ahead value */
if (!arg) return -EINVAL;
err = ! access_ok(VERIFY_WRITE, arg, sizeof(long));
if (err) return -EFAULT;
PUT_USER(read_ahead[MAJOR(dev)],(long *) arg);
return 0;
case BLKRASET: /* set the read-ahead value */
if (!capable(CAP_SYS_ADMIN)) return -EACCES;
if (arg > 0xff) return -EINVAL; /* limit it */
read_ahead[MAJOR(dev)] = arg;
return 0;
case BLKFLSBUF: /* flush */
if (! capable(CAP_SYS_ADMIN)) return -EACCES; /* only root */
fsync_dev(dev);
invalidate_buffers(dev);
return 0;
RO_IOCTLS(dev, arg);
}
return -ENOTTY;
}
#endif /* RO_IOCTLS */

resetup_one_dev(struct gendisk *gd, int drive);
static inline void register_disk(struct gendisk *gdev, kdev_t dev,
unsigned minors, struct file_operations *ops, long size)
{
if (! gdev)
return;
resetup_one_dev(gdev, MINOR(dev) >> gdev->minor_shift);
}

The most important functions and macros used in writing block drivers are summarized here. To save space, however, we do not list the fields of struct request, struct buffer_head, or struct gendisk, and we omit the predefined ioctl commands.
- #include <linux/fs.h>
- int register_blkdev(unsigned int major, const char *name, struct block_device_operations *bdops);
- int unregister_blkdev(unsigned int major, const char *name);
- #include <linux/blkdev.h>
- struct blk_dev_struct blk_dev[MAX_BLKDEV];
- DEVICE_NR(kdev_t device)
- struct request *blkdev_entry_next_request(struct list_head *head);
- struct request *blkdev_next_request(struct request *req);
- struct request *blkdev_prev_request(struct request *req);
- blkdev_dequeue_request(struct request *req);
- blkdev_release_request(struct request *req);
- int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg);
A utility function that implements most of the standard block device ioctl commands.
- int check_disk_change(kdev_t dev);
- void register_disk(struct gendisk *gd, int drive, unsigned minors, struct block_device_operations *ops, long size);
© 2001, O'Reilly & Associates, Inc.