我们先将前面的内容简单地回顾一下:整个初始化由sata驱动模块开始,通过对sata设备的注册,初始化libata层的结构;在初始化的过程中,初始化了scsi层需要的结构,并开启一个错误处理线程,该线程负责处理操作中出现的异常/错误,并负责确定是否需要对设备重新连接。因为现在整个系统刚刚初始化,还没有连接设备,所以通过该线程完成对设备的reset以及连接等。
在此过程中,初始化线程处于等待状态,在错误处理线程执行一个周期后,初始化线程继续执行,并由此开始初始化总线上的设备,手动触发sd.c模块进行probe处理,这里的probe我们有必要详细分析下,因为这里有一些重要的数据结构,这些结构对于我们后续的理解有重大的帮助。
linux/drivers/scsi/sd.c
/**
 *	sd_probe - called during driver initialization and whenever a
 *	new scsi device is attached to the system. It is called once
 *	for each scsi device (not just disks) present.
 *	@dev: pointer to device object
 *
 *	Returns 0 if successful, otherwise a negative errno:
 *	-ENODEV when the device type is not TYPE_DISK, TYPE_MOD or
 *	TYPE_RBC (e.g. a scanner), -ENOMEM when an allocation fails,
 *	-EBUSY when the allocated index is not below SD_MAX_DISKS, or
 *	the error reported by idr_get_new() / class_device_add().
 *
 *	Note: this function is invoked from the scsi mid-level.
 *	This function sets up the mapping between a given
 *	<host,channel,id,lun> (found in sdp) and new device name
 *	(e.g. /dev/sda). More precisely it is the block device major
 *	and minor number that is chosen here.
 *
 *	Assume sd_attach is not re-entrant (for time being)
 *	Also think about sd_attach() and sd_remove() running coincidentally.
 **/
static int sd_probe(struct device *dev)
{
	struct scsi_device *sdp = to_scsi_device(dev); //(1)
	struct scsi_disk *sdkp;
	struct gendisk *gd;
	u32 index;
	int error;

	/* Only disk-like device types are handled by this driver. */
	error = -ENODEV;
	if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
		goto out;

	SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
					"sd_attach\n"));

	error = -ENOMEM;
	sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
	if (!sdkp)
		goto out;

	gd = alloc_disk(16);
	if (!gd)
		goto out_free;

	if (!idr_pre_get(&sd_index_idr, GFP_KERNEL))
		goto out_put;

	spin_lock(&sd_index_lock);
	error = idr_get_new(&sd_index_idr, NULL, &index);
	spin_unlock(&sd_index_lock);

	/*
	 * Check the idr result first: index is only valid when the
	 * allocation succeeded, and a real idr error must not be
	 * replaced by -EBUSY.
	 */
	if (error)
		goto out_put;
	if (index >= SD_MAX_DISKS) {
		error = -EBUSY;
		goto out_free_index;
	}

	sdkp->device = sdp;
	sdkp->driver = &sd_template;
	sdkp->disk = gd;
	sdkp->index = index;
	sdkp->openers = 0; // (2)

	/* Pick a default command timeout unless one is already set. */
	if (!sdp->timeout) {
		if (sdp->type != TYPE_MOD)
			sdp->timeout = SD_TIMEOUT;
		else
			sdp->timeout = SD_MOD_TIMEOUT;
	}

	class_device_initialize(&sdkp->cdev);
	sdkp->cdev.dev = &sdp->sdev_gendev;
	sdkp->cdev.class = &sd_disk_class;
	strncpy(sdkp->cdev.class_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE);

	/*
	 * Keep the return code: previously a failure here jumped to the
	 * cleanup path with error == 0, so the caller saw success even
	 * though sdkp had been freed.
	 */
	error = class_device_add(&sdkp->cdev);
	if (error)
		goto out_free_index;

	get_device(&sdp->sdev_gendev);

	/* Split the disk index into major number and first minor. */
	gd->major = sd_major((index & 0xf0) >> 4);
	gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
	gd->minors = 16;
	gd->fops = &sd_fops; //(3)

	/* sda..sdz, then sdaa..sdzz, then sdaaa.. */
	if (index < 26) {
		sprintf(gd->disk_name, "sd%c", 'a' + index % 26);
	} else if (index < (26 + 1) * 26) {
		sprintf(gd->disk_name, "sd%c%c",
			'a' + index / 26 - 1,'a' + index % 26);
	} else {
		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
		const unsigned int m2 = (index / 26 - 1) % 26;
		const unsigned int m3 = index % 26;
		sprintf(gd->disk_name, "sd%c%c%c",
			'a' + m1, 'a' + m2, 'a' + m3);
	}

	gd->private_data = &sdkp->driver;
	gd->queue = sdkp->device->request_queue; //(4)

	sd_revalidate_disk(gd);

	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);

	gd->driverfs_dev = &sdp->sdev_gendev;
	gd->flags = GENHD_FL_DRIVERFS;
	if (sdp->removable)
		gd->flags |= GENHD_FL_REMOVABLE;

	dev_set_drvdata(dev, sdkp);
	add_disk(gd);

	sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
		  sdp->removable ? "removable " : "");

	return 0;

 out_free_index:
	/* Give the index back so a failed probe does not consume a slot. */
	spin_lock(&sd_index_lock);
	idr_remove(&sd_index_idr, index);
	spin_unlock(&sd_index_lock);
 out_put:
	put_disk(gd);
 out_free:
	kfree(sdkp);
 out:
	return error;
}
我们先说明下第三部分,第三部分初始化了struct gendisk的设备号和一个重要的指针( gd->fops = &sd_fops;),我们先来看下这个结构体的初始化:
/*
 * Block-device method table for SCSI disks. It carries open/release,
 * ioctl, geometry and media-change hooks; there are no read or write
 * members in this table.
 */
static struct block_device_operations sd_fops = {
	.owner			= THIS_MODULE,
	.open			= sd_open,
	.release		= sd_release,
	.ioctl			= sd_ioctl,
#ifdef CONFIG_COMPAT
	/* only built when CONFIG_COMPAT is defined */
	.compat_ioctl		= sd_compat_ioctl,
#endif
	.getgeo			= sd_getgeo,
	.media_changed		= sd_media_changed,
	.revalidate_disk	= sd_revalidate_disk,
};
这个结构体很像字符设备中的file_operations,同样有open、release函数,其实这就是块设备的接口函数:我们打开一个块设备,系统最终会运行这里的open函数,至于这个函数的生效过程我们一会再分析。
第1 2 4部分要一起看,其实这里有一个块设备重要的操作函数,我们在上面的块设备操作函数没有看到read, write函数,因为块的读写是通过一个特殊的函数request函数实现的, 系统在文件系统层将需要的操作写入到相应设备的queue中,而这个queue中就包含这个request函数的指针,方便系统的最后调用,我们可以看到这里的gd->queue来自于probe函数的传入参数,我们可以看下之前的分析过程中的一个函数:
linux/drivers/scsi/scsi_scan.c
/**
 * scsi_probe_and_add_lun - probe a LUN, if a LUN is found add it
 * @starget: pointer to target device structure
 * @lun: LUN of target device
 * @bflagsp: store bflags here if not NULL
 * @sdevp: if not NULL, receives the scsi_device when the LUN is already
 *	known; if NULL, scsi_device_put() is called on that device instead
 * @rescan: 0 on the first scan of a host adapter; when non-zero an
 *	already existing device is reported without being probed again
 * @hostdata: handed to scsi_alloc_sdev() when a new device is allocated
 *
 * Description:
 * Call scsi_probe_lun, if a LUN with an attached device is found,
 * allocate and set it up by calling scsi_add_lun.
 *
 * Return:
 * SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device
 * SCSI_SCAN_TARGET_PRESENT: target responded, but no device is
 * attached at the LUN
 * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
 *
 * NOTE(review): this listing is an excerpt; it stops before the end of
 * the function, so the cleanup labels targeted by the gotos below are
 * not visible here.
 **/
static int scsi_probe_and_add_lun(struct scsi_target *starget,
uint lun, int *bflagsp,
struct scsi_device **sdevp, int rescan,
void *hostdata)
{
struct scsi_device *sdev;
unsigned char *result;
int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
/*
 * The rescan flag is used as an optimization, the first scan of a
 * host adapter calls into here with rescan == 0.
 */
sdev = scsi_device_lookup_by_target(starget, lun);
if (sdev) {
/*
 * Device already known: on a rescan, or when it has left the
 * SDEV_CREATED state, report it as present without probing.
 */
if (rescan || sdev->sdev_state != SDEV_CREATED) {
SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO
"scsi scan: device exists on %s\n",
sdev->sdev_gendev.bus_id));
if (sdevp)
*sdevp = sdev;
else
scsi_device_put(sdev);
if (bflagsp)
*bflagsp = scsi_get_device_flags(sdev,
sdev->vendor,
sdev->model);
return SCSI_SCAN_LUN_PRESENT;
}
scsi_device_put(sdev);
} else
/* No device yet for this LUN: allocate one (this also sets up its queue). */
sdev = scsi_alloc_sdev(starget, lun, hostdata);
if (!sdev)
goto out;
/* Buffer that scsi_probe_lun() fills with the INQUIRY response. */
result = kmalloc(result_len, GFP_ATOMIC |
((shost->unchecked_isa_dma) ? __GFP_DMA : 0));
if (!result)
goto out_free_sdev;
if (scsi_probe_lun(sdev, result, result_len, &bflags))
goto out_free_result;
if (bflagsp)
*bflagsp = bflags;
/*
 * result contains valid SCSI INQUIRY data.
 */
/* Top three bits of byte 0 are the peripheral qualifier. */
if (((result[0] >> 5) == 3) && !(bflags & BLIST_ATTACH_PQ3)) {
/*
 * For a Peripheral qualifier 3 (011b), the SCSI
 * spec says: The device server is not capable of
 * supporting a physical device on this logical
 * unit.
 *
 * For disks, this implies that there is no
 * logical disk configured at sdev->lun, but there
 * is a target id responding.
 */
SCSI_LOG_SCAN_BUS(2, sdev_printk(KERN_INFO, sdev, "scsi scan:"
" peripheral qualifier of 3, device not"
" added\n"))
if (lun == 0) {
SCSI_LOG_SCAN_BUS(1, {
unsigned char vend[9];
unsigned char mod[17];
sdev_printk(KERN_INFO, sdev,
"scsi scan: consider passing scsi_mod."
"dev_flags=%s:%s:0x240 or 0x1000240\n",
scsi_inq_str(vend, result, 8, 16),
scsi_inq_str(mod, result, 16, 32));
});
}
res = SCSI_SCAN_TARGET_PRESENT;
goto out_free_result;
}
/* (excerpt ends here; the remainder of the function is not shown) */
这个函数在前面的文章中曾经运行过一次,其中调用的scsi_alloc_sdev函数里就有与queue相关的操作。
/**
 * scsi_alloc_sdev - allocate and set up a scsi_device
 * @starget: target the new device hangs off
 * @lun: logical unit number recorded in the device
 * @hostdata: stored in sdev->hostdata (usually NULL, ->slave_alloc
 *	sets it instead)
 *
 * Description:
 *     Allocates a zeroed scsi_device (plus the transport's per-device
 *     area), fills in host, channel, id and lun, initializes its lists,
 *     lock and event work, creates its request queue and finally gives
 *     the host template's ->slave_alloc hook, if any, a chance to veto.
 *
 * Return value:
 *     scsi_device pointer, or NULL on failure.
 **/
static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
					   unsigned int lun, void *hostdata)
{
	extern void scsi_evt_thread(struct work_struct *work);
	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
	struct scsi_device *sdev;
	int report_failure = 1;
	int rc;

	sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
		       GFP_ATOMIC);
	if (!sdev)
		goto out;

	/* Identification strings stay at the placeholder for now. */
	sdev->vendor = scsi_null_device_strs;
	sdev->model = scsi_null_device_strs;
	sdev->rev = scsi_null_device_strs;

	/* Address of the device. */
	sdev->host = shost;
	sdev->id = starget->id;
	sdev->lun = lun;
	sdev->channel = starget->channel;
	sdev->sdev_state = SDEV_CREATED;

	INIT_LIST_HEAD(&sdev->siblings);
	INIT_LIST_HEAD(&sdev->same_target_siblings);
	INIT_LIST_HEAD(&sdev->cmd_list);
	INIT_LIST_HEAD(&sdev->starved_entry);
	INIT_LIST_HEAD(&sdev->event_list);
	spin_lock_init(&sdev->list_lock);
	INIT_WORK(&sdev->event_work, scsi_evt_thread);

	/* get_device() on the target; the failure path below puts it. */
	sdev->sdev_gendev.parent = get_device(&starget->dev);
	sdev->sdev_target = starget;

	/* Usually NULL here; ->slave_alloc normally provides it. */
	sdev->hostdata = hostdata;

	/* slave_configure may change this if the device needs it. */
	sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED;

	/* Some low level driver could use device->type. */
	sdev->type = -1;

	/*
	 * Start out pessimistic about handshaking problems; the field
	 * is corrected later if the device turns out to be fine.
	 */
	sdev->borken = 1;

	sdev->request_queue = scsi_alloc_queue(sdev);
	if (!sdev->request_queue) {
		/*
		 * The release function is only installed by
		 * scsi_sysfs_device_initialize(), so undo by hand here.
		 */
		put_device(&starget->dev);
		kfree(sdev);
		goto out;
	}

	sdev->request_queue->queuedata = sdev;
	scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);

	scsi_sysfs_device_initialize(sdev);

	if (!shost->hostt->slave_alloc)
		return sdev;

	rc = shost->hostt->slave_alloc(sdev);
	if (!rc)
		return sdev;

	/*
	 * -ENXIO means the LLDD reports the slave as not present; do
	 * not clutter the console with an allocation failure message.
	 */
	if (rc == -ENXIO)
		report_failure = 0;

	transport_destroy_device(&sdev->sdev_gendev);
	put_device(&sdev->sdev_gendev);
out:
	if (report_failure)
		printk(ALLOC_FAILURE_MSG, __FUNCTION__);
	return NULL;
}
这里初始化的sdev,就是文章开头sd_probe函数中由传入参数dev经to_scsi_device(dev)转换得到的sdp;我们继续看queue的初始化。
/*
 * scsi_alloc_queue - create the request queue for a SCSI device
 * @sdev: device whose host supplies the queue limits
 *
 * The queue is built by __scsi_alloc_queue() with scsi_request_fn as its
 * request function; scsi_prep_fn and scsi_softirq_done are then attached
 * as the prepare and softirq-completion handlers.
 *
 * Returns the new queue, or NULL if it could not be allocated.
 */
struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
{
	struct request_queue *queue;

	queue = __scsi_alloc_queue(sdev->host, scsi_request_fn);
	if (queue) {
		blk_queue_prep_rq(queue, scsi_prep_fn);
		blk_queue_softirq_done(queue, scsi_softirq_done);
	}

	return queue;
}
这里传入__scsi_alloc_queue函数的第二个参数(scsi_request_fn)就是我们要找的request函数,系统在读写设备时最终会运行这个函数。
/*
 * __scsi_alloc_queue - allocate a request queue and apply host limits
 * @shost: host whose limits (sg_tablesize, max_sectors, dma_boundary,
 *	use_clustering, ...) are copied onto the queue
 * @request_fn: request function handed to blk_init_queue()
 *
 * Returns the configured queue, or NULL if blk_init_queue() failed.
 */
struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
					 request_fn_proc *request_fn)
{
	unsigned int max_phys_segs = SCSI_MAX_SG_SEGMENTS;
	struct request_queue *queue;

	queue = blk_init_queue(request_fn, NULL);
	if (!queue)
		return NULL;

	/* this limit is imposed by hardware restrictions */
	blk_queue_max_hw_segments(queue, shost->sg_tablesize);

	/*
	 * Not every arch supports sg chaining yet, so the larger chained
	 * limit is only used where ARCH_HAS_SG_CHAIN is defined and the
	 * host opted in. Once chaining is mandatory the ifdef can go.
	 */
#ifdef ARCH_HAS_SG_CHAIN
	if (shost->use_sg_chaining)
		max_phys_segs = SCSI_MAX_SG_CHAIN_SEGMENTS;
#endif
	blk_queue_max_phys_segments(queue, max_phys_segs);

	blk_queue_max_sectors(queue, shost->max_sectors);
	blk_queue_bounce_limit(queue, scsi_calculate_bounce_limit(shost));
	blk_queue_segment_boundary(queue, shost->dma_boundary);

	/* Hosts that do not use clustering get the cluster flag cleared. */
	if (!shost->use_clustering)
		clear_bit(QUEUE_FLAG_CLUSTER, &queue->queue_flags);

	return queue;
}
EXPORT_SYMBOL(__scsi_alloc_queue);
到这里,关于queue与request的由来,以及块设备的操作结构体,我们就分析完了,读者可以继续看看下面的具体实现。
下面我们要进入probe为之准备万分的一个重要函数: add_disk
/**
 * add_disk - make a gendisk known to the kernel
 * @disk: per-device partitioning information
 *
 * Marks @disk as up, registers its device-number region
 * (@disk->minors numbers starting at major/first_minor), then
 * registers the disk itself and its request queue.
 */
void add_disk(struct gendisk *disk)
{
	/* Flag the disk as up before anything else is registered. */
	disk->flags |= GENHD_FL_UP;

	/* Claim the range of device numbers owned by this disk. */
	blk_register_region(MKDEV(disk->major, disk->first_minor),
			    disk->minors,
			    NULL, exact_match, exact_lock,
			    disk);

	register_disk(disk);
	blk_register_queue(disk);
}
EXPORT_SYMBOL(add_disk);
关于这个函数我在LDD3上看到这样的描述:
“一旦调用了add_disk,磁盘设备将被激活(可以理解为已经初始化完毕),并随时会调用它提供的方法。”从这里可以看出,add_disk是块设备生效的关键的最后一步。
register_disk函数位于linux/fs/partitions/check.c,可以看出这个函数已经属于文件系统层。
/*
 * Not exported, helper to add_disk().
 *
 * Adds the disk's kobject and its sysfs links/subdirectories, opens the
 * whole-disk block device once so that the partition table gets scanned
 * (with partition uevents suppressed during the scan), and finally
 * sends KOBJ_ADD uevents for the disk and every non-empty partition.
 */
void register_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	struct hd_struct *part;
	char *slash;
	int partno;
	int err;

	kobject_set_name(&disk->kobj, "%s", disk->disk_name);

	/* Some disk names contain '/': replace the first one with '!'. */
	slash = strchr(disk->kobj.k_name, '/');
	if (slash)
		*slash = '!';

	err = kobject_add(&disk->kobj);
	if (err)
		return;

	err = disk_sysfs_symlinks(disk);
	if (err) {
		kobject_del(&disk->kobj);
		return;
	}
	disk_sysfs_add_subdirs(disk);

	/*
	 * Skip the scan when there are no minors left for partitions
	 * (minors == 1) or when the capacity is zero (e.g. media were
	 * just removed).
	 */
	if (disk->minors != 1 && get_capacity(disk)) {
		bdev = bdget_disk(disk, 0);
		if (bdev) {
			/* scan partition table, but suppress uevents */
			bdev->bd_invalidated = 1;
			disk->part_uevent_suppress = 1;
			err = blkdev_get(bdev, FMODE_READ, 0);
			disk->part_uevent_suppress = 0;
			if (err >= 0)
				blkdev_put(bdev);
		}
	}

	/* announce disk after possible partitions are already created */
	kobject_uevent(&disk->kobj, KOBJ_ADD);

	/* announce possible partitions */
	for (partno = 1; partno < disk->minors; partno++) {
		part = disk->part[partno - 1];
		if (part && part->nr_sects)
			kobject_uevent(&part->kobj, KOBJ_ADD);
	}
}
由于篇幅和时间问题,本系列文章就分析这么多。这里我只是分析了初始化的大体流程,读者可以选择性地阅读相关代码了解详细的流程;至于块设备的读写流程,我在网上发现了一篇很不错的文章,读者可以参考下,这里就不使用源码分析了。
谢谢大家的光临!!