Chinaunix首页 | 论坛 | 博客
  • 博客访问: 806026
  • 博文数量: 127
  • 博客积分: 2669
  • 博客等级: 少校
  • 技术积分: 1680
  • 用 户 组: 普通用户
  • 注册时间: 2009-10-23 11:39
文章分类

全部博文(127)

文章存档

2014年(5)

2013年(19)

2012年(25)

2011年(9)

2010年(25)

2009年(44)

分类: LINUX

2012-11-23 00:05:51

我们先将前面的内容简单地回顾一下: 整个初始化由sata驱动模块开始,通过对sata设备的注册,初始化libata层的结构,在初始化的过程中,初始化了scsi层需要的结构,并开启一个错误处理线程,该线程负责处理操作中出现的异常/错误,并负责确定是否需要对设备重新连接。因为现在整个系统刚刚初始化,还没有连接设备,所以通过该线程对设备进行reset以及连接等。
在此过程中,初始化线程处于等待状态,在错误处理线程执行一个周期后,初始化线程继续执行,并由此开始初始化总线上的设备,手动触发sd.c模块进行probe处理,这里的probe我们有必要详细分析下,因为这里有一些重要的数据结构,这些结构对于我们后续的理解有重大的帮助。

linux/drivers/scsi/sd.c
/**
 *    sd_probe - called during driver initialization and whenever a
 *    new scsi device is attached to the system. It is called once
 *    for each scsi device (not just disks) present.
 *    @dev: pointer to device object
 *
 *    Returns 0 if successful (or not interested in this scsi device
 *    (e.g. scanner)); 1 when there is an error.
 *
 *    Note: this function is invoked from the scsi mid-level.
 *    This function sets up the mapping between a given
 *    (found in sdp) and new device name
 *    (e.g. /dev/sda). More precisely it is the block device major
 *    and minor number that is chosen here.
 *
 *    Assume sd_attach is not re-entrant (for time being)
 *    Also think about sd_attach() and sd_remove() running coincidentally.
 **/
static int sd_probe(struct device *dev)
{
    struct scsi_device *sdp = to_scsi_device(dev);    //(1)
    struct scsi_disk *sdkp;
    struct gendisk *gd;
    u32 index;
    int error;

    error = -ENODEV;
    if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
        goto out;

    SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
                    "sd_attach\n"));

    error = -ENOMEM;
    sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
    if (!sdkp)
        goto out;

    gd = alloc_disk(16);
    if (!gd)
        goto out_free;

    if (!idr_pre_get(&sd_index_idr, GFP_KERNEL))
        goto out_put;

    spin_lock(&sd_index_lock);
    error = idr_get_new(&sd_index_idr, NULL, &index);
    spin_unlock(&sd_index_lock);

    if (index >= SD_MAX_DISKS)
        error = -EBUSY;
    if (error)
        goto out_put;

    sdkp->device = sdp;
    sdkp->driver = &sd_template;
    sdkp->disk = gd;
    sdkp->index = index;
    sdkp->openers = 0;  // (2)

    if (!sdp->timeout) {
        if (sdp->type != TYPE_MOD)
            sdp->timeout = SD_TIMEOUT;
        else
            sdp->timeout = SD_MOD_TIMEOUT;
    }

    class_device_initialize(&sdkp->cdev);
    sdkp->cdev.dev = &sdp->sdev_gendev;
    sdkp->cdev.class = &sd_disk_class;
    strncpy(sdkp->cdev.class_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE);

    if (class_device_add(&sdkp->cdev))
        goto out_put;

    get_device(&sdp->sdev_gendev);

    gd->major = sd_major((index & 0xf0) >> 4);
    gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
    gd->minors = 16;
    gd->fops = &sd_fops;
   //(3)

    if (index < 26) {
        sprintf(gd->disk_name, "sd%c", 'a' + index % 26);
    } else if (index < (26 + 1) * 26) {
        sprintf(gd->disk_name, "sd%c%c",
            'a' + index / 26 - 1,'a' + index % 26);
    } else {
        const unsigned int m1 = (index / 26 - 1) / 26 - 1;
        const unsigned int m2 = (index / 26 - 1) % 26;
        const unsigned int m3 =  index % 26;
        sprintf(gd->disk_name, "sd%c%c%c",
            'a' + m1, 'a' + m2, 'a' + m3);
    }

    gd->private_data = &sdkp->driver;
    gd->queue = sdkp->device->request_queue;
  //(4)

    sd_revalidate_disk(gd);

    blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);

    gd->driverfs_dev = &sdp->sdev_gendev;
    gd->flags = GENHD_FL_DRIVERFS;
    if (sdp->removable)
        gd->flags |= GENHD_FL_REMOVABLE;

    dev_set_drvdata(dev, sdkp);
    add_disk(gd);

    sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
          sdp->removable ? "removable " : "");

    return 0;

 out_put:
    put_disk(gd);
 out_free:
    kfree(sdkp);
 out:
    return error;
}

我们先说明下第三部分,第三部分初始化了struct gendisk的设备号和一个重要的指针( gd->fops = &sd_fops;),我们先来看下这个结构体的初始化:

static struct block_device_operations sd_fops = {
    .owner            = THIS_MODULE,
    .open            = sd_open,
    .release        = sd_release,
    .ioctl            = sd_ioctl,
    .getgeo            = sd_getgeo,
#ifdef CONFIG_COMPAT
    .compat_ioctl        = sd_compat_ioctl,
#endif
    .media_changed        = sd_media_changed,
    .revalidate_disk    = sd_revalidate_disk,
};

这个结构体很像字符设备中的file_operations,同样有open, release函数,其实这个就是块设备的接口函数,我们打开一个块设备时,系统最终会运行这里的open函数,至于这个函数的生效过程我们一会再分析。

第1 2 4部分要一起看,其实这里有一个块设备重要的操作函数,我们在上面的块设备操作函数没有看到read, write函数,因为块的读写是通过一个特殊的函数request函数实现的, 系统在文件系统层将需要的操作写入到相应设备的queue中,而这个queue中就包含这个request函数的指针,方便系统的最后调用,我们可以看到这里的gd->queue来自于probe函数的传入参数,我们可以看下之前的分析过程中的一个函数:

linux/drivers/scsi/scsi_scan.c

/**
 * scsi_probe_and_add_lun - probe a LUN, if a LUN is found add it
 * @starget:    pointer to target device structure
 * @lun:    LUN of target device
 * @sdevscan:    probe the LUN corresponding to this scsi_device
 * @sdevnew:    store the value of any new scsi_device allocated
 * @bflagsp:    store bflags here if not NULL
 *
 * Description:
 *     Call scsi_probe_lun, if a LUN with an attached device is found,
 *     allocate and set it up by calling scsi_add_lun.
 *
 * Return:
 *     SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device
 *     SCSI_SCAN_TARGET_PRESENT: target responded, but no device is
 *         attached at the LUN
 *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
 **/
static int scsi_probe_and_add_lun(struct scsi_target *starget,
                  uint lun, int *bflagsp,
                  struct scsi_device **sdevp, int rescan,
                  void *hostdata)
{
    struct scsi_device *sdev;
    unsigned char *result;
    int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
    struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);

    /*
     * The rescan flag is used as an optimization, the first scan of a
     * host adapter calls into here with rescan == 0.
     */
    sdev = scsi_device_lookup_by_target(starget, lun);
    if (sdev) {
        if (rescan || sdev->sdev_state != SDEV_CREATED) {
            SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO
                "scsi scan: device exists on %s\n",
                sdev->sdev_gendev.bus_id));
            if (sdevp)
                *sdevp = sdev;
            else
                scsi_device_put(sdev);

            if (bflagsp)
                *bflagsp = scsi_get_device_flags(sdev,
                                 sdev->vendor,
                                 sdev->model);
            return SCSI_SCAN_LUN_PRESENT;
        }
        scsi_device_put(sdev);
    } else
        sdev = scsi_alloc_sdev(starget, lun, hostdata);

    if (!sdev)
        goto out;

    result = kmalloc(result_len, GFP_ATOMIC |
            ((shost->unchecked_isa_dma) ? __GFP_DMA : 0));
    if (!result)
        goto out_free_sdev;

    if (scsi_probe_lun(sdev, result, result_len, &bflags))
        goto out_free_result;

    if (bflagsp)
        *bflagsp = bflags;
    /*
     * result contains valid SCSI INQUIRY data.
     */
    if (((result[0] >> 5) == 3) && !(bflags & BLIST_ATTACH_PQ3)) {
        /*
         * For a Peripheral qualifier 3 (011b), the SCSI
         * spec says: The device server is not capable of
         * supporting a physical device on this logical
         * unit.
         *
         * For disks, this implies that there is no
         * logical disk configured at sdev->lun, but there
         * is a target id responding.
         */
        SCSI_LOG_SCAN_BUS(2, sdev_printk(KERN_INFO, sdev, "scsi scan:"
                   " peripheral qualifier of 3, device not"
                   " added\n"))
        if (lun == 0) {
            SCSI_LOG_SCAN_BUS(1, {
                unsigned char vend[9];
                unsigned char mod[17];

                sdev_printk(KERN_INFO, sdev,
                    "scsi scan: consider passing scsi_mod."
                    "dev_flags=%s:%s:0x240 or 0x1000240\n",
                    scsi_inq_str(vend, result, 8, 16),
                    scsi_inq_str(mod, result, 16, 32));
            });
        }
       
        res = SCSI_SCAN_TARGET_PRESENT;
        goto out_free_result;
    }
这个函数在前面的文章曾经运行过一次,其中这里有相关queue的操作。


/**
 * scsi_alloc_sdev - allocate and setup a scsi_Device
 *
 * Description:
 *     Allocate, initialize for io, and return a pointer to a scsi_Device.
 *     Stores the @shost, @channel, @id, and @lun in the scsi_Device, and
 *     adds scsi_Device to the appropriate list.
 *
 * Return value:
 *     scsi_Device pointer, or NULL on failure.
 **/
static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
                       unsigned int lun, void *hostdata)
{
    struct scsi_device *sdev;
    int display_failure_msg = 1, ret;
    struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
    extern void scsi_evt_thread(struct work_struct *work);

    sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
               GFP_ATOMIC);
    if (!sdev)
        goto out;

    sdev->vendor = scsi_null_device_strs;
    sdev->model = scsi_null_device_strs;
    sdev->rev = scsi_null_device_strs;
    sdev->host = shost;
    sdev->id = starget->id;
    sdev->lun = lun;
    sdev->channel = starget->channel;
    sdev->sdev_state = SDEV_CREATED;
    INIT_LIST_HEAD(&sdev->siblings);
    INIT_LIST_HEAD(&sdev->same_target_siblings);
    INIT_LIST_HEAD(&sdev->cmd_list);
    INIT_LIST_HEAD(&sdev->starved_entry);
    INIT_LIST_HEAD(&sdev->event_list);
    spin_lock_init(&sdev->list_lock);
    INIT_WORK(&sdev->event_work, scsi_evt_thread);

    sdev->sdev_gendev.parent = get_device(&starget->dev);
    sdev->sdev_target = starget;

    /* usually NULL and set by ->slave_alloc instead */
    sdev->hostdata = hostdata;

    /* if the device needs this changing, it may do so in the
     * slave_configure function */
    sdev->max_device_blocked = SCSI_DEFAULT_DEVICE_BLOCKED;

    /*
     * Some low level driver could use device->type
     */
    sdev->type = -1;

    /*
     * Assume that the device will have handshaking problems,
     * and then fix this field later if it turns out it
     * doesn't
     */
    sdev->borken = 1;

    sdev->request_queue = scsi_alloc_queue(sdev);
    if (!sdev->request_queue) {
        /* release fn is set up in scsi_sysfs_device_initialise, so
         * have to free and put manually here */
        put_device(&starget->dev);
        kfree(sdev);
        goto out;
    }

    sdev->request_queue->queuedata = sdev;
    scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);

    scsi_sysfs_device_initialize(sdev);

    if (shost->hostt->slave_alloc) {
        ret = shost->hostt->slave_alloc(sdev);
        if (ret) {
            /*
             * if LLDD reports slave not present, don't clutter
             * console with alloc failure messages
             */
            if (ret == -ENXIO)
                display_failure_msg = 0;
            goto out_device_destroy;
        }
    }

    return sdev;

out_device_destroy:
    transport_destroy_device(&sdev->sdev_gendev);
    put_device(&sdev->sdev_gendev);
out:
    if (display_failure_msg)
        printk(ALLOC_FAILURE_MSG, __FUNCTION__);
    return NULL;
}
这里初始化的sdev就是传入文章开始probe函数的参数,我们继续看queue的初始化。

/*
 * Create the request queue for @sdev: the queue is built by
 * __scsi_alloc_queue() with scsi_request_fn as its request function, then
 * scsi_prep_fn and scsi_softirq_done are installed on it.
 *
 * Returns the queue, or NULL if __scsi_alloc_queue() fails.
 */
struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
{
    struct request_queue *queue;

    queue = __scsi_alloc_queue(sdev->host, scsi_request_fn);
    if (queue) {
        blk_queue_prep_rq(queue, scsi_prep_fn);
        blk_queue_softirq_done(queue, scsi_softirq_done);
    }

    return queue;
}

这里传入__scsi_alloc_queue函数的第二个参数就是我们要找的request函数,系统在读写设备时最终会运行这个函数。


/*
 * Create a request queue driven by @request_fn and apply the limits
 * recorded in @shost (segment counts, max sectors, bounce limit, segment
 * boundary, clustering).
 *
 * Returns the queue, or NULL if blk_init_queue() fails.
 */
struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
                     request_fn_proc *request_fn)
{
    struct request_queue *queue = blk_init_queue(request_fn, NULL);

    if (!queue)
        return NULL;

    /* Hardware restrictions impose this limit. */
    blk_queue_max_hw_segments(queue, shost->sg_tablesize);

    /*
     * sg chaining is not yet available on every architecture, so the
     * chained limit is only used where ARCH_HAS_SG_CHAIN is defined and
     * the host asks for it; this ifdef can go away once chaining is
     * mandatory.
     */
#ifdef ARCH_HAS_SG_CHAIN
    blk_queue_max_phys_segments(queue, shost->use_sg_chaining ?
                    SCSI_MAX_SG_CHAIN_SEGMENTS :
                    SCSI_MAX_SG_SEGMENTS);
#else
    blk_queue_max_phys_segments(queue, SCSI_MAX_SG_SEGMENTS);
#endif

    blk_queue_max_sectors(queue, shost->max_sectors);
    blk_queue_bounce_limit(queue, scsi_calculate_bounce_limit(shost));
    blk_queue_segment_boundary(queue, shost->dma_boundary);

    /* Clear the cluster flag when the host does not use clustering. */
    if (!shost->use_clustering)
        clear_bit(QUEUE_FLAG_CLUSTER, &queue->queue_flags);

    return queue;
}
EXPORT_SYMBOL(__scsi_alloc_queue);
到这里,关于queue与request的由来,以及块设备的操作结构体我们就分析完了,读者可以继续看看下面的具体实现。

下面我们要进入probe为之准备万分的一个重要函数: add_disk

/**
 * add_disk - register a gendisk with the kernel
 * @disk: per-device partitioning information
 *
 * Marks @disk as up (GENHD_FL_UP), registers the device-number region
 * starting at (major, first_minor) and spanning disk->minors minors, then
 * calls register_disk() and blk_register_queue() for it.
 */
void add_disk(struct gendisk *disk)
{
    disk->flags = disk->flags | GENHD_FL_UP;

    blk_register_region(MKDEV(disk->major, disk->first_minor), disk->minors,
                NULL, exact_match, exact_lock, disk);

    register_disk(disk);
    blk_register_queue(disk);
}

EXPORT_SYMBOL(add_disk);
关于这个函数我在LDD3上看到这样的描述:

“一旦调用了add_disk,磁盘设备将被激活(可以理解为已经初始化完毕),并随时会调用它提供的方法。”从这里可以看出来add_disk是块设备生效的关键的最后一步。

linux/fs/partitions/check.c  可以看出这个函数已经是文件系统层的。


/* Not exported, helper to add_disk(). */
void register_disk(struct gendisk *disk)
{
    struct block_device *bdev;
    char *s;
    int i;
    struct hd_struct *p;
    int err;

    kobject_set_name(&disk->kobj, "%s", disk->disk_name);
    /* e some of these buggers have / in name... */
    s = strchr(disk->kobj.k_name, '/');
    if (s)
        *s = '!';
    if ((err = kobject_add(&disk->kobj)))
        return;
    err = disk_sysfs_symlinks(disk);
    if (err) {
        kobject_del(&disk->kobj);
        return;
    }
     disk_sysfs_add_subdirs(disk);

    /* No minors to use for partitions */
    if (disk->minors == 1)
        goto exit;

    /* No such device (e.g., media were just removed) */
    if (!get_capacity(disk))
        goto exit;

    bdev = bdget_disk(disk, 0);
    if (!bdev)
        goto exit;

    /* scan partition table, but suppress uevents */
    bdev->bd_invalidated = 1;
    disk->part_uevent_suppress = 1;
    err = blkdev_get(bdev, FMODE_READ, 0);
    disk->part_uevent_suppress = 0;
    if (err < 0)
        goto exit;
    blkdev_put(bdev);

exit:
    /* announce disk after possible partitions are already created */
    kobject_uevent(&disk->kobj, KOBJ_ADD);

    /* announce possible partitions */
    for (i = 1; i < disk->minors; i++) {
        p = disk->part[i-1];
        if (!p || !p->nr_sects)
            continue;
        kobject_uevent(&p->kobj, KOBJ_ADD);
    }
}
由于篇幅和时间问题,本系列文章就分析这么多,这里我只是分析了初始化的大体流程,读者可以选择性地阅读相关代码了解详细的流程。至于块设备的读写流程,我在网上发现了一篇很不错的文章,读者可以参考下,这里就不使用源码分析了。

谢谢大家的光临!!

阅读(2112) | 评论(0) | 转发(1) |
给主人留下些什么吧!~~