Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1978898
  • 博文数量: 185
  • 博客积分: 10707
  • 博客等级: 上将
  • 技术积分: 1777
  • 用 户 组: 普通用户
  • 注册时间: 2008-09-19 17:31
文章分类

全部博文(185)

文章存档

2014年(1)

2012年(6)

2011年(27)

2010年(13)

2009年(75)

2008年(63)

分类:

2012-04-13 23:55:11

陆陆续续地看UBIFS很长时间了,一直没有写出一点东西,因为我想等到能够系统地理解UBIFS的时候再写。但是因为工作比较忙,UBIFS源码读得断断续续,老是需要复习、重新拾起,比较浪费时间,所以决定先写出一点东西,做个备份吧。
我决定在读UBIFS源码之前先读两份关于UBIFS设计的文档:
一份是《UBI-Unsorted Block Images》 ubidesign.pdf   
另外一份是《A Brief Introduction to the design of UBIFS》 A Brief Introduction to the Design of UBIFS.pdf   
这两份简洁的介绍了UBIFS设计的一些结构和考虑。

我们按照挂载ubifs的工序来分析代码:

(1) ubiattach /dev/ubi_ctrl -m 3

(2) ubimkvol /dev/ubi0 -N ubifs -s 15MiB

(3) mount -t ubifs ubi0:ubifs /mnt

首先先分析(1),相应的代码是ubi_attach_mtd_dev()函数,下面我们紧跟代码来看看究竟干了些什么。

ubi_attach_mtd_dev

int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)

{

//ubi_num, vid_hdr_offset是命令传进来的参数

       struct ubi_device *ubi;

       int i, err, do_free = 1;

 

       /*

        * Check if we already have the same MTD device attached.

        *

        * Note, this function assumes that UBI devices creations and deletions

        * are serialized, so it does not take the &ubi_devices_lock.

        */

       for (i = 0; i < UBI_MAX_DEVICES; i++) {

              ubi = ubi_devices[i];

              if (ubi && mtd->index == ubi->mtd->index) {

                     dbg_err("mtd%d is already attached to ubi%d",

                            mtd->index, i);

                     return -EEXIST;

              }

       }

//上面的这段代码可以看英文注释,一个mtd设备(一个分区)不能被attach两次,除非你已经detach了。所以在这段代码的开始就检查被attach的mtd设备是否已经被attach了。

 

       if (mtd->type == MTD_UBIVOLUME) {

              ubi_err("refuse attaching mtd%d - it is already emulated on "

                     "top of UBI", mtd->index);

              return -EINVAL;

       }

上面的代码接着检查被attach的mtd设备是否是一个建立在UBI卷之上的模拟mtd设备(MTD_UBIVOLUME),如果是,那么就不能再被attach了。

       if (ubi_num == UBI_DEV_NUM_AUTO) {

              /* Search for an empty slot in the @ubi_devices array */

              for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++)

                     if (!ubi_devices[ubi_num])

                            break;

如果在终端输入命令的时候没有带ubinum,那么就是自动分配ubinum,系统就会从ubi_devices[]数组中找出一个没被使用的ubinum

              if (ubi_num == UBI_MAX_DEVICES) {

                     dbg_err("only %d UBI devices may be created",

                           UBI_MAX_DEVICES);

                     return -ENFILE;

              }

       } else {

              if (ubi_num >= UBI_MAX_DEVICES)

                     return -EINVAL;

如果指定的ubi_num >= UBI_MAX_DEVICES,说明这个ubinum超出了允许的范围,返回出错

              /* Make sure ubi_num is not busy */

              if (ubi_devices[ubi_num]) {

                     dbg_err("ubi%d already exists", ubi_num);

                     return -EEXIST;

              }

       }

 

       ubi = kzalloc(sizeof(struct ubi_device), GFP_KERNEL);

       if (!ubi)

              return -ENOMEM;

 

       ubi->mtd = mtd;

       ubi->ubi_num = ubi_num;

       ubi->vid_hdr_offset = vid_hdr_offset;

       ubi->autoresize_vol_id = -1;

 

       mutex_init(&ubi->buf_mutex);

       mutex_init(&ubi->ckvol_mutex);

       mutex_init(&ubi->mult_mutex);

       mutex_init(&ubi->volumes_mutex);

       spin_lock_init(&ubi->volumes_lock);

初始化互斥锁和自旋锁

       ubi_msg("attaching mtd%d to ubi%d", mtd->index, ubi_num);

 

       err = io_init(ubi);

       if (err)

              goto out_free;

下面跟着io_init()往下分析:

static int io_init(struct ubi_device *ubi)

{

       if (ubi->mtd->numeraseregions != 0) {

              ubi_err("multiple regions, not implemented");

              return -EINVAL;

       }

numeraseregions是mtd层提供的信息,表示flash上大小不同的擦除区域的个数。如果numeraseregions不等于0,代表该设备存在多个擦除区域,UBI还不支持这种设备,所以返回出错

       if (ubi->vid_hdr_offset < 0)

              return -EINVAL;

ubi->vid_hdr_offset显然不能是负数(等于0表示使用默认值),一般是nandflash的一页,我们的4020上的nandflash页大小为512字节,所以ubi->vid_hdr_offset为512。这儿再稍微说一下,EC header和VID header是记录我们ubi管理信息的。一般EC在一个擦除块的第一页,所以偏移量为0;VID在擦除块的第二页上,所以偏移量为512。在我们4020的nandflash上,一个擦除块的大小为16K,也就是32页。

下面接着将mtd结构体中的信息写进ubi结构体

       ubi->peb_size   = ubi->mtd->erasesize;

       ubi->peb_count  = ubi->mtd->size / ubi->mtd->erasesize;

peb_count是指物理擦除块的数目,也就是总的大小除以每一个擦除块的大小

       ubi->flash_size = ubi->mtd->size;

 

       if (ubi->mtd->block_isbad && ubi->mtd->block_markbad)

              ubi->bad_allowed = 1;

 

       ubi->min_io_size = ubi->mtd->writesize;

       ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;

       if (!is_power_of_2(ubi->min_io_size)) {

              ubi_err("min. I/O unit (%d) is not power of 2",

                     ubi->min_io_size);

              return -EINVAL;

       }

 

       ubi_assert(ubi->hdrs_min_io_size > 0);

       ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size);

       ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0);

 

       /* Calculate default aligned sizes of EC and VID headers */

       ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);

       ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);

 

       dbg_msg("min_io_size      %d", ubi->min_io_size);

       dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size);

       dbg_msg("ec_hdr_alsize    %d", ubi->ec_hdr_alsize);

       dbg_msg("vid_hdr_alsize   %d", ubi->vid_hdr_alsize);

       if (ubi->vid_hdr_offset == 0)

              /* Default offset */

              ubi->vid_hdr_offset = ubi->vid_hdr_aloffset =

                                  ubi->ec_hdr_alsize;

       else {

              ubi->vid_hdr_aloffset = ubi->vid_hdr_offset &

                                          ~(ubi->hdrs_min_io_size - 1);

              ubi->vid_hdr_shift = ubi->vid_hdr_offset -

                                          ubi->vid_hdr_aloffset;

       }

剩余的部分就不分析了,比较容易

接着上面ubi_attach_mtd_dev()往下说:

       ubi->peb_buf1 = vmalloc(ubi->peb_size);

       if (!ubi->peb_buf1)

              goto out_free;

 

       ubi->peb_buf2 = vmalloc(ubi->peb_size);

       if (!ubi->peb_buf2)

               goto out_free;

分配两个物理擦除块大小的buf,具体的用途下面再说

       err = attach_by_scanning(ubi);

       if (err) {

              dbg_err("failed to attach by scanning, error %d", err);

              goto out_free;

       }

我们再跟着attach_by_scanning(ubi)细说

static int attach_by_scanning(struct ubi_device *ubi)

{

       int err;

       struct ubi_scan_info *si;

 

       si = ubi_scan(ubi);

**********************************************************************************

这儿通过ubi_scan函数来扫描MTD分区的每一块。具体是调用static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,int pnum)函数来读取EC和VID头(即每一块的前两页),在读每一页的时候,会调用check_pattern函数来判断这一页是否为空,如果每一页都是空的,那么就会发现这个MTD分区是空的。

**********************************************************************************

       if (IS_ERR(si))

              return PTR_ERR(si);

 

       ubi->bad_peb_count = si->bad_peb_count;

       ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count;

       ubi->max_ec = si->max_ec;

       ubi->mean_ec = si->mean_ec;

 

       err = ubi_read_volume_table(ubi, si);

       if (err)

              goto out_si;

 

       err = ubi_wl_init_scan(ubi, si);

**********************************************************************************

取自ubi_wl_init_scan(ubi, si);函数片段

list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {

              cond_resched();

 

              e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);

              if (!e)

                     goto out_free;

 

              e->pnum = seb->pnum;

              e->ec = seb->ec;

              ubi->lookuptbl[e->pnum] = e;

              if (schedule_erase(ubi, e, 0)) {

                     kmem_cache_free(ubi_wl_entry_slab, e);

                     goto out_free;

              }

       }

在初始化wl的时候会为si->erase链表中的每一个块建立一个struct ubi_work *wl_wrk;结构体(该结构体的具体处理函数为erase_worker,擦除一块,并写入EC头),并添加到ubi->works队列中(list_add_tail(&wrk->list, &ubi->works));这儿我们渐渐地认识到ubi->works这个队列的作用,后台进程ubi_thread就是循环地处理该队列中的工作的。

在第一次attach的时候,在这儿ubi_thread进程还没有被唤醒,所以这些工作要等到进程被唤醒的时候才能被处理

**********************************************************************************

       if (err)

              goto out_vtbl;

 

       err = ubi_eba_init_scan(ubi, si);

**********************************************************************************

前面我们看到了ubi_scan,其实这个过程是建立ubifs的基础,因为所有关于ubi和ubifs的基本信息都是在scan的过程中建立在内存中的。现在调用ubi_eba_init_scan来建立起EBA子系统,就是利用前面的扫描信息,建立起每一个volume的vtl

       if (err)

              goto out_wl;

 

       ubi_scan_destroy_si(si);

       return 0;

 

out_wl:

       ubi_wl_close(ubi);

out_vtbl:

       free_internal_volumes(ubi);

       vfree(ubi->vtbl);

out_si:

       ubi_scan_destroy_si(si);

       return err;

}

 

Ubi_scan

struct ubi_scan_info *ubi_scan(struct ubi_device *ubi)

{

       int err, pnum;

       struct rb_node *rb1, *rb2;

       struct ubi_scan_volume *sv;

       struct ubi_scan_leb *seb;

       struct ubi_scan_info *si;

 

       si = kzalloc(sizeof(struct ubi_scan_info), GFP_KERNEL);

       if (!si)

              return ERR_PTR(-ENOMEM);

       INIT_LIST_HEAD(&si->corr); //初始化si的corr队列

       INIT_LIST_HEAD(&si->free); //初始化si的free队列

       INIT_LIST_HEAD(&si->erase); //初始化si的erase队列

       INIT_LIST_HEAD(&si->alien); //初始化si的alien队列

       si->volumes = RB_ROOT;

先假设这个MTD设备是空的,扫描到非空的块之后再把is_empty清零,哈哈

       si->is_empty = 1;

       err = -ENOMEM;

       ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);//ec头部分配空间,用于暂存后面读出的每一个pebec头部信息

       if (!ech)

              goto out_si;

 

       vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); //vid头部分配空间,用于暂存后面读出的每一个pebvid头部信息,注意扫描的目的就是收集ECVID中信息,在内存中建立相关的信息

       if (!vidh)

              goto out_ech;

 

       for (pnum = 0; pnum < ubi->peb_count; pnum++) {

              cond_resched();

 

              dbg_gen("process PEB %d", pnum);

              err = process_eb(ubi, si, pnum);//具体的扫描每一个物理块

              if (err < 0)

                     goto out_vidh;

       }

 

       dbg_msg("scanning is finished");

 

       /* Calculate mean erase counter */

       if (si->ec_count)//算平均擦除次数

              si->mean_ec = div_u64(si->ec_sum, si->ec_count);

 

       if (si->is_empty)//判断这是否是一个空的MTD,如果是空的话,那么后面的mount的时候调用create_default_filesystem在建立初始的ubifs数据

              ubi_msg("empty MTD device detected");

 

       /*

        * Few corrupted PEBs are not a problem and may be just a result of

        * unclean reboots. However, many of them may indicate some problems

        * with the flash HW or driver. Print a warning in this case.

        */

       if (si->corr_count >= 8 || si->corr_count >= ubi->peb_count / 4) {

              ubi_warn("%d PEBs are corrupted", si->corr_count);

              printk(KERN_WARNING "corrupted PEBs are:");

              list_for_each_entry(seb, &si->corr, u.list)

                     printk(KERN_CONT " %d", seb->pnum);

              printk(KERN_CONT "\n");

       }

 

       /*

        * In case of unknown erase counter we use the mean erase counter

        * value.

        */

       ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {

              ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb)

                     if (seb->ec == UBI_SCAN_UNKNOWN_EC)

                            seb->ec = si->mean_ec;

       }

 

       list_for_each_entry(seb, &si->free, u.list) {

              if (seb->ec == UBI_SCAN_UNKNOWN_EC)

                     seb->ec = si->mean_ec;

       }

 

       list_for_each_entry(seb, &si->corr, u.list)

              if (seb->ec == UBI_SCAN_UNKNOWN_EC)

                     seb->ec = si->mean_ec;

 

       list_for_each_entry(seb, &si->erase, u.list)

              if (seb->ec == UBI_SCAN_UNKNOWN_EC)

                     seb->ec = si->mean_ec;

 

       err = paranoid_check_si(ubi, si);

       if (err) {

              if (err > 0)

                     err = -EINVAL;

              goto out_vidh;

       }

 

       ubi_free_vid_hdr(ubi, vidh);

       kfree(ech);

 

       return si;

 

out_vidh:

       ubi_free_vid_hdr(ubi, vidh);

out_ech:

       kfree(ech);

out_si:

       ubi_scan_destroy_si(si);

       return ERR_PTR(err);

}

process_eb

static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,  int pnum)

{

       long long uninitialized_var(ec);

       int err, bitflips = 0, vol_id, ec_corr = 0;

 

       dbg_bld("scan PEB %d", pnum);

 

       /* Skip bad physical eraseblocks */

       err = ubi_io_is_bad(ubi, pnum);

判断一个块是否为坏块,直接调用mtd层的mtd->block_isbad

       if (err < 0)

              return err;

       else if (err) {

              /*

               * FIXME: this is actually duty of the I/O sub-system to

               * initialize this, but MTD does not provide enough

               * information.

               */

              si->bad_peb_count += 1;

              return 0;

       }

       err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0);//ec header,一般为一块的第一页

       if (err < 0)

              return err;

       else if (err == UBI_IO_BITFLIPS)

              bitflips = 1;

       else if (err == UBI_IO_PEB_EMPTY)

              return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase);

//注意这儿,为什么这个块是empty(也就是全是0xff),还要丢到si->erase队列中呢?这是因为MTD所谓的空与UBI所谓的空不是一回事。在UBI中,空块是指只包含EC头部的块。所以这些需要将全0xff的块进行擦除,写入EC头部

       else if (err == UBI_IO_BAD_EC_HDR) {

              /*

               * We have to also look at the VID header, possibly it is not

               * corrupted. Set %bitflips flag in order to make this PEB be

               * moved and EC be re-created.

               */

              ec_corr = 1;

              ec = UBI_SCAN_UNKNOWN_EC;

              bitflips = 1;

       }

 

       si->is_empty = 0;

 

       if (!ec_corr) {

              int image_seq;

 

              /* Make sure UBI version is OK */

              if (ech->version != UBI_VERSION) {

                     ubi_err("this UBI version is %d, image version is %d",

                            UBI_VERSION, (int)ech->version);

                     return -EINVAL;

              }

 

              ec = be64_to_cpu(ech->ec);

              if (ec > UBI_MAX_ERASECOUNTER) {

                     /*

                      * Erase counter overflow. The EC headers have 64 bits

                      * reserved, but we anyway make use of only 31 bit

                      * values, as this seems to be enough for any existing

                      * flash. Upgrade UBI and use 64-bit erase counters

                      * internally.

                      */

                     ubi_err("erase counter overflow, max is %d",

                            UBI_MAX_ERASECOUNTER);

                     ubi_dbg_dump_ec_hdr(ech);

                     return -EINVAL;

              }

 

              /*

               * Make sure that all PEBs have the same image sequence number.

               * This allows us to detect situations when users flash UBI

               * images incorrectly, so that the flash has the new UBI image

               * and leftovers from the old one. This feature was added

               * relatively recently, and the sequence number was always

               * zero, because old UBI implementations always set it to zero.

               * For this reasons, we do not panic if some PEBs have zero

               * sequence number, while other PEBs have non-zero sequence

               * number.

               */

              image_seq = be32_to_cpu(ech->image_seq);

              if (!ubi->image_seq && image_seq)

                     ubi->image_seq = image_seq;

              if (ubi->image_seq && image_seq &&

                  ubi->image_seq != image_seq) {

                     ubi_err("bad image sequence number %d in PEB %d, "

                            "expected %d", image_seq, pnum, ubi->image_seq);

                     ubi_dbg_dump_ec_hdr(ech);

                     return -EINVAL;

              }

       }

 

       /* OK, we've done with the EC header, let's look at the VID header */

 

       err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0);

       if (err < 0)

              return err;

       else if (err == UBI_IO_BITFLIPS)

              bitflips = 1;

       else if (err == UBI_IO_BAD_VID_HDR ||

               (err == UBI_IO_PEB_FREE && ec_corr)) {

//如果是一个坏的VID头(或者VID头为空但EC头已经损坏),那么就添加到corr队列中去

              /* VID header is corrupted */

              err = add_to_list(si, pnum, ec, &si->corr);

              if (err)

                     return err;

              goto adjust_mean_ec;

       } else if (err == UBI_IO_PEB_FREE) {

//如果VID头是空的,也就是说该PEB只存在EC头部,那么添加到free队列中,可以用于后面的分配。

              /* No VID header - the physical eraseblock is free */

              err = add_to_list(si, pnum, ec, &si->free);

              if (err)

                     return err;

              goto adjust_mean_ec;

       }

 

       vol_id = be32_to_cpu(vidh->vol_id);

       if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) {

//判断vol_id是否合法,ubi内部存在一个layout_volume,专门用来保存user volumn的信息

UBI maintains internal volumes to store UBI related information e.g. volume information, flash based erase block assignment tables

              int lnum = be32_to_cpu(vidh->lnum);

 

              /* Unsupported internal volume */

              switch (vidh->compat) {

              case UBI_COMPAT_DELETE:

                     ubi_msg("\"delete\" compatible internal volume %d:%d"

                            " found, remove it", vol_id, lnum);

                     err = add_to_list(si, pnum, ec, &si->corr);

                     if (err)

                            return err;

                     break;

 

              case UBI_COMPAT_RO:

                     ubi_msg("read-only compatible internal volume %d:%d"

                            " found, switch to read-only mode",

                            vol_id, lnum);

                     ubi->ro_mode = 1;

                     break;

 

              case UBI_COMPAT_PRESERVE:

                     ubi_msg("\"preserve\" compatible internal volume %d:%d"

                            " found", vol_id, lnum);

                     err = add_to_list(si, pnum, ec, &si->alien);

                     if (err)

                            return err;

                     si->alien_peb_count += 1;

                     return 0;

 

              case UBI_COMPAT_REJECT:

                     ubi_err("incompatible internal volume %d:%d found",

                            vol_id, lnum);

                     return -EINVAL;

              }

       }

 

       if (ec_corr)

              ubi_warn("valid VID header but corrupted EC header at PEB %d",

                      pnum);

//到这儿可以判定这个PEB是一个有效的UBI块,包含有效的VID头部(EC头部通常也有效,如果损坏则会打印上面的警告)

       err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips);

       if (err)

              return err;

 

adjust_mean_ec:

       if (!ec_corr) {

              si->ec_sum += ec;

              si->ec_count += 1;

              if (ec > si->max_ec)

                     si->max_ec = ec;

              if (ec < si->min_ec)

                     si->min_ec = ec;

       }

 

       return 0;

}

ubi_scan_add_used

int ubi_scan_add_used (struct ubi_device *ubi, struct ubi_scan_info *si,int pnum, int ec, const struct ubi_vid_hdr *vid_hdr,int bitflips)

{

       int err, vol_id, lnum;

       unsigned long long sqnum;

       struct ubi_scan_volume *sv;

       struct ubi_scan_leb *seb;

       struct rb_node **p, *parent = NULL;

 

       vol_id = be32_to_cpu(vid_hdr->vol_id);

       lnum = be32_to_cpu(vid_hdr->lnum);

       sqnum = be64_to_cpu(vid_hdr->sqnum);

 

       dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, bitflips %d",

              pnum, vol_id, lnum, ec, sqnum, bitflips);

       sv = add_volume(si, vol_id, pnum, vid_hdr);

调用add_volume检查读出的pnum的volume id号,在内存中建立volume的红黑树

 

       if (IS_ERR(sv))

              return PTR_ERR(sv);

 

       if (si->max_sqnum < sqnum)

              si->max_sqnum = sqnum;

 

       /*

        * Walk the RB-tree of logical eraseblocks of volume @vol_id to look

        * if this is the first instance of this logical eraseblock or not.

        */

       p = &sv->root.rb_node;

       while (*p) {

              int cmp_res;

 

              parent = *p;

              seb = rb_entry(parent, struct ubi_scan_leb, u.rb);

              if (lnum != seb->lnum) {

                     if (lnum < seb->lnum)

                            p = &(*p)->rb_left;

                     else

                            p = &(*p)->rb_right;

                     continue;

              }

在内存中建立ubi_scan_leb的红黑树

              /*

               * There is already a physical eraseblock describing the same

               * logical eraseblock present.

               */

 

              dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "

                     "EC %d", seb->pnum, seb->sqnum, seb->ec);

 

              /*

               * Make sure that the logical eraseblocks have different

               * sequence numbers. Otherwise the image is bad.

               *

               * However, if the sequence number is zero, we assume it must

               * be an ancient UBI image from the era when UBI did not have

               * sequence numbers. We still can attach these images, unless

               * there is a need to distinguish between old and new

               * eraseblocks, in which case we'll refuse the image in

               * 'compare_lebs()'. In other words, we attach old clean

               * images, but refuse attaching old images with duplicated

               * logical eraseblocks because there was an unclean reboot.

               */

              //注意上面的那个while (*p)的范围,到这儿的时候表示在ubi_scan_leb的红黑树中找到了一个lnum相同的ubi_scan_leb结构,那么说明什么问题呢?说明在ubi中存在多个PEB指向同一个LEB.

              //sqnum是一个持续增加的64bit的全局变量,我们认为它不会溢出,如果seb->sqnum == sqnum,那么显然是不合理的

              if (seb->sqnum == sqnum && sqnum != 0) {

                     ubi_err("two LEBs with same sequence number %llu",

                            sqnum);

                     ubi_dbg_dump_seb(seb, 0);

                     ubi_dbg_dump_vid_hdr(vid_hdr);

                     return -EINVAL;

              }

 

              /*

               * Now we have to drop the older one and preserve the newer

               * one.

               */

// * @copy_flag: if this logical eraseblock was copied from another physical eraseblock (for wear-leveling reasons)

//如果存在多个PEB指向同一个LEB,那么一般是WL的时候,或者修改文件的时候发生了unclean reboot,那么我们就需要从这些多个PEB中找出哪个是最新的。compare_lebs就是完成这个工作的。

              cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr);

              if (cmp_res < 0)

                     return cmp_res;

 

              if (cmp_res & 1) {

                     /*

                      * This logical eraseblock is newer then the one

                      * found earlier.

                      */

                     err = validate_vid_hdr(vid_hdr, sv, pnum);

                     if (err)

                            return err;

 

                     if (cmp_res & 4)

                            err = add_to_list(si, seb->pnum, seb->ec,

                                            &si->corr);

                     else

                            err = add_to_list(si, seb->pnum, seb->ec,

                                            &si->erase);

                     if (err)

                            return err;

 

                     seb->ec = ec;

                     seb->pnum = pnum;

                     seb->scrub = ((cmp_res & 2) || bitflips);

                     seb->sqnum = sqnum;

 

                     if (sv->highest_lnum == lnum)

                            sv->last_data_size =

                                   be32_to_cpu(vid_hdr->data_size);

 

                     return 0;

              } else {

                     /*

                      * This logical eraseblock is older than the one found

                      * previously.

                      */

                     if (cmp_res & 4)

                            return add_to_list(si, pnum, ec, &si->corr);

                     else

                            return add_to_list(si, pnum, ec, &si->erase);

              }

       }

       /*

        * We've met this logical eraseblock for the first time, add it to the

        * scanning information.

        */

       //如果到这儿了,表示这是第一次遇到该LEB,那么很简单,将它添加到队列中就可以了

       err = validate_vid_hdr(vid_hdr, sv, pnum);

       if (err)

              return err;

       seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);

       if (!seb)

              return -ENOMEM;

       seb->ec = ec;

       seb->pnum = pnum;

       seb->lnum = lnum;

       seb->sqnum = sqnum;

       seb->scrub = bitflips;

       if (sv->highest_lnum <= lnum) {

              sv->highest_lnum = lnum;

              sv->last_data_size = be32_to_cpu(vid_hdr->data_size);

       }

       sv->leb_count += 1;

       rb_link_node(&seb->u.rb, parent, p);

       rb_insert_color(&seb->u.rb, &sv->root);

       return 0;

}

 

compare_lebs

static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,int pnum, const struct ubi_vid_hdr *vid_hdr)

{

       void *buf;

       int len, err, second_is_newer, bitflips = 0, corrupted = 0;

       uint32_t data_crc, crc;

       struct ubi_vid_hdr *vh = NULL;

       unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);

       //再次判断一下是否存在sqnum相等的情况发生

       if (sqnum2 == seb->sqnum) {

              /*

               * This must be a really ancient UBI image which has been

               * created before sequence numbers support has been added. At

               * that times we used 32-bit LEB versions stored in logical

               * eraseblocks. That was before UBI got into mainline. We do not

               * support these images anymore. Well, those images will work

               * still work, but only if no unclean reboots happened.

               */

              ubi_err("unsupported on-flash UBI format\n");

              return -EINVAL;

       }

 

       /* Obviously the LEB with lower sequence counter is older */

       //因为sqnum是持续增加的,而且不会溢出。所以认为sqnum大的那个PEB是最新的。

       second_is_newer = !!(sqnum2 > seb->sqnum);

       /*

        * Now we know which copy is newer. If the copy flag of the PEB with

        * newer version is not set, then we just return, otherwise we have to

        * check data CRC. For the second PEB we already have the VID header,

        * for the first one - we'll need to re-read it from flash.

        *

        * Note: this may be optimized so that we wouldn't read twice.

        */

 

       if (second_is_newer) {

              if (!vid_hdr->copy_flag) {

                    

                     dbg_bld("second PEB %d is newer, copy_flag is unset",

                            pnum);

                     return 1;

              }

       } else {

       //如果copy_flag位设置了,那么可以认为是在WL的时候发生意外。因为发生了unclean reboot,所以需要判断这个最新的PEB中的数据是否是完整的。(unclean reboot时数据的写入可能被打断了)

                     pnum = seb->pnum;

 

              vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);

              if (!vh)

                     return -ENOMEM;

 

              err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0);

              if (err) {

                     if (err == UBI_IO_BITFLIPS)

                            bitflips = 1;

                     else {

                            dbg_err("VID of PEB %d header is bad, but it "

                                   "was OK earlier", pnum);

                            if (err > 0)

                                   err = -EIO;

 

                            goto out_free_vidh;

                     }

              }

 

              if (!vh->copy_flag) {

                    

                     dbg_bld("first PEB %d is newer, copy_flag is unset",

                            pnum);

                     err = bitflips << 1;

                     goto out_free_vidh;

              }

 

              vid_hdr = vh;

       }

 

       /* Read the data of the copy and check the CRC */

 

       len = be32_to_cpu(vid_hdr->data_size);

       buf = vmalloc(len);

       if (!buf) {

              err = -ENOMEM;

              goto out_free_vidh;

       }

//OK,读出数据,校验CRC

       err = ubi_io_read_data(ubi, buf, pnum, 0, len);

       if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)

              goto out_free_buf;

 

       data_crc = be32_to_cpu(vid_hdr->data_crc);

       crc = crc32(UBI_CRC32_INIT, buf, len);

       if (crc != data_crc) {

              dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x",

                     pnum, crc, data_crc);

              corrupted = 1;

              bitflips = 0;

              //如果CRC校验失败了,那么还沿用老的PEB

              second_is_newer = !second_is_newer;

       } else {

              dbg_bld("PEB %d CRC is OK", pnum);

              bitflips = !!err;

       }

 

       vfree(buf);

       ubi_free_vid_hdr(ubi, vh);

 

       if (second_is_newer)

              dbg_bld("second PEB %d is newer, copy_flag is set", pnum);

       else

              dbg_bld("first PEB %d is newer, copy_flag is set", pnum);

 

       return second_is_newer | (bitflips << 1) | (corrupted << 2);

 

out_free_buf:

       vfree(buf);

out_free_vidh:

       ubi_free_vid_hdr(ubi, vh);

       return err;

}

 

ubimkvol /dev/ubi0 -N ubifs -s 15MiB

上面的这条命令是在ubi设备0上创建一个大小为15M,名字叫做ubifs的volume。

这条命令是通过ioctl实现的,我们下面来看一下相关的代码:

       /*
        * Create volume command: copy a struct ubi_mkvol_req from user space,
        * validate it, create the volume while holding ubi->device_mutex, and
        * write the resulting volume ID back to the caller's buffer.
        */

       case UBI_IOCMKVOL:

       {

              struct ubi_mkvol_req req;

              dbg_gen("create volume");

              err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req));

              /* Any copy failure is reported to the caller as -EFAULT */
              if (err) {

                     err = -EFAULT;

                     break;

              }

              /*
               * NOTE(review): req.name_len was just copied from user space and
               * is used here as an array index before verify_mkvol_req() has
               * run - confirm it cannot fall outside the bounds of req.name.
               */
              req.name[req.name_len] = '\0';

              err = verify_mkvol_req(ubi, &req);

              if (err)

                     break;

              /* The volume is created with ubi->device_mutex held */
              mutex_lock(&ubi->device_mutex);

              err = ubi_create_volume(ubi, &req);

              mutex_unlock(&ubi->device_mutex);

              if (err)

                     break;

              /* Hand req.vol_id back to user space through argp */
              err = put_user(req.vol_id, (__user int32_t *)argp);

              if (err)

                     err = -EFAULT;

              break;

       }

函数的主体部分是ubi_create_volume。传给ubi_create_volume的是一个ubi_mkvol_req类型的结构体。

struct ubi_mkvol_req {

       __s32 vol_id;//要创建的volumnID,可以不指定

       __s32 alignment;//The @alignment field specifies the required alignment of the volume logical eraseblock. This means, that the size of logical eraseblocks will be aligned to this number, i.e., (UBI device logical eraseblock size) mod (@alignment) = 0.

       __s64 bytes;//volume的大小

       __s8 vol_type;//volume的类型,静态或者动态

       __s8 padding1;

       __s16 name_len;//volume的名字的长度

       __s8 padding2[4];

       char name[UBI_MAX_VOLUME_NAME + 1];

} __attribute__ ((packed));

int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)

{

       int i, err, vol_id = req->vol_id, do_free = 1;

       struct ubi_volume *vol;

       struct ubi_vtbl_record vtbl_rec;

       dev_t dev;

       if (ubi->ro_mode)

              return -EROFS;

       vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);

       if (!vol)

              return -ENOMEM;

       spin_lock(&ubi->volumes_lock);

       如果没有指定vol-id,那么就是采用默认的方式获得id

       if (vol_id == UBI_VOL_NUM_AUTO) {

              /* Find unused volume ID */

              dbg_gen("search for vacant volume ID");

              for (i = 0; i < ubi->vtbl_slots; i++)

                     if (!ubi->volumes[i]) {

                            vol_id = i;

                            break;

                     }

              if (vol_id == UBI_VOL_NUM_AUTO) {

                     dbg_err("out of volume IDs");

                     err = -ENFILE;

                     goto out_unlock;

              }

              req->vol_id = vol_id;

       }

 

       dbg_gen("create device %d, volume %d, %llu bytes, type %d, name %s",

              ubi->ubi_num, vol_id, (unsigned long long)req->bytes,

              (int)req->vol_type, req->name);

 

       /* Ensure that this volume does not exist */

       err = -EEXIST;

       if (ubi->volumes[vol_id]) {

              dbg_err("volume %d already exists", vol_id);

              goto out_unlock;

       }

 

       /* Ensure that the name is unique */

       确认要创建的volume的名字是唯一的。与已经存在的volume对比

       for (i = 0; i < ubi->vtbl_slots; i++)

              if (ubi->volumes[i] &&

                  ubi->volumes[i]->name_len == req->name_len &&

                 

                     dbg_err("volume \"%s\" exists (ID %d)", req->name, i);

                     goto out_unlock;

              }

       //根据req->bytes计算需要的物理块数,UBI中操作的基本单元是物理块

       /* Calculate how many eraseblocks are requested */

       vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;

       vol->reserved_pebs += div_u64(req->bytes + vol->usable_leb_size - 1,

                                  vol->usable_leb_size);

 

       /* Reserve physical eraseblocks */

       if (vol->reserved_pebs > ubi->avail_pebs) {

              dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);

              err = -ENOSPC;

              goto out_unlock;

       }

       //ubi设备中的可用pebs减少,因为已经分配了新创建的volume

       ubi->avail_pebs -= vol->reserved_pebs;

       ubi->rsvd_pebs += vol->reserved_pebs;

       spin_unlock(&ubi->volumes_lock);

       //初始化新创建的volume的相关信息

       vol->vol_id    = vol_id;

       vol->alignment = req->alignment;

       vol->data_pad  = ubi->leb_size % vol->alignment;

       vol->vol_type  = req->vol_type;

       vol->name_len  = req->name_len;

       memcpy(vol->name, req->name, vol->name_len);

       vol->ubi = ubi;

 

       /*

        * Finish all pending erases because there may be some LEBs belonging

        * to the same volume ID.

        */

       //刷新UBI后台中pendingworkers

       err = ubi_wl_flush(ubi);

       if (err)

              goto out_acc;

       //创建eba_tbl表,并将其初始化为UBI_LEB_UNMAPPED,只有在对具体的LEB进行写操作的时候才会真正的更新该表中的每一个LEB对应的项

       vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), GFP_KERNEL);

       if (!vol->eba_tbl) {

              err = -ENOMEM;

              goto out_acc;

       }

 

       for (i = 0; i < vol->reserved_pebs; i++)

              vol->eba_tbl[i] = UBI_LEB_UNMAPPED;

 

       if (vol->vol_type == UBI_DYNAMIC_VOLUME) {

              vol->used_ebs = vol->reserved_pebs;

              vol->last_eb_bytes = vol->usable_leb_size;

              vol->used_bytes =

                     (long long)vol->used_ebs * vol->usable_leb_size;

      

              vol->used_ebs = div_u64_rem(vol->used_bytes,

                                       vol->usable_leb_size,

                                       &vol->last_eb_bytes);

              if (vol->last_eb_bytes != 0)

                     vol->used_ebs += 1;

              else

                     vol->last_eb_bytes = vol->usable_leb_size;

       }

 

       /* Register character device for the volume */

       //ubi volume注册字符接口

       cdev_init(&vol->cdev, &ubi_vol_cdev_operations);

       vol->cdev.owner = THIS_MODULE;

       dev = MKDEV(MAJOR(ubi->cdev.dev), vol_id + 1);

       err = cdev_add(&vol->cdev, dev, 1);

       if (err) {

              ubi_err("cannot add character device");

              goto out_mapping;

       }

 

       vol->dev.release = vol_release;

       vol->dev.parent = &ubi->dev;

       vol->dev.devt = dev;

       vol->dev.class = ubi_class;

       dev_set_name(&vol->dev, "%s_%d", ubi->ubi_name, vol->vol_id);

       err = device_register(&vol->dev);

       if (err) {

              ubi_err("cannot register device");

              goto out_cdev;

       }

       err = volume_sysfs_init(ubi, vol);

       if (err)

              goto out_sysfs;

 

       /* Fill volume table record */

       //ubi中存在一个internal volume ,其中保持的是每一个volume 的信息,现在新创建了一个volume,就需要更新其中的这个internal volumelayout volume)的信息

       memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record));

       vtbl_rec.reserved_pebs = cpu_to_be32(vol->reserved_pebs);

       vtbl_rec.alignment     = cpu_to_be32(vol->alignment);

       vtbl_rec.data_pad      = cpu_to_be32(vol->data_pad);

       vtbl_rec.name_len      = cpu_to_be16(vol->name_len);

       if (vol->vol_type == UBI_DYNAMIC_VOLUME)

              vtbl_rec.vol_type = UBI_VID_DYNAMIC;

       else

              vtbl_rec.vol_type = UBI_VID_STATIC;

       memcpy(vtbl_rec.name, vol->name, vol->name_len);

       err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);

通过一个ubi_eba_unmap_leb操作,和一个ubi_eba_write_leb操作来实现了ubifs的写操作,保证了数据的安全性

 

       if (err)

              goto out_sysfs;

 

       spin_lock(&ubi->volumes_lock);

       ubi->volumes[vol_id] = vol;

       ubi->vol_count += 1;

       spin_unlock(&ubi->volumes_lock);

       //通知相关模块,UBI创建了一个新的volume,让它们也采取相应的措施,貌似这个通知联上只有gluebi_notifier

       ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED);

       if (paranoid_check_volumes(ubi))

              dbg_err("check failed while creating volume %d", vol_id);

       return err;

 

out_sysfs:

       /*

        * We have registered our device, we should not free the volume

        * description object in this function in case of an error - it is

        * freed by the release function.

        *

        * Get device reference to prevent the release function from being

        * called just after sysfs has been closed.

        */

       do_free = 0;

       get_device(&vol->dev);

       volume_sysfs_close(vol);

out_cdev:

       cdev_del(&vol->cdev);

out_mapping:

       if (do_free)

              kfree(vol->eba_tbl);

out_acc:

       spin_lock(&ubi->volumes_lock);

       ubi->rsvd_pebs -= vol->reserved_pebs;

       ubi->avail_pebs += vol->reserved_pebs;

out_unlock:

       spin_unlock(&ubi->volumes_lock);

       if (do_free)

              kfree(vol);

       else

              put_device(&vol->dev);

       ubi_err("cannot create volume %d, error %d", vol_id, err);

       return err;

}

 

mount过程(对应前面的第3步:mount -t ubifs ubi0:ubifs /mnt)

static int mount_ubifs(struct ubifs_info *c)

static int mount_ubifs(struct ubifs_info *c)

{

       struct super_block *sb = c->vfs_sb;

       int err, mounted_read_only = (sb->s_flags & MS_RDONLY);

       long long x;

       size_t sz;

       err = init_constants_early(c);

       if (err)

              return err;

       err = ubifs_debugging_init(c);

       if (err)

              return err;

       //通过检查vtl表来确定volume是否为空

       err = check_volume_empty(c);

       if (err)

              goto out_free;

       如果该volume为空,但是只读的话,显然不能写入信息,自然

       //也就不能mount

       if (c->empty && (mounted_read_only || c->ro_media)) {

              /*

               * This UBI volume is empty, and read-only, or the file system

               * is mounted read-only - we cannot format it.

               */

              ubifs_err("can't format empty UBI volume: read-only %s",

                       c->ro_media ? "UBI volume" : "mount");

              err = -EROFS;

              goto out_free;

       }

 

       if (c->ro_media && !mounted_read_only) {

              ubifs_err("cannot mount read-write - read-only media");

              err = -EROFS;

              goto out_free;

       }

 

       /*

        * The requirement for the buffer is that it should fit indexing B-tree

        * height amount of integers. We assume the height if the TNC tree will

        * never exceed 64.

        */

       err = -ENOMEM;

//bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c,在后面我们会看到在dirty_cow_bottom_up中将znode的所有的ancestors(父节点,父节点的父节点,一直到根节点未知)都设为dirty。所以在标记之前要记录一下所以的ancestors znode。这个bottom_up_buf就是用于这个目的的。

       c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL);

       if (!c->bottom_up_buf)

              goto out_free;

       //sbuf: LEB-sized buffer to use

       c->sbuf = vmalloc(c->leb_size);

       if (!c->sbuf)

              goto out_free;

 

       if (!mounted_read_only) {

             

              c->ileb_buf = vmalloc(c->leb_size);

              if (!c->ileb_buf)

                     goto out_free;

       }

 

       if (c->bulk_read == 1)

              //初始化bulk-read的信息,关于bulk-read的相关信息可以在通过VFS的读操作中看到详细的解释

              bu_init(c);

 

       /*

        * We have to check all CRCs, even for data nodes, when we mount the FS

        * (specifically, when we are replaying).

        */

       c->always_chk_crc = 1;

       //读超级块,如果该volume是空的,显然不存在超级块,这时候需要创建一个最初的文件系统

       err = ubifs_read_superblock(c);

       if (err)

              goto out_free;

 

       /*

        * Make sure the compressor which is set as default in the superblock

        * or overridden by mount options is actually compiled in.

        */

       if (!ubifs_compr_present(c->default_compr)) {

              ubifs_err("'compressor \"%s\" is not compiled in",

                       ubifs_compr_name(c->default_compr));

              err = -ENOTSUPP;

              goto out_free;

       }

//初始化ubifs的一些常量

       err = init_constants_sb(c);

       if (err)

              goto out_free;

       sz = ALIGN(c->max_idx_node_sz, c->min_io_size);

       sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);

       c->cbuf = kmalloc(sz, GFP_NOFS);

       if (!c->cbuf) {

              err = -ENOMEM;

              goto out_free;

       }

       sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);

       if (!mounted_read_only) {

              err = alloc_wbufs(c);

              if (err)

                     goto out_cbuf;

 

              /* Create background thread */

              //创建UBIFS的后台进程,这个后台进程主要用于基于wbuf的读写

              c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);

              if (IS_ERR(c->bgt)) {

                     err = PTR_ERR(c->bgt);

                     c->bgt = NULL;

                     ubifs_err("cannot spawn \"%s\", error %d",

                              c->bgt_name, err);

                     goto out_wbufs;

              }

              //唤醒该进程

              wake_up_process(c->bgt);

       }

       err = ubifs_read_master(c);

       //见下面的具体描述

       if (err)

              goto out_free;

       /*

        * Make sure the compressor which is set as default in the superblock

        * or overridden by mount options is actually compiled in.

        */

       if (!ubifs_compr_present(c->default_compr)) {

              ubifs_err("'compressor \"%s\" is not compiled in",

                       ubifs_compr_name(c->default_compr));

              err = -ENOTSUPP;

              goto out_free;

       }

 

       err = init_constants_sb(c);

       if (err)

              goto out_free;

 

       sz = ALIGN(c->max_idx_node_sz, c->min_io_size);

       sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);

       c->cbuf = kmalloc(sz, GFP_NOFS);

       if (!c->cbuf) {

              err = -ENOMEM;

              goto out_free;

       }

 

       sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);

       if (!mounted_read_only) {

              err = alloc_wbufs(c);

              if (err)

                     goto out_cbuf;

 

              /* Create background thread */

              c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);

              if (IS_ERR(c->bgt)) {

                     err = PTR_ERR(c->bgt);

                     c->bgt = NULL;

                     ubifs_err("cannot spawn \"%s\", error %d",

                              c->bgt_name, err);

                     goto out_wbufs;

              }

              wake_up_process(c->bgt);

       }

 

       err = ubifs_read_master(c);

       if (err)

              goto out_master;

 

       init_constants_master(c);

 

       if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {

              ubifs_msg("recovery needed");

              c->need_recovery = 1;

              if (!mounted_read_only) {

                     err = ubifs_recover_inl_heads(c, c->sbuf);

                     if (err)

                            goto out_master;

              }

       } else if (!mounted_read_only) {

              /*

               * Set the "dirty" flag so that if we reboot uncleanly we

               * will notice this immediately on the next mount.

               */

              c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);

              err = ubifs_write_master(c);

              if (err)

                     goto out_master;

       }

 

       err = ubifs_lpt_init(c, 1, !mounted_read_only);

       if (err)

              goto out_lpt;

 

       err = dbg_check_idx_size(c, c->old_idx_sz);

       if (err)

              goto out_lpt;

 

       err = ubifs_replay_journal(c);

       if (err)

              goto out_journal;

 

       /* Calculate 'min_idx_lebs' after journal replay */

       c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);

 

       err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);

       if (err)

              goto out_orphans;

 

       if (!mounted_read_only) {

              int lnum;

 

              err = check_free_space(c);

              if (err)

                     goto out_orphans;

 

              /* Check for enough log space */

              lnum = c->lhead_lnum + 1;

              if (lnum >= UBIFS_LOG_LNUM + c->log_lebs)

                     lnum = UBIFS_LOG_LNUM;

              if (lnum == c->ltail_lnum) {

                     err = ubifs_consolidate_log(c);

                     if (err)

                            goto out_orphans;

              }

 

              if (c->need_recovery) {

                     err = ubifs_recover_size(c);

                     if (err)

                            goto out_orphans;

                     err = ubifs_rcvry_gc_commit(c);

             

                     err = take_gc_lnum(c);

                     if (err)

                            goto out_orphans;

 

                     /*

                      * GC LEB may contain garbage if there was an unclean

                      * reboot, and it should be un-mapped.

                      */

                     err = ubifs_leb_unmap(c, c->gc_lnum);

                     if (err)

                            return err;

              }

 

              err = dbg_check_lprops(c);

              if (err)

                     goto out_orphans;

      

              err = ubifs_recover_size(c);

              if (err)

                     goto out_orphans;

      

              /*

               * Even if we mount read-only, we have to set space in GC LEB

               * to proper value because this affects UBIFS free space

               * reporting. We do not want to have a situation when

               * re-mounting from R/O to R/W changes amount of free space.

               */

              err = take_gc_lnum(c);

              if (err)

                     goto out_orphans;

       }

 

       spin_lock(&ubifs_infos_lock);

       list_add_tail(&c->infos_list, &ubifs_infos);

       spin_unlock(&ubifs_infos_lock);

 

       if (c->need_recovery) {

              if (mounted_read_only)

                     ubifs_msg("recovery deferred");

              else {

                     c->need_recovery = 0;

                     ubifs_msg("recovery completed");

                     /*

                      * GC LEB has to be empty and taken at this point. But

                      * the journal head LEBs may also be accounted as

                      * "empty taken" if they are empty.

                      */

                     ubifs_assert(c->lst.taken_empty_lebs > 0);

              }

       } else

              ubifs_assert(c->lst.taken_empty_lebs > 0);

 

       err = dbg_check_filesystem(c);

       if (err)

              goto out_infos;

 

       err = dbg_debugfs_init_fs(c);

       if (err)

              goto out_infos;

 

       c->always_chk_crc = 0;

 

       ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",

                c->vi.ubi_num, c->vi.vol_id, c->vi.name);

       if (mounted_read_only)

              ubifs_msg("mounted read-only");

       x = (long long)c->main_lebs * c->leb_size;

       ubifs_msg("file system size:   %lld bytes (%lld KiB, %lld MiB, %d "

                "LEBs)", x, x >> 10, x >> 20, c->main_lebs);

       x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;

       ubifs_msg("journal size:       %lld bytes (%lld KiB, %lld MiB, %d "

                "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);

       ubifs_msg("media format:       w%d/r%d (latest is w%d/r%d)",

                c->fmt_version, c->ro_compat_version,

                UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);

       ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));

       ubifs_msg("reserved for root:  %llu bytes (%llu KiB)",

              c->report_rp_size, c->report_rp_size >> 10);

 

       dbg_msg("compiled on:         " __DATE__ " at " __TIME__);

       dbg_msg("min. I/O unit size:  %d bytes", c->min_io_size);

       dbg_msg("LEB size:            %d bytes (%d KiB)",

              c->leb_size, c->leb_size >> 10);

       dbg_msg("data journal heads:  %d",

              c->jhead_cnt - NONDATA_JHEADS_CNT);

       dbg_msg("UUID:                %02X%02X%02X%02X-%02X%02X"

              "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",

              c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3],

              c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],

              c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],

              c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);

       dbg_msg("big_lpt              %d", c->big_lpt);

       dbg_msg("log LEBs:            %d (%d - %d)",

              c->log_lebs, UBIFS_LOG_LNUM, c->log_last);

       dbg_msg("LPT area LEBs:       %d (%d - %d)",

              c->lpt_lebs, c->lpt_first, c->lpt_last);

       dbg_msg("orphan area LEBs:    %d (%d - %d)",

              c->orph_lebs, c->orph_first, c->orph_last);

       dbg_msg("main area LEBs:      %d (%d - %d)",

              c->main_lebs, c->main_first, c->leb_cnt - 1);

       dbg_msg("index LEBs:          %d", c->lst.idx_lebs);

       dbg_msg("total index bytes:   %lld (%lld KiB, %lld MiB)",

              c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);

       dbg_msg("key hash type:       %d", c->key_hash_type);

       dbg_msg("tree fanout:         %d", c->fanout);

       dbg_msg("reserved GC LEB:     %d", c->gc_lnum);

       dbg_msg("first main LEB:      %d", c->main_first);

       dbg_msg("max. znode size      %d", c->max_znode_sz);

       dbg_msg("max. index node size %d", c->max_idx_node_sz);

       dbg_msg("node sizes:          data %zu, inode %zu, dentry %zu",

              UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ);

       dbg_msg("node sizes:          trun %zu, sb %zu, master %zu",

              UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);

       dbg_msg("node sizes:          ref %zu, cmt. start %zu, orph %zu",

              UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);

       dbg_msg("max. node sizes:     data %zu, inode %zu dentry %zu",

              

             

       dbg_msg("dead watermark:      %d", c->dead_wm);

       dbg_msg("dark watermark:      %d", c->dark_wm);

       dbg_msg("LEB overhead:        %d", c->leb_overhead);

       x = (long long)c->main_lebs * c->dark_wm;

       dbg_msg("max. dark space:     %lld (%lld KiB, %lld MiB)",

              x, x >> 10, x >> 20);

       dbg_msg("maximum bud bytes:   %lld (%lld KiB, %lld MiB)",

              c->max_bud_bytes, c->max_bud_bytes >> 10,

              c->max_bud_bytes >> 20);

       dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",

              c->bg_bud_bytes, c->bg_bud_bytes >> 10,

              c->bg_bud_bytes >> 20);

       dbg_msg("current bud bytes    %lld (%lld KiB, %lld MiB)",

              c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20);

       dbg_msg("max. seq. number:    %llu", c->max_sqnum);

       dbg_msg("commit number:       %llu", c->cmt_no);

 

       return 0;

 

out_infos:

       spin_lock(&ubifs_infos_lock);

       list_del(&c->infos_list);

       spin_unlock(&ubifs_infos_lock);

out_orphans:

       free_orphans(c);

out_journal:

       destroy_journal(c);

out_lpt:

       ubifs_lpt_free(c, 0);

out_master:

       kfree(c->mst_node);

       kfree(c->rcvrd_mst_node);

       if (c->bgt)

              kthread_stop(c->bgt);

out_wbufs:

       free_wbufs(c);

out_cbuf:

       kfree(c->cbuf);

out_free:

       kfree(c->bu.buf);

       vfree(c->ileb_buf);

       vfree(c->sbuf);

       kfree(c->bottom_up_buf);

       ubifs_debugging_exit(c);

       return err;

}

 

int ubifs_read_superblock(struct ubifs_info *c)

{

       int err, sup_flags;

       struct ubifs_sb_node *sup;

       //如果前面扫描的时候发现该卷中的LEB全部没有map,因此是一个空卷,什么信息都没有,这时候需要建立一个最原始的文件系统,其实就是写入superblock节点(LEB0),master节点(LEB1,和LEB2),commit节点(LEB3),inode节点(main_first+1),index节点(main_first+0)。

       //对于这些节点,我觉得很有必要详细的描述一下。我们都知道每一个文件系统都有一个超级块,里面存放的是文件系统的基本信息,在这儿ubifs将超级块以superblock类型节点的形式写进了flash media

       //从《a brief introduce of ubi and ubifs》的文档中可以看出。为了垃圾回收,采用node-structure的形式组织文件,jiffs2中这些相关的数据结构是在mount的时候建立的,这样花费了大量的时间和内存资源,而ubifs中这些数据是保存在flash media中的。Master节点就是这样的树状信息的根节点。Master节点是一式两份的,分别保存在LEB1LEB2上。为什么需要两份呢?

因为文件更新的时候,B+tree中的数据会变的,相应的master也就需要更新,为了防止在更新master的时候发生unclean reboot导致数据被破坏,所以保存了两份,用于unclean reboot时候的数据恢复。

       if (c->empty) {

              err = create_default_filesystem(c);

              if (err)

                     return err;

       }

       //读出超级块,当然这个超级块有可能是上面的create_default_filesystem刚刚写进去的。

       sup = ubifs_read_sb_node(c);

       if (IS_ERR(sup))

              return PTR_ERR(sup);

 

       c->fmt_version = le32_to_cpu(sup->fmt_version);

       c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);

 

       /*

        * The software supports all previous versions but not future versions,

        * due to the unavailability of time-travelling equipment.

        */

       if (c->fmt_version > UBIFS_FORMAT_VERSION) {

              struct super_block *sb = c->vfs_sb;

              int mounting_ro = sb->s_flags & MS_RDONLY;

 

              ubifs_assert(!c->ro_media || mounting_ro);

              if (!mounting_ro ||

                  c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {

                     ubifs_err("on-flash format version is w%d/r%d, but "

                              "software only supports up to version "

                              "w%d/r%d", c->fmt_version,

                              c->ro_compat_version, UBIFS_FORMAT_VERSION,

                              UBIFS_RO_COMPAT_VERSION);

                     if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {

                            ubifs_msg("only R/O mounting is possible");

                            err = -EROFS;

                     } else

                            err = -EINVAL;

                     goto out;

              }

 

              /*

               * The FS is mounted R/O, and the media format is

               * R/O-compatible with the UBIFS implementation, so we can

               * mount.

               */

              c->rw_incompat = 1;

       }

 

       if (c->fmt_version < 3) {

              ubifs_err("on-flash format version %d is not supported",

                       c->fmt_version);

              err = -EINVAL;

              goto out;

       }

 

//采用哪种hash运算方法   

switch (sup->key_hash) {

       case UBIFS_KEY_HASH_R5:

              c->key_hash = key_r5_hash;

              c->key_hash_type = UBIFS_KEY_HASH_R5;

              break;

 

       case UBIFS_KEY_HASH_TEST:

              c->key_hash = key_test_hash;

              c->key_hash_type = UBIFS_KEY_HASH_TEST;

              break;

       };

 

       c->key_fmt = sup->key_fmt;

 

       switch (c->key_fmt) {

       case UBIFS_SIMPLE_KEY_FMT:

              c->key_len = UBIFS_SK_LEN;

              break;

       default:

              ubifs_err("unsupported key format");

              err = -EINVAL;

              goto out;

       }

       //用从超级块中读出的信息来初始化内存中的ubifs_info结构体

       c->leb_cnt       = le32_to_cpu(sup->leb_cnt);

       c->max_leb_cnt   = le32_to_cpu(sup->max_leb_cnt);

       c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes);

       c->log_lebs      = le32_to_cpu(sup->log_lebs);

       c->lpt_lebs      = le32_to_cpu(sup->lpt_lebs);

       c->orph_lebs     = le32_to_cpu(sup->orph_lebs);

       c->jhead_cnt     = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;

       c->fanout        = le32_to_cpu(sup->fanout);

       c->lsave_cnt     = le32_to_cpu(sup->lsave_cnt);

       c->rp_size       = le64_to_cpu(sup->rp_size);

       c->rp_uid        = le32_to_cpu(sup->rp_uid);

       c->rp_gid        = le32_to_cpu(sup->rp_gid);

       sup_flags        = le32_to_cpu(sup->flags);

       if (!c->mount_opts.override_compr)

              c->default_compr = le16_to_cpu(sup->default_compr);

 

       c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);

       memcpy(&c->uuid, &sup->uuid, 16);

       c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);

 

       /* Automatically increase file system size to the maximum size */

       //ubivolume是可以resize的,即可以改变大小。此时需要重新写超级块

       c->old_leb_cnt = c->leb_cnt;

       if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {

              c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);

              if (c->vfs_sb->s_flags & MS_RDONLY)

                     dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",

                            c->old_leb_cnt,      c->leb_cnt);

              else {

                     dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs",

                            c->old_leb_cnt, c->leb_cnt);

                     sup->leb_cnt = cpu_to_le32(c->leb_cnt);

                     err = ubifs_write_sb_node(c, sup);

                     if (err)

                            goto out;

                     c->old_leb_cnt = c->leb_cnt;

              }

       }

 

       c->log_bytes = (long long)c->log_lebs * c->leb_size;

       c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1;

       c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs;

       c->lpt_last = c->lpt_first + c->lpt_lebs - 1;

       c->orph_first = c->lpt_last + 1;

       c->orph_last = c->orph_first + c->orph_lebs - 1;

       c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;

       c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;

       c->main_first = c->leb_cnt - c->main_lebs;

 

       err = validate_sb(c, sup);

out:

       kfree(sup);

       return err;

}

 

static int create_default_filesystem(struct ubifs_info *c)

{

       struct ubifs_sb_node *sup;

       struct ubifs_mst_node *mst;

       struct ubifs_idx_node *idx;

       struct ubifs_branch *br;

       struct ubifs_ino_node *ino;

       struct ubifs_cs_node *cs;

       union ubifs_key key;

       int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;

       int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;

       int min_leb_cnt = UBIFS_MIN_LEB_CNT;

       long long tmp64, main_bytes;

       __le64 tmp_le64;

 

       /* Some functions called from here depend on the @c->key_len filed */

       c->key_len = UBIFS_SK_LEN;

 

       /*

        * First of all, we have to calculate default file-system geometry -

        * log size, journal size, etc.

        */

       //首先根据文件系统的大小算相应的journallog区的大小。Journal的目的前面可能已经提到了,因为ubifs的文件的B+tree的数据是保存在flash media中,这就带来了一个问题,每次更新文件的时候都需要更新相关的B+tree的信息,这样就会频繁的读写flash设备,降低文件系统的性能。所以采用了joural,也就是说在更新的时候先将更新相关inode的信息写进log中,在log满了的时候才一起更新flash media中的B+tree。这样降低了更新的频率,提高了文件系统的性能。

       if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT)

              /* We can first multiply then divide and have no overflow */

              jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100;

       else

              jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT;

 

       if (jnl_lebs < UBIFS_MIN_JNL_LEBS)

              jnl_lebs = UBIFS_MIN_JNL_LEBS;

       if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL)

              jnl_lebs = DEFAULT_MAX_JNL / c->leb_size;

 

       /*

        * The log should be large enough to fit reference nodes for all bud

        * LEBs. Because buds do not have to start from the beginning of LEBs

        * (half of the LEB may contain committed data), the log should

        * generally be larger, make it twice as large.

        */

       tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1;

       log_lebs = tmp / c->leb_size;

       /* Plus one LEB reserved for commit */

       log_lebs += 1;

       if (c->leb_cnt - min_leb_cnt > 8) {

              /* And some extra space to allow writes while committing */

              log_lebs += 1;

              min_leb_cnt += 1;

       }

 

       max_buds = jnl_lebs - log_lebs;

       if (max_buds < UBIFS_MIN_BUD_LEBS)

              max_buds = UBIFS_MIN_BUD_LEBS;

 

       /*

        * Orphan nodes are stored in a separate area. One node can store a lot

        * of orphan inode numbers, but when new orphan comes we just add a new

        * orphan node. At some point the nodes are consolidated into one

        * orphan node.

        */

       // An orphan is an inode number whose inode node has been committed to the index with a link count of zero. That happens when an open file is deleted (unlinked) and then a commit is run

       // The orphan area is a fixed number of LEBs situated between the LPT area and the main area

       // orphan 顾名思义是指牺牲者,在ubifs中的当一inode的引用为零的时候,这个文件需要被删除,为了防止在删除的时候发生unclean rebootubifs将这些需要删除的文件信息写在orphan area中,这样在发生unclean reboot的时候文件系统可以清楚的知道哪些文件需要被删除,而不是去扫描整个分区。文件系统在没有空余空间的时候也可以通过GC子系统来回收这些空间。关于orphan 的相关信息就保存在orphan area中,The orphan area is a fixed number of LEBs situated between the LPT area and the main area

       orph_lebs = UBIFS_MIN_ORPH_LEBS;

#ifdef CONFIG_UBIFS_FS_DEBUG

       if (c->leb_cnt - min_leb_cnt > 1)

              /*

               * For debugging purposes it is better to have at least 2

               * orphan LEBs, because the orphan subsystem would need to do

               * consolidations and would be stressed more.

               */

              orph_lebs += 1;

#endif

 

       main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs;

       main_lebs -= orph_lebs;

       //上面提到了,orphan区处于LPT区和main area之间。什么是LPT?LPT = LEB Properties Tree

       lpt_first = UBIFS_LOG_LNUM + log_lebs;

       c->lsave_cnt = DEFAULT_LSAVE_CNT;

       c->max_leb_cnt = c->leb_cnt;

       err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs,

                                &big_lpt);

*********************************************************************************

ubifs_create_dflt_lpt算出LPT需要占用几块LEB。LPT描述的是ubifs中每一个leb的空闲(free)bytes和脏(dirty)bytes。这儿的“脏”好像并不是指被修改的意思,从代码pnode->lprops[0].dirty = iopos - node_sz;来看,大体的意思是没有被写、但是别人也不能用的空间:因为flash操作的基本单元是page,如果在某一页中只写了一半的数据,那么另外一半就是脏的,虽然没有写东西,但是别人也用不了(Dirty space is the number of bytes taken up by obsolete nodes and padding, that can potentially be reclaimed by garbage collection)。因为LPT区自己也占用了LEB,所以需要为LPT自己建立表。这就像内核在启动的过程中建立自己的页表一样。

a)         为根index节点和根inode节点所占的leb创建LEB properties

b)        为其余所有的pnode节点建立信息,同时将信息写入flash media

 

**********************************************************************************

if (err)

              return err;

       dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first,

              lpt_first + lpt_lebs - 1);

 

       main_first = c->leb_cnt - main_lebs;

 

       /* Create default superblock */

       tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);

       sup = kzalloc(tmp, GFP_KERNEL);

       if (!sup)

              return -ENOMEM;

 

       tmp64 = (long long)max_buds * c->leb_size;

       if (big_lpt)

              sup_flags |= UBIFS_FLG_BIGLPT;

       //初始化superblock节点

       sup->ch.node_type  = UBIFS_SB_NODE;

       sup->key_hash      = UBIFS_KEY_HASH_R5;

       sup->flags         = cpu_to_le32(sup_flags);

       sup->min_io_size   = cpu_to_le32(c->min_io_size);

       sup->leb_size      = cpu_to_le32(c->leb_size);

       sup->leb_cnt       = cpu_to_le32(c->leb_cnt);

       sup->max_leb_cnt   = cpu_to_le32(c->max_leb_cnt);

       sup->max_bud_bytes = cpu_to_le64(tmp64);

       sup->log_lebs      = cpu_to_le32(log_lebs);

       sup->lpt_lebs      = cpu_to_le32(lpt_lebs);

       sup->orph_lebs     = cpu_to_le32(orph_lebs);

       sup->jhead_cnt     = cpu_to_le32(DEFAULT_JHEADS_CNT);

       sup->fanout        = cpu_to_le32(DEFAULT_FANOUT);

       sup->lsave_cnt     = cpu_to_le32(c->lsave_cnt);

       sup->fmt_version   = cpu_to_le32(UBIFS_FORMAT_VERSION);

       sup->time_gran     = cpu_to_le32(DEFAULT_TIME_GRAN);

       if (c->mount_opts.override_compr)

              sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);

       else

              sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);

 

       generate_random_uuid(sup->uuid);

 

       main_bytes = (long long)main_lebs * c->leb_size;

       tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100);

       if (tmp64 > DEFAULT_MAX_RP_SIZE)

              tmp64 = DEFAULT_MAX_RP_SIZE;

       sup->rp_size = cpu_to_le64(tmp64);

       sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);

       //写入superblock 节点到LEB0

       err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);

       kfree(sup);

       if (err)

              return err;

 

       dbg_gen("default superblock created at LEB 0:0");

 

       /* Create default master node */

       mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);

       if (!mst)

              return -ENOMEM;

       //初始化master节点

       mst->ch.node_type = UBIFS_MST_NODE;

       mst->log_lnum     = cpu_to_le32(UBIFS_LOG_LNUM);

       mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO);

       mst->cmt_no       = 0;

       mst->root_lnum    = cpu_to_le32(main_first + DEFAULT_IDX_LEB);

       mst->root_offs    = 0;

       tmp = ubifs_idx_node_sz(c, 1);

       mst->root_len     = cpu_to_le32(tmp);

       mst->gc_lnum      = cpu_to_le32(main_first + DEFAULT_GC_LEB);

       mst->ihead_lnum   = cpu_to_le32(main_first + DEFAULT_IDX_LEB);

       mst->ihead_offs   = cpu_to_le32(ALIGN(tmp, c->min_io_size));

       mst->index_size   = cpu_to_le64(ALIGN(tmp, 8));

       mst->lpt_lnum     = cpu_to_le32(c->lpt_lnum);

       mst->lpt_offs     = cpu_to_le32(c->lpt_offs);

       mst->nhead_lnum   = cpu_to_le32(c->nhead_lnum);

       mst->nhead_offs   = cpu_to_le32(c->nhead_offs);

       mst->ltab_lnum    = cpu_to_le32(c->ltab_lnum);

       mst->ltab_offs    = cpu_to_le32(c->ltab_offs);

       mst->lsave_lnum   = cpu_to_le32(c->lsave_lnum);

       mst->lsave_offs   = cpu_to_le32(c->lsave_offs);

       mst->lscan_lnum   = cpu_to_le32(main_first);

       mst->empty_lebs   = cpu_to_le32(main_lebs - 2);

       mst->idx_lebs     = cpu_to_le32(1);

       mst->leb_cnt      = cpu_to_le32(c->leb_cnt);

 

       /* Calculate lprops statistics */

       tmp64 = main_bytes;

       tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);

       tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);

       mst->total_free = cpu_to_le64(tmp64);

 

       tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);

       ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) -

                      

       tmp64 += ino_waste;

       tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8);

       mst->total_dirty = cpu_to_le64(tmp64);

 

       /*  The indexing LEB does not contribute to dark space */

       tmp64 = (c->main_lebs - 1) * c->dark_wm;

       mst->total_dark = cpu_to_le64(tmp64);

 

       mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ);

        //master节点一式两份

       err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0,

                           

       if (err) {

              kfree(mst);

              return err;

       }

       err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0,

                           

       kfree(mst);

       if (err)

              return err;

 

       dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM);

 

       /* Create the root indexing node */

       tmp = ubifs_idx_node_sz(c, 1);

       //idx节点。从tnc.c中的描述操作,idx的成员zbranch以及make_idx_node函数看来,idx节点是用来在flash media中保存TNC树的

内核用struct ubifs_znode结构体来代表着flash中的一个idx 节点。Idx节点的孩子代表真正的数据,当然这些数据本身可以是一个idx节点,也可以是当初的数据。

这儿初始化的是TNC的根节点。

       //《A Brief Introduction to the Design of UBIFS》中说inode节点和它的数据是分开的,上面的idx节点其实是存放的数据。那么struct ubifs_ino_node类型的节点是存放的inode吗?(yes)

       // In UBIFS, inodes have a corresponding inode node which records the number of directory entry links, more simply known as the link count.

       // inode node is a node that holds the metadata for an inode. Every inode has exactly one (non-obsolete) inode node.

       idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);

       if (!idx)

              return -ENOMEM;

 

       c->key_fmt = UBIFS_SIMPLE_KEY_FMT;

       c->key_hash = key_r5_hash;

 

       idx->ch.node_type = UBIFS_IDX_NODE;

       idx->child_cnt = cpu_to_le16(1);

       ino_key_init(c, &key, UBIFS_ROOT_INO);

       br = ubifs_idx_branch(c, idx, 0);

       key_write_idx(c, &key, &br->key);

       br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB);

       br->len  = cpu_to_le32(UBIFS_INO_NODE_SZ);

       err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0,

                           

       kfree(idx);

       if (err)

              return err;

 

       dbg_gen("default root indexing node created LEB %d:0",

              main_first + DEFAULT_IDX_LEB);

 

       /* Create default root inode */

       tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);

       ino = kzalloc(tmp, GFP_KERNEL);

       if (!ino)

              return -ENOMEM;

 

       ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO);

       ino->ch.node_type = UBIFS_INO_NODE;

       ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);

       ino->nlink = cpu_to_le32(2);

       tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);

       ino->atime_sec   = tmp_le64;

       ino->ctime_sec   = tmp_le64;

       ino->mtime_sec   = tmp_le64;

       ino->atime_nsec  = 0;

       ino->ctime_nsec  = 0;

       ino->mtime_nsec  = 0;

       ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO);

       ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ);

 

       /* Set compression enabled by default */

       ino->flags = cpu_to_le32(UBIFS_COMPR_FL);

 

       err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,

                                   main_first + DEFAULT_DATA_LEB, 0,

                           

       kfree(ino);

       if (err)

              return err;

 

       dbg_gen("root inode created at LEB %d:0",

              main_first + DEFAULT_DATA_LEB);

 

       /*

        * The first node in the log has to be the commit start node. This is

        * always the case during normal file-system operation. Write a fake

        * commit start node to the log.

        */

       tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size);

       cs = kzalloc(tmp, GFP_KERNEL);

       if (!cs)

              return -ENOMEM;

 

       cs->ch.node_type = UBIFS_CS_NODE;

       //向log区域写入一个commit start node。每一次commit的时候会向log区域写入两种类型的节点:一种就是commit start类型的节点,表示一次commit的开始;另外一种就是reference节点,里面记录了相应的日志需要操作的leb和offset

       err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM,

                            0, UBI_UNKNOWN);

       kfree(cs);

       ubifs_msg("default file-system created");

       return 0;

}

 

ubifs文件系统的master节点,我们前面提到了master节点是一式两份的,因为它里面保存的是idx的最基本的东西,不容有失。而且master节点是不能同时写的,防止unclean reboot使得两份数据同时被破坏

int ubifs_read_master(struct ubifs_info *c)

{

       int err, old_leb_cnt;

       c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL);

       if (!c->mst_node)

              return -ENOMEM;

       //检查两份master节点,看是master中的数据是否被破坏。

       err = scan_for_master(c);

       if (err) {

              if (err == -EUCLEAN)

                     //如果被破坏,那么就需要恢复

                     err = ubifs_recover_master_node(c);

              if (err)

                     /*

                      * Note, we do not free 'c->mst_node' here because the

                      * unmount routine will take care of this.

                      */

                     return err;

       }

 

       /* Make sure that the recovery flag is clear */

       master节点来初始化ubifs_info结构体中的信息

       c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY);

       c->max_sqnum       = le64_to_cpu(c->mst_node->ch.sqnum);

       c->highest_inum    = le64_to_cpu(c->mst_node->highest_inum);

       c->cmt_no          = le64_to_cpu(c->mst_node->cmt_no);

       c->zroot.lnum      = le32_to_cpu(c->mst_node->root_lnum);

       c->zroot.offs      = le32_to_cpu(c->mst_node->root_offs);

       c->zroot.len       = le32_to_cpu(c->mst_node->root_len);

       c->lhead_lnum      = le32_to_cpu(c->mst_node->log_lnum);

       c->gc_lnum         = le32_to_cpu(c->mst_node->gc_lnum);

       c->ihead_lnum      = le32_to_cpu(c->mst_node->ihead_lnum);

       c->ihead_offs      = le32_to_cpu(c->mst_node->ihead_offs);

       c->old_idx_sz      = le64_to_cpu(c->mst_node->index_size);

       c->lpt_lnum        = le32_to_cpu(c->mst_node->lpt_lnum);

       c->lpt_offs        = le32_to_cpu(c->mst_node->lpt_offs);

       c->nhead_lnum      = le32_to_cpu(c->mst_node->nhead_lnum);

       c->nhead_offs      = le32_to_cpu(c->mst_node->nhead_offs);

       c->ltab_lnum       = le32_to_cpu(c->mst_node->ltab_lnum);

       c->ltab_offs       = le32_to_cpu(c->mst_node->ltab_offs);

       c->lsave_lnum      = le32_to_cpu(c->mst_node->lsave_lnum);

       c->lsave_offs      = le32_to_cpu(c->mst_node->lsave_offs);

       c->lscan_lnum      = le32_to_cpu(c->mst_node->lscan_lnum);

       c->lst.empty_lebs  = le32_to_cpu(c->mst_node->empty_lebs);

       c->lst.idx_lebs    = le32_to_cpu(c->mst_node->idx_lebs);

       old_leb_cnt        = le32_to_cpu(c->mst_node->leb_cnt);

       c->lst.total_free  = le64_to_cpu(c->mst_node->total_free);

       c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty);

       c->lst.total_used  = le64_to_cpu(c->mst_node->total_used);

       c->lst.total_dead  = le64_to_cpu(c->mst_node->total_dead);

       c->lst.total_dark  = le64_to_cpu(c->mst_node->total_dark);

 

       c->calc_idx_sz = c->old_idx_sz;

 

       if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))

              c->no_orphs = 1;

 

       if (old_leb_cnt != c->leb_cnt) {

              /* The file system has been resized */

              int growth = c->leb_cnt - old_leb_cnt;

 

              if (c->leb_cnt < old_leb_cnt ||

                  c->leb_cnt < UBIFS_MIN_LEB_CNT) {

                     ubifs_err("bad leb_cnt on master node");

                     dbg_dump_node(c, c->mst_node);

                     return -EINVAL;

              }

              dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs",

                     old_leb_cnt, c->leb_cnt);

              c->lst.empty_lebs += growth;

              c->lst.total_free += growth * (long long)c->leb_size;

              c->lst.total_dark += growth * (long long)c->dark_wm;

              /*

               * Reflect changes back onto the master node. N.B. the master

               * node gets written immediately whenever mounting (or

               * remounting) in read-write mode, so we do not need to write it

               * here.

               */

              c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt);

              c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs);

              c->mst_node->total_free = cpu_to_le64(c->lst.total_free);

              c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark);

       }

       err = validate_master(c);

       if (err)

              return err;

       err = dbg_old_index_check_init(c, &c->zroot);

       return err;

}



---------------如有不对,请给予指正,谢谢。

阅读(4613) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~