Chinaunix首页 | 论坛 | 博客
  • 博客访问: 12337
  • 博文数量: 4
  • 博客积分: 0
  • 博客等级: 民兵
  • 技术积分: 20
  • 用 户 组: 普通用户
  • 注册时间: 2014-03-24 15:02
文章存档

2014年(4)

我的朋友

分类: C/C++

2014-03-26 16:41:17


用户层mmap函数的原型:

点击(此处)折叠或打开

  1. void *mmap(void *start,size_t length,int prot,int flags,int fd,off_t offset);

参数解析:
start:映射区的起始地址,一般置NULL,即交给内核自动分配
length:映射区的大小。
prot:被映射的内存在映射区的保护方式
          可取如下几个值的或:PROT_READ(可读) , PROT_WRITE (可写), PROT_EXEC (可执行), PROT_NONE(不可访问)。
flags:映射区的类型
         flags由以下几个常值指定:MAP_SHARED , MAP_PRIVATE , MAP_FIXED,其中,MAP_SHARED , MAP_PRIVATE必选其一,而MAP_FIXED则不推荐使用。
fd:被映射文件的文件描述符。
offset:映射内容在文件中的起始偏移,必须是页大小的整数倍;一般置0,表示从文件的开头进行映射。


返回值:即映射到进程空间的地址。
PS:在man手册里面其中的参数会有很好的解释。

调用过程:

当用户层调用mmap函数时,应用程序需要通知内核执行一个系统调用,并请求系统切换到内核态。这种机制靠软件中断实现。
实现过程如下:首先用户程序为系统调用设置参数,其中一个参数是系统调用编号;设置完毕后,系统切换到内核态,以系统调用表的基址加上前面设置的编号作为索引,跳转到需要执行的系统调用函数的地址上。
该系统调用表(sys_call_table)一般在sys/entry.S中定义,如下:

点击(此处)折叠或打开

  1. sys_call_table:
  2.     .long    sys_restart_syscall
  3.     .long    sys_exit
  4.     .long    __sys_fork
  5.     .long    sys_read
  6.     .long    sys_write
  7.           .....
  8.     .long    sys_mmap   //或者有的版本里面可能是 .long    sys_mmap2,
  9.                         //不过后面都会调用sys_mmap_pgoff函数
  10.            ....
  11. 省略

再看看sys_mmap和sys_mmap2是如何定义的:

点击(此处)折叠或打开

  1. asmlinkage long
  2. sys_mmap(unsigned long addr,unsigned long len,unsigned long prot,unsigned long flags,unsigned long fd,off_t offset)
  3. {
  4.     if(unlikely(offset & ~PAGE_MASK))
  5.        return -EINVAL;
  6.     return sys_mmap_pgoff(addr,len,prot,flags,fd,offset>>PAGE_SHIFT);
  7. }


点击(此处)折叠或打开

  1. asmlinkage long
  2. sys_mmap2(unsigned long addr,unsigned long len,unsigned long prot,unsigned long flags,unsigned long fd,off_t offset)

  3. {
  4.     if(unlikely(offset & ~PAGE_MASK))
  5.        return -EINVAL;
  6.     return sys_mmap_pgoff(addr,len,prot,flags,fd,offset>>PAGE_SHIFT);
  7. }
那么sys_mmap_pgoff函数是在哪里定义的呢?我想它应该是通过宏SYSCALL_DEFINE6来定义的:SYSCALL_DEFINE6(mmap_pgoff, ...)经宏展开后生成sys_mmap_pgoff函数,执行流程由此继续往下(不是很懂,若有错误,望不吝指教)。

此处是SYSCALL_DEFINE6的宏定义:

点击(此处)折叠或打开

  1. #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)

  2. #define SYSCALL_DEFINEx(x, sname, ...)                \
  3.     static const char *types_##sname[] = {            \
  4.         __SC_STR_TDECL##x(__VA_ARGS__)            \
  5.     };                            \
  6.     static const char *args_##sname[] = {            \
  7.         __SC_STR_ADECL##x(__VA_ARGS__)            \
  8.     };                            \
  9.     SYSCALL_METADATA(sname, x);                \
  10.     __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

sys_mmap_pgoff()函数做了如下几件事:
1、一些常规的错误检查工作
2、对于非匿名映射(flags中未设置MAP_ANONYMOUS),通过file = fget(fd);得到对应的struct file对象指针
3、调用do_mmap_pgoff(file, addr, len, prot, flags, pgoff)函数完成后续的映射工作。

点击(此处)折叠或打开

  1. SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
  2.         unsigned long, prot, unsigned long, flags,
  3.         unsigned long, fd, unsigned long, pgoff)
  4. {
  5.     struct file *file = NULL;
  6.     unsigned long retval = -EBADF;
  7.                                                       //---------------------1
  8.     if (!(flags & MAP_ANONYMOUS)) {
  9.         if (unlikely(flags & MAP_HUGETLB))
  10.             return -EINVAL;
  11.         file = fget(fd);                               //--------------------2
  12.         if (!file)
  13.             goto out;
  14.     } else if (flags & MAP_HUGETLB) {
  15.         struct user_struct *user = NULL;
  16.         /*
  17.          * VM_NORESERVE is used because the reservations will be
  18.          * taken when vm_ops->mmap() is called
  19.          * A dummy user value is used because we are not locking
  20.          * memory so no accounting is necessary
  21.          */
  22.         len = ALIGN(len, huge_page_size(&default_hstate));
  23.         file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
  24.                         &user, HUGETLB_ANONHUGE_INODE);
  25.         if (IS_ERR(file))
  26.             return PTR_ERR(file);
  27.     }

  28.     flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

  29.     down_write(&current->mm->mmap_sem);
  30.     retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);  //-----------------------3
  31.     up_write(&current->mm->mmap_sem);

  32.     if (file)
  33.         fput(file);
  34. out:
  35.     return retval;
  36. }

do_mmap_pgoff()函数的实现及说明:

点击(此处)折叠或打开

  1. unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
  2.             unsigned long len, unsigned long prot,
  3.             unsigned long flags, unsigned long pgoff)
  4. {
  5.     struct mm_struct * mm = current->mm;
  6.     struct inode *inode;
  7.     unsigned int vm_flags;
  8.     int error;
  9.     unsigned long reqprot = prot;

  10.     /*
  11.      * Does the application expect PROT_READ to imply PROT_EXEC?
  12.      *
  13.      * (the exception is when the underlying filesystem is noexec
  14.      * mounted, in which case we dont add PROT_EXEC.)
  15.      */
  16.     if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))         //防御性代码检查,即参数的合法性检查
  17.         if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
  18.             prot |= PROT_EXEC;

  19.     if (!len)
  20.         return -EINVAL;

  21.     if (!(flags & MAP_FIXED))
  22.         addr = round_hint_to_min(addr);

  23.     /* Careful about overflows.. */
  24.     len = PAGE_ALIGN(len);               //确保映射区的长度为一个PAGE大小的整数倍
  25.     if (!len)
  26.         return -ENOMEM;

  27.     /* offset overflow? */
  28.     if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)         //检查pageoffset是否溢出,即offset参数的合法性
  29.                return -EOVERFLOW;

  30.     /* Too many mappings? */
  31.     if (mm->map_count > sysctl_max_map_count)
  32.         return -ENOMEM;

  33.     /* Obtain the address to map to. we verify (or select) it and ensure
  34.      * that it represents a valid section of the address space.
  35.      */
  36.     addr = get_unmapped_area(file, addr, len, pgoff, flags);     //用来在用户进程的3GB的虚拟地址空间内分配一段空闲区域,传统的布局方式
  37.     if (addr & ~PAGE_MASK)
  38.         return addr;

  39.     /* Do simple checking here so the lower-level routines won't have
  40.      * to. we assume access permissions have been handled by the open
  41.      * of the memory object, so we don't do any here.
  42.      */
  43.     vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
  44.             mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

  45.     if (flags & MAP_LOCKED)
  46.         if (!can_do_mlock())
  47.             return -EPERM;

  48.     /* mlock MCL_FUTURE? */
  49.     if (vm_flags & VM_LOCKED) {
  50.         unsigned long locked, lock_limit;
  51.         locked = len >> PAGE_SHIFT;
  52.         locked += mm->locked_vm;
  53.         lock_limit = rlimit(RLIMIT_MEMLOCK);
  54.         lock_limit >>= PAGE_SHIFT;
  55.         if (locked > lock_limit && !capable(CAP_IPC_LOCK))
  56.             return -EAGAIN;
  57.     }

  58.     inode = file ? file->f_path.dentry->d_inode : NULL;

  59.     if (file) {
  60.         switch (flags & MAP_TYPE) {
  61.         case MAP_SHARED:
  62.             if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
  63.                 return -EACCES;

  64.             /*
  65.              * Make sure we don't allow writing to an append-only
  66.              * file..
  67.              */
  68.             if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
  69.                 return -EACCES;

  70.             /*
  71.              * Make sure there are no mandatory locks on the file.
  72.              */
  73.             if (locks_verify_locked(inode))
  74.                 return -EAGAIN;

  75.             vm_flags |= VM_SHARED | VM_MAYSHARE;
  76.             if (!(file->f_mode & FMODE_WRITE))
  77.                 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);

  78.             /* fall through */
  79.         case MAP_PRIVATE:
  80.             if (!(file->f_mode & FMODE_READ))
  81.                 return -EACCES;
  82.             if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
  83.                 if (vm_flags & VM_EXEC)
  84.                     return -EPERM;
  85.                 vm_flags &= ~VM_MAYEXEC;
  86.             }

  87.             if (!file->f_op || !file->f_op->mmap)
  88.                 return -ENODEV;
  89.             break;

  90.         default:
  91.             return -EINVAL;
  92.         }
  93.     } else {
  94.         switch (flags & MAP_TYPE) {
  95.         case MAP_SHARED:
  96.             /*
  97.              * Ignore pgoff.
  98.              */
  99.             pgoff = 0;
  100.             vm_flags |= VM_SHARED | VM_MAYSHARE;
  101.             break;
  102.         case MAP_PRIVATE:
  103.             /*
  104.              * Set pgoff according to addr for anon_vma.
  105.              */
  106.             pgoff = addr >> PAGE_SHIFT;
  107.             break;
  108.         default:
  109.             return -EINVAL;
  110.         }
  111.     }

  112.     error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
  113.     if (error)
  114.         return error;

  115.     return mmap_region(file, addr, len, flags, vm_flags, pgoff);//核心功能,此函数实现
  116. }


mmap_region()函数的实现及说明:

点击(此处)折叠或打开

  1. unsigned long mmap_region(struct file *file, unsigned long addr,
  2.              unsigned long len, unsigned long flags,
  3.              unsigned int vm_flags, unsigned long pgoff)
  4. {
  5.     struct mm_struct *mm = current->mm;
  6.     struct vm_area_struct *vma, *prev;
  7.     int correct_wcount = 0;
  8.     int error;
  9.     struct rb_node **rb_link, *rb_parent;
  10.     unsigned long charged = 0;
  11.     struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;

  12.     /* Clear old maps */
  13.     error = -ENOMEM;
  14. munmap_back:
  15.     vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
  16.     if (vma && vma->vm_start < addr + len) {
  17.         if (do_munmap(mm, addr, len))
  18.             return -ENOMEM;
  19.         goto munmap_back;
  20.     }

  21.     /* Check against address space limit. */
  22.     if (!may_expand_vm(mm, len >> PAGE_SHIFT))
  23.         return -ENOMEM;

  24.     /*
  25.      * Set 'VM_NORESERVE' if we should not account for the
  26.      * memory use of this mapping.
  27.      */
  28.     if ((flags & MAP_NORESERVE)) {
  29.         /* We honor MAP_NORESERVE if allowed to overcommit */
  30.         if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
  31.             vm_flags |= VM_NORESERVE;

  32.         /* hugetlb applies strict overcommit unless MAP_NORESERVE */
  33.         if (file && is_file_hugepages(file))
  34.             vm_flags |= VM_NORESERVE;
  35.     }

  36.     /*
  37.      * Private writable mapping: check memory availability
  38.      */
  39.     if (accountable_mapping(file, vm_flags)) {
  40.         charged = len >> PAGE_SHIFT;
  41.         if (security_vm_enough_memory(charged))
  42.             return -ENOMEM;
  43.         vm_flags |= VM_ACCOUNT;
  44.     }

  45.     /*
  46.      * Can we just expand an old mapping?
  47.      */
  48.     vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
  49.     if (vma)
  50.         goto out;

  51.     /*
  52.      * Determine the object being mapped and call the appropriate
  53.      * specific mapper. the address has already been validated, but
  54.      * not unmapped, but the maps are removed from the list.
  55.      */
  56.     vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
  57.     if (!vma) {
  58.         error = -ENOMEM;
  59.         goto unacct_error;
  60.     }

  61.     vma->vm_mm = mm;
  62.     vma->vm_start = addr;
  63.     vma->vm_end = addr + len;
  64.     vma->vm_flags = vm_flags;
  65.     vma->vm_page_prot = vm_get_page_prot(vm_flags);
  66.     vma->vm_pgoff = pgoff;
  67.     INIT_LIST_HEAD(&vma->anon_vma_chain);

  68.     if (file) {
  69.         error = -EINVAL;
  70.         if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
  71.             goto free_vma;
  72.         if (vm_flags & VM_DENYWRITE) {
  73.             error = deny_write_access(file);
  74.             if (error)
  75.                 goto free_vma;
  76.             correct_wcount = 1;
  77.         }
  78.         vma->vm_file = file;
  79.         get_file(file);
  80.         error = file->f_op->mmap(file, vma);                //------------------------------------------------------引用驱动程序中的mmap方法
  81.         if (error)
  82.             goto unmap_and_free_vma;
  83.         if (vm_flags & VM_EXECUTABLE)
  84.             added_exe_file_vma(mm);

  85.         /* Can addr have changed??
  86.          *
  87.          * Answer: Yes, several device drivers can do it in their
  88.          * f_op->mmap method. -DaveM
  89.          */
  90.         addr = vma->vm_start;
  91.         pgoff = vma->vm_pgoff;
  92.         vm_flags = vma->vm_flags;
  93.     } else if (vm_flags & VM_SHARED) {
  94.         error = shmem_zero_setup(vma);
  95.         if (error)
  96.             goto free_vma;
  97.     }

  98.     if (vma_wants_writenotify(vma)) {
  99.         pgprot_t pprot = vma->vm_page_prot;

  100.         /* Can vma->vm_page_prot have changed??
  101.          *
  102.          * Answer: Yes, drivers may have changed it in their
  103.          * f_op->mmap method.
  104.          *
  105.          * Ensures that vmas marked as uncached stay that way.
  106.          */
  107.         vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
  108.         if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
  109.             vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  110.     }

  111.     vma_link(mm, vma, prev, rb_link, rb_parent);
  112.     file = vma->vm_file;

  113.     /* Once vma denies write, undo our temporary denial count */
  114.     if (correct_wcount)
  115.         atomic_inc(&inode->i_writecount);
  116. out:
  117.     perf_event_mmap(vma);

  118.     mm->total_vm += len >> PAGE_SHIFT;
  119.     vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
  120.     if (vm_flags & VM_LOCKED) {
  121.         if (!mlock_vma_pages_range(vma, addr, addr + len))
  122.             mm->locked_vm += (len >> PAGE_SHIFT);
  123.     } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
  124.         make_pages_present(addr, addr + len);
  125.     return addr;

  126. unmap_and_free_vma:
  127.     if (correct_wcount)
  128.         atomic_inc(&inode->i_writecount);
  129.     vma->vm_file = NULL;
  130.     fput(file);

  131.     /* Undo any partial mapping done by a device driver. */
  132.     unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
  133.     charged = 0;
  134. free_vma:
  135.     kmem_cache_free(vm_area_cachep, vma);
  136. unacct_error:
  137.     if (charged)
  138.         vm_unacct_memory(charged);
  139.     return error;
  140. }


该函数实现的主要功能:当该函数被调用的时候,参数addr指向了一块空闲的待映射的MMAP区域的起始地址;函数先利用kmem_cache_zalloc分配出一个struct vm_area_struct实例对象,并对其进行相应的初始化,然后执行error = file->f_op->mmap(file, vma);调用驱动程序中的mmap方法。至此,用户层的mmap调用如何一步步到达驱动层的mmap方法已全部分析完毕。

阅读(591) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~