用户层mmap函数的原型:
-
void *mmap(void *start,size_t length,int prot,int flags,int fd,off_t offset);
参数解析:
start:映射区的起始地址,一般置NULL,即交给内核自动分配
length:映射区的大小。
prot:被映射的内存在映射区的保护方式
取值为PROT_NONE(不可访问),或者以下一个或多个值的按位或:PROT_READ(可读)、PROT_WRITE(可写)、PROT_EXEC(可执行)。
flags:映射区的类型
flags由以下几个常值指定:MAP_SHARED , MAP_PRIVATE , MAP_FIXED,其中,MAP_SHARED , MAP_PRIVATE必选其一,而MAP_FIXED则不推荐使用。
fd:要映射的文件的文件描述符(匿名映射,即flags中带MAP_ANONYMOUS时,该参数被忽略,通常置-1)。
offset:映射内容在文件中的起始偏移,必须是页大小的整数倍;一般置0,表示从文件的开头进行映射。
返回值:成功时返回映射到进程地址空间的起始地址;失败时返回MAP_FAILED(即(void *)-1),并设置errno。
PS:在man手册里面其中的参数会有很好的解释。
调用过程:
当用户层调用mmap函数时,应用程序通知系统告诉内核需要执行一个系统调用,希望系统切换到内核态。这种机制靠软件中断实现。
实现过程如下:首先用户程序为系统调用设置参数,其中一个参数是调用编号,设置完毕后,系统切换到内核态,通过一个基址+前面设置的编号,跳转到需要执行的系统调用
函数的地址上。该表一般在sys/entry.S中定义,如下:
-
// System call table (excerpt; the dotted lines stand for omitted entries).
// Each .long is the address of one system-call handler; the call number set up
// by user space selects the entry relative to the table base.
sys_call_table:
-
.long sys_restart_syscall
-
.long sys_exit
-
.long __sys_fork
-
.long sys_read
-
.long sys_write
-
.....
-
.long sys_mmap // some versions list .long sys_mmap2 here instead,
-
// but either way sys_mmap_pgoff ends up being called
-
....
-
省略
再看看sys_mmap和sys_mmap2是如何定义的:
-
asmlinkage long
-
sys_mmap(unsigned long addr,unsigned long len,unsigned long prot,unsigned long flags,unsigned long fd,off_t offset)
-
{
-
if(unlikely(offset & ~PAGE_MASK))
-
return -EINVAL;
-
return sys_mmap_pgoff(addr,len,prot,flags,fd,offset>>PAGE_SHIFT);
-
}
-
-
asmlinkage long
-
sys_mmap2(unsigned long addr,unsigned long len,unsigned long prot,unsigned long flags,unsigned long fd,off_t offset)
-
-
{
-
if(unlikely(offset & ~PAGE_MASK))
-
return -EINVAL;
-
return sys_mmap_pgoff(addr,len,prot,flags,fd,offset>>PAGE_SHIFT);
-
}
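sys_mmap和sys_mmap2最终都会调用sys_mmap_pgoff。sys_mmap_pgoff并不是以普通函数的形式直接写出来的,而是通过宏SYSCALL_DEFINE6(mmap_pgoff, ...)定义的:该宏在编译期展开,生成名为sys_mmap_pgoff的函数。也就是说,SYSCALL_DEFINE6只是定义系统调用函数的一种写法,而不是运行时"继续往下执行"的机制(若有错误,望不吝指教)。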
此处是SYSCALL_DEFINE6的宏定义:
-
/*
 * SYSCALL_DEFINE6(name, ...) is the six-argument form: it forwards to
 * SYSCALL_DEFINEx with x = 6 and the name prefixed by an underscore.
 */
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
-
-
/*
 * SYSCALL_DEFINEx emits two static string tables, types_<sname> and
 * args_<sname>, invokes SYSCALL_METADATA(sname, x), and then hands the
 * arguments to __SYSCALL_DEFINEx. That last macro is not shown here;
 * presumably it produces the actual sys<sname> function header that the
 * brace-enclosed body following the macro invocation attaches to.
 */
#define SYSCALL_DEFINEx(x, sname, ...) \
-
static const char *types_##sname[] = { \
-
__SC_STR_TDECL##x(__VA_ARGS__) \
-
}; \
-
static const char *args_##sname[] = { \
-
__SC_STR_ADECL##x(__VA_ARGS__) \
-
}; \
-
SYSCALL_METADATA(sname, x); \
-
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
sys_mmap_pgoff()函数做了如下几件事:
1、一些常规的错误检查工作
2、通过
file = fget(fd);得到对应的struct file对象指针
3、调用do_mmap_pgoff(file, addr, len, prot, flags, pgoff)函数完成后续的映射工作。
-
/*
 * sys_mmap_pgoff - common back end of the mmap system calls.
 *
 * addr, len, prot and flags are the values passed by user space, fd is the
 * file descriptor to map and pgoff is the file offset counted in pages.
 *
 * The function
 *   1. sanity-checks the flags,
 *   2. resolves fd to its struct file (or sets up an anonymous hugetlb file),
 *   3. calls do_mmap_pgoff() with the mm's mmap_sem held for writing.
 *
 * Returns the value of do_mmap_pgoff() on success or a negative errno:
 * -EINVAL for MAP_HUGETLB on a file-backed request, -EBADF when fd does not
 * resolve to a file, or the error reported by hugetlb_file_setup().
 */
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, pgoff)
{
	struct file *file = NULL;
	unsigned long retval = -EBADF;

	/* Step 1: sanity checks. */
	if (!(flags & MAP_ANONYMOUS)) {
		if (unlikely(flags & MAP_HUGETLB))
			return -EINVAL;
		file = fget(fd);	/* Step 2: fd -> struct file */
		if (!file)
			goto out;	/* retval is still -EBADF */
	} else if (flags & MAP_HUGETLB) {
		struct user_struct *user = NULL;
		/*
		 * VM_NORESERVE is used because the reservations will be
		 * taken when vm_ops->mmap() is called
		 * A dummy user value is used because we are not locking
		 * memory so no accounting is necessary
		 */
		len = ALIGN(len, huge_page_size(&default_hstate));
		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
					  &user, HUGETLB_ANONHUGE_INODE);
		if (IS_ERR(file))
			return PTR_ERR(file);
	}

	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

	/*
	 * Step 3: do the mapping while holding mmap_sem for writing.
	 * (The listing previously showed "¤t" here, which is "&current"
	 * damaged by HTML entity decoding.)
	 */
	down_write(&current->mm->mmap_sem);
	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
	up_write(&current->mm->mmap_sem);

	/* Drop the reference taken by fget()/hugetlb_file_setup(). */
	if (file)
		fput(file);
out:
	return retval;
}
do_mmap_pgoff()函数原型及用法:
-
/*
 * do_mmap_pgoff - validate an mmap request and pass it on to mmap_region().
 *
 * file is the file to map (NULL when there is none), addr the requested
 * address, len the length in bytes, prot and flags the PROT_* / MAP_* values,
 * and pgoff the file offset counted in pages.
 *
 * Returns the value of mmap_region() when every check passes, otherwise a
 * negative errno value (carried in the unsigned long return type).
 */
unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
-
unsigned long len, unsigned long prot,
-
unsigned long flags, unsigned long pgoff)
-
{
-
struct mm_struct * mm = current->mm;
-
struct inode *inode;
-
unsigned int vm_flags;
-
int error;
-
/* prot as originally requested; passed to security_file_mmap() alongside the adjusted prot */
unsigned long reqprot = prot;
-
-
/*
-
* Does the application expect PROT_READ to imply PROT_EXEC?
-
*
-
* (the exception is when the underlying filesystem is noexec
-
* mounted, in which case we dont add PROT_EXEC.)
-
*/
-
if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) // not an argument check: adds PROT_EXEC to readable mappings when the personality asks for it
-
if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC)))
-
prot |= PROT_EXEC;
-
-
/* The argument checks start here. */
if (!len)
-
return -EINVAL;
-
-
if (!(flags & MAP_FIXED))
-
addr = round_hint_to_min(addr);
-
-
/* Careful about overflows.. */
-
len = PAGE_ALIGN(len); // make the length a whole multiple of the page size; a result of 0 is treated as an overflow below
-
if (!len)
-
return -ENOMEM;
-
-
/* offset overflow? */
-
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) // reject requests whose end offset, counted in pages, would wrap around
-
return -EOVERFLOW;
-
-
/* Too many mappings? */
-
if (mm->map_count > sysctl_max_map_count)
-
return -ENOMEM;
-
-
/* Obtain the address to map to. we verify (or select) it and ensure
-
* that it represents a valid section of the address space.
-
*/
-
addr = get_unmapped_area(file, addr, len, pgoff, flags); // picks or validates the range to map; a result that is not page-aligned is returned as-is (presumably an error code)
-
if (addr & ~PAGE_MASK)
-
return addr;
-
-
/* Do simple checking here so the lower-level routines won't have
-
* to. we assume access permissions have been handled by the open
-
* of the memory object, so we don't do any here.
-
*/
-
vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
-
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
-
-
if (flags & MAP_LOCKED)
-
if (!can_do_mlock())
-
return -EPERM;
-
-
/* mlock MCL_FUTURE? */
-
if (vm_flags & VM_LOCKED) {
-
unsigned long locked, lock_limit;
-
/* pages already locked plus the pages of this request, compared with RLIMIT_MEMLOCK in pages */
locked = len >> PAGE_SHIFT;
-
locked += mm->locked_vm;
-
lock_limit = rlimit(RLIMIT_MEMLOCK);
-
lock_limit >>= PAGE_SHIFT;
-
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-
return -EAGAIN;
-
}
-
-
inode = file ? file->f_path.dentry->d_inode : NULL;
-
-
/* File-backed and file-less requests are checked separately. */
if (file) {
-
switch (flags & MAP_TYPE) {
-
case MAP_SHARED:
-
if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
-
return -EACCES;
-
-
/*
-
* Make sure we don't allow writing to an append-only
-
* file..
-
*/
-
if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
-
return -EACCES;
-
-
/*
-
* Make sure there are no mandatory locks on the file.
-
*/
-
if (locks_verify_locked(inode))
-
return -EAGAIN;
-
-
vm_flags |= VM_SHARED | VM_MAYSHARE;
-
if (!(file->f_mode & FMODE_WRITE))
-
vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
-
-
/* fall through */
-
case MAP_PRIVATE:
-
if (!(file->f_mode & FMODE_READ))
-
return -EACCES;
-
if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) {
-
if (vm_flags & VM_EXEC)
-
return -EPERM;
-
vm_flags &= ~VM_MAYEXEC;
-
}
-
-
/* the file must provide an mmap method; mmap_region() calls it */
if (!file->f_op || !file->f_op->mmap)
-
return -ENODEV;
-
break;
-
-
default:
-
return -EINVAL;
-
}
-
} else {
-
switch (flags & MAP_TYPE) {
-
case MAP_SHARED:
-
/*
-
* Ignore pgoff.
-
*/
-
pgoff = 0;
-
vm_flags |= VM_SHARED | VM_MAYSHARE;
-
break;
-
case MAP_PRIVATE:
-
/*
-
* Set pgoff according to addr for anon_vma.
-
*/
-
pgoff = addr >> PAGE_SHIFT;
-
break;
-
default:
-
return -EINVAL;
-
}
-
}
-
-
error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
-
if (error)
-
return error;
-
-
return mmap_region(file, addr, len, flags, vm_flags, pgoff);// the actual mapping work is done by mmap_region()
-
}
mmap_region原型及用法:
-
/*
 * mmap_region - set up the vma for a new mapping of [addr, addr + len).
 *
 * Existing mappings that overlap the range are unmapped first. The range is
 * then either merged into a neighbouring vma or described by a newly
 * allocated one; for a file-backed mapping the file's own mmap method is
 * called on that new vma.
 *
 * Returns addr on success (possibly updated from vma->vm_start after the
 * file's mmap method ran) or a negative errno value on failure.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
-
unsigned long len, unsigned long flags,
-
unsigned int vm_flags, unsigned long pgoff)
-
{
-
struct mm_struct *mm = current->mm;
-
struct vm_area_struct *vma, *prev;
-
/* set once deny_write_access() succeeded, so that i_writecount is incremented again later */
int correct_wcount = 0;
-
int error;
-
struct rb_node **rb_link, *rb_parent;
-
/* pages accounted via security_vm_enough_memory(); given back on the error path */
unsigned long charged = 0;
-
struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
-
-
/* Clear old maps */
-
error = -ENOMEM;
-
munmap_back:
-
vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
-
if (vma && vma->vm_start < addr + len) {
-
if (do_munmap(mm, addr, len))
-
return -ENOMEM;
-
goto munmap_back;
-
}
-
-
/* Check against address space limit. */
-
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
-
return -ENOMEM;
-
-
/*
-
* Set 'VM_NORESERVE' if we should not account for the
-
* memory use of this mapping.
-
*/
-
if ((flags & MAP_NORESERVE)) {
-
/* We honor MAP_NORESERVE if allowed to overcommit */
-
if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
-
vm_flags |= VM_NORESERVE;
-
-
/* hugetlb applies strict overcommit unless MAP_NORESERVE */
-
if (file && is_file_hugepages(file))
-
vm_flags |= VM_NORESERVE;
-
}
-
-
/*
-
* Private writable mapping: check memory availability
-
*/
-
if (accountable_mapping(file, vm_flags)) {
-
charged = len >> PAGE_SHIFT;
-
if (security_vm_enough_memory(charged))
-
return -ENOMEM;
-
vm_flags |= VM_ACCOUNT;
-
}
-
-
/*
-
* Can we just expand an old mapping?
-
*/
-
vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
-
if (vma)
-
goto out;
-
-
/*
-
* Determine the object being mapped and call the appropriate
-
* specific mapper. the address has already been validated, but
-
* not unmapped, but the maps are removed from the list.
-
*/
-
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-
if (!vma) {
-
error = -ENOMEM;
-
goto unacct_error;
-
}
-
-
/* Fill in the new vma from the arguments. */
vma->vm_mm = mm;
-
vma->vm_start = addr;
-
vma->vm_end = addr + len;
-
vma->vm_flags = vm_flags;
-
vma->vm_page_prot = vm_get_page_prot(vm_flags);
-
vma->vm_pgoff = pgoff;
-
INIT_LIST_HEAD(&vma->anon_vma_chain);
-
-
if (file) {
-
error = -EINVAL;
-
if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
-
goto free_vma;
-
if (vm_flags & VM_DENYWRITE) {
-
error = deny_write_access(file);
-
if (error)
-
goto free_vma;
-
correct_wcount = 1;
-
}
-
vma->vm_file = file;
-
get_file(file);
-
error = file->f_op->mmap(file, vma); // call the mmap method from the file's file_operations (for a device file, the driver's mmap)
-
if (error)
-
goto unmap_and_free_vma;
-
if (vm_flags & VM_EXECUTABLE)
-
added_exe_file_vma(mm);
-
-
/* Can addr have changed??
-
*
-
* Answer: Yes, several device drivers can do it in their
-
* f_op->mmap method. -DaveM
-
*/
-
addr = vma->vm_start;
-
pgoff = vma->vm_pgoff;
-
vm_flags = vma->vm_flags;
-
} else if (vm_flags & VM_SHARED) {
-
error = shmem_zero_setup(vma);
-
if (error)
-
goto free_vma;
-
}
-
-
if (vma_wants_writenotify(vma)) {
-
pgprot_t pprot = vma->vm_page_prot;
-
-
/* Can vma->vm_page_prot have changed??
-
*
-
* Answer: Yes, drivers may have changed it in their
-
* f_op->mmap method.
-
*
-
* Ensures that vmas marked as uncached stay that way.
-
*/
-
vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
-
if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
-
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
}
-
-
vma_link(mm, vma, prev, rb_link, rb_parent);
-
file = vma->vm_file;
-
-
/* Once vma denies write, undo our temporary denial count */
-
if (correct_wcount)
-
atomic_inc(&inode->i_writecount);
-
/* Success path, shared by the merged and the newly created vma. */
out:
-
perf_event_mmap(vma);
-
-
mm->total_vm += len >> PAGE_SHIFT;
-
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
-
if (vm_flags & VM_LOCKED) {
-
if (!mlock_vma_pages_range(vma, addr, addr + len))
-
mm->locked_vm += (len >> PAGE_SHIFT);
-
} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
-
make_pages_present(addr, addr + len);
-
return addr;
-
-
/* Error paths: each label undoes one more step and falls through to the next. */
unmap_and_free_vma:
-
if (correct_wcount)
-
atomic_inc(&inode->i_writecount);
-
vma->vm_file = NULL;
-
fput(file);
-
-
/* Undo any partial mapping done by a device driver. */
-
unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
-
charged = 0;
-
free_vma:
-
kmem_cache_free(vm_area_cachep, vma);
-
unacct_error:
-
if (charged)
-
vm_unacct_memory(charged);
-
return error;
-
}
该函数实现的主要功能:当该函数被调用的时候,参数addr指向了一块空闲的待映射的MMAP区域的起始地址,利用kmem_cache_zalloc分配出一个struct vm_area_struct实例对象,然后对其进行相应的初始化,
然后执行
error = file->f_op->mmap(file, vma);引用驱动程序中mmap方法,至此用户层如何调用驱动层的mmap方法已全部实现。
阅读(591) | 评论(0) | 转发(0) |