Chinaunix首页 | 论坛 | 博客
  • 博客访问: 1148259
  • 博文数量: 241
  • 博客积分: 10
  • 博客等级: 民兵
  • 技术积分: 2279
  • 用 户 组: 普通用户
  • 注册时间: 2012-11-27 19:53
个人简介

JustForFun

文章分类

全部博文(241)

文章存档

2023年(8)

2022年(2)

2021年(3)

2020年(30)

2019年(11)

2018年(27)

2017年(54)

2016年(83)

2015年(23)

我的朋友

分类: LINUX

2016-08-17 14:43:20

linux/module.h

enum module_state  {  

    MODULE_STATE_LIVE,  // module is in normal use (live); value 0  

    MODULE_STATE_COMING, // module is currently being loaded; value 1   

    MODULE_STATE_GOING,  // module is currently being unloaded; value 2  

};

 

struct module
{
 enum module_state state;// module state (one of enum module_state)
 
/* Member of list of modules */
 struct list_head list;
                       // list node linking this module into the global `modules` list,
                         // which find_module() walks to look a module up by name
 

 /* Unique handle for this module */
 char name[MODULE_NAME_LEN];// module handle: the module's name (compared with strcmp() in find_module())
 /* Sysfs stuff. */
 struct module_kobject mkobj;
 struct module_attribute *modinfo_attrs;
 const char *version;
 const char *srcversion;
 struct kobject *holders_dir;
 /* Exported symbols */
 const struct kernel_symbol *syms;// exported symbols
 const unsigned long *crcs;
 unsigned int num_syms; // number of exported symbols
 /* GPL-only exported symbols. */
 unsigned int num_gpl_syms;
 const struct kernel_symbol *gpl_syms;
 const unsigned long *gpl_crcs;
#ifdef CONFIG_UNUSED_SYMBOLS
 /* unused exported symbols. */
 const struct kernel_symbol *unused_syms;
 const unsigned long *unused_crcs;
 unsigned int num_unused_syms;
 /* GPL-only, unused exported symbols. */
 unsigned int num_unused_gpl_syms;
 const struct kernel_symbol *unused_gpl_syms;
 const unsigned long *unused_gpl_crcs;
#endif
 /* symbols that will be GPL-only in the near future. */
 const struct kernel_symbol *gpl_future_syms;
 const unsigned long *gpl_future_crcs;
 unsigned int num_gpl_future_syms;
 /* Exception table */
 unsigned int num_exentries;
 struct exception_table_entry *extable;
 /* Startup function. */
 int (*init)(void);                // module initialization function (module entry point); run via do_one_initcall(mod->init)
 /* If this is non-NULL, vfree after init() returns */
 void *module_init;// init-time region: init_module passes it to module_free() and sets it to NULL once init has run
 /* Here is the actual code + data, vfree'd on unload. */
 void *module_core;
 /* Here are the sizes of the init and core sections */
 unsigned int init_size, core_size;
 /* The size of the executable code in each section.  */
 unsigned int init_text_size, core_text_size;
 /* The handle returned from unwind_add_table. */
 void *unwind_info;
 /* Arch-specific module values */
 struct mod_arch_specific arch;// architecture-specific data
 unsigned int taints; /* same bits as kernel:tainted */
#ifdef CONFIG_GENERIC_BUG
 /* Support for BUG */
 unsigned num_bugs;
 struct list_head bug_list;
 struct bug_entry *bug_table;
#endif
#ifdef CONFIG_KALLSYMS
 /* We keep the symbol and string tables for kallsyms. */
 Elf_Sym *symtab;
 unsigned int num_symtab;
 char *strtab;
 /* Section attributes */
 struct module_sect_attrs *sect_attrs;
 /* Notes attributes */
 struct module_notes_attrs *notes_attrs;
#endif
 /* Per-cpu data. */
 void *percpu;// per-CPU data
 /* The command line arguments (may be mangled).  People like
    keeping pointers to this stuff */
 char *args;               // command-line arguments
#ifdef CONFIG_MARKERS
 struct marker *markers;
 unsigned int num_markers;
#endif
#ifdef CONFIG_TRACEPOINTS
 struct tracepoint *tracepoints;
 unsigned int num_tracepoints;
#endif
#ifdef CONFIG_MODULE_UNLOAD
 /* What modules depend on me? */
 struct list_head modules_which_use_me;// list of modules that depend on this module; delete_module refuses to unload while it is non-empty
 /* Who is waiting for us to be unloaded */
 struct task_struct *waiter;  // task waiting for this module to be unloaded
 /* Destruction function. */
 void (*exit)(void);
      // module exit function
 /* Reference counts */
 struct module_ref ref[NR_CPUS];// reference counters, one entry per CPU (module_put() decrements the current CPU's entry)
#endif
};



include/linux/init.h

/* initcalls are now grouped by functionality into separate
 * subsections. Ordering inside the subsections is determined
 * by link order.
 * For backwards compatibility, initcall() puts the call in
 * the device init subsection.
 *
 * The `id' arg to __define_initcall() is needed so that multiple initcalls
 * can point at the same handler without causing duplicate-symbol build errors.
 */

/*
 * Define a static initcall_t variable named __initcall_<fn><id>, initialised
 * to fn and placed in the ".initcall<level>.init" section.
 */
#define __define_initcall(level,fn,id) \
    static initcall_t __initcall_##fn##id __used \
    __attribute__((__section__(".initcall" level ".init"))) = fn
///////////////////////////
/* __initcall(fn) is simply an alias for device_initcall(fn). */
#define __initcall(fn) device_initcall(fn)
/**
 * module_init() - driver initialization entry point
 * @x: function to be run at kernel boot time or module insertion
 *
 * module_init() will either be called during do_initcalls() (if
 * builtin) or at module insertion time (if a module).  There can only
 * be one per module.
 */

/include/linux/init.h
#define module_init(x)    __initcall(x);
#define device_initcall(fn)        __define_initcall("6",fn,6)
可以发现这些*_initcall(fn)最终都是通过__define_initcall(level,fn)宏定义生成的。//这个版本少了id项
__define_initcall宏定义如下:
/*
 * Older two-argument form (no id): defines a static initcall_t variable named
 * __initcall_<fn>, initialised to fn, in the ".initcall<level>.init" section.
 */
#define __define_initcall(level,fn) \
       static initcall_t __initcall_##fn __attribute_used__ \
       __attribute__((__section__(".initcall" level ".init"))) = fn

这句话的意思为定义一个initcall_t型的函数指针变量,并将其初始化为fn;该变量存放在.initcall"level".init section内。.initcall"level".init section定义在vmlinux.lds内。
/* arch/arm/kernel/vmlinux.lds */
  __initcall_start = .;
   *(.initcallearly.init) __early_initcall_end = .; *(.initcall0.init) *(.initcall0s.init) *(.initcall1.init) *(.initcall1s.init) *(.initcall2.init) *(.initcall2s.init) *(.initcall3.init) *(.initcall3s.init) *(.initcall4.init) *(.initcall4s.init) *(.initcall5.init) *(.initcall5s.init) *(.initcallrootfs.init) *(.initcall6.init) *(.initcall6s.init) *(.initcall7.init) *(.initcall7s.init)
  __initcall_end = .;

正好包括了上面init.h里定义的从core_initcall到late_initcall等7个level等级的.initcall”level”.init section. 因此通过不同的*_initcall声明的函数指针最终都会存放不同level等级的.initcall”level”.init section内。这些不同level的section按level等级高低依次存放。

下面我们再来看看,内核是什么时候调用存储在.initcall”level”.init section内的函数的。

内核是通过do_initcalls函数循环调用执行initcall.init section内的函数的,流程如下:
main.c
start_kernel -> rest_init -> kernel_thread -> kernel_init -> do_basic_setup -> do_initcalls
////////////////////////////
init/main.c
extern initcall_t __initcall_start[], __initcall_end[], __early_initcall_end[];

/*
 * Run every initcall stored between __early_initcall_end and __initcall_end,
 * in address order, then flush any work those initcalls scheduled.
 */
static void __init do_initcalls(void)
{
    initcall_t *call;

    /*
     * Each table entry is a function pointer that do_one_initcall() invokes.
     * The init_module system call in kernel/module.c,
     *   SYSCALL_DEFINE3(init_module, void __user *, umod,
     *                   unsigned long, len, const char __user *, uargs)
     * also calls do_one_initcall().
     */
    for (call = __early_initcall_end; call < __initcall_end; call++)
        do_one_initcall(*call);

    /* Make sure there is no pending stuff from the initcall sequence */
    flush_scheduled_work();

}
 
///////////////////////////////////////////////// 
 

/* Search for module by name: must hold module_mutex. */
/*
 * Look a module up by name on the global `modules` list.
 * Returns the first entry whose name compares equal to @name, or NULL when
 * the walk finishes without a match.
 */
static struct module *find_module(const char *name)
{
    struct module *candidate;

    list_for_each_entry(candidate, &modules, list) {
        /* Skip entries whose name differs. */
        if (strcmp(candidate->name, name) != 0)
            continue;
        return candidate;
    }

    return NULL;
}
///////////////////////
/*
 * Initialise the bookkeeping table for module per-cpu space.
 *
 * The table starts with two entries: entry 0 records the static in-kernel
 * per-cpu data (stored as a negative size), entry 1 records the remaining
 * free room. If no room is left for modules, only the first entry is kept.
 *
 * Returns 0 on success, or -ENOMEM if the table cannot be allocated.
 */
static int percpu_modinit(void)
{
    pcpu_num_used = 2;
    pcpu_num_allocated = 2;
    pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
                GFP_KERNEL);
    if (!pcpu_size) {
        /*
         * Allocation failed: do not advertise entries that do not exist,
         * and do not dereference the NULL table below.
         */
        pcpu_num_used = 0;
        pcpu_num_allocated = 0;
        return -ENOMEM;
    }
    /* Static in-kernel percpu data (used). */
    pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
    /* Free room. */
    pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
    if (pcpu_size[1] < 0) {
        printk(KERN_ERR "No per-cpu room for modules.\n");
        pcpu_num_used = 1;
    }

    return 0;
}
__initcall(percpu_modinit);

模块加载由内核的系统调用init_module完成。

linux3.5.2/kernel/module.c:3009

/* This is where the real work happens */

SYSCALL_DEFINE3(init_module, void __user *, umod,

       unsigned long, len, const char __user *, uargs)

{

    struct module *mod;

    int ret = 0;

    ……

    /* Do all the hard work */

    mod = load_module(umod, len, uargs);// load the module

    ……

    /* Start the module */

    if (mod->init != NULL)

       ret = do_one_initcall(mod->init);// invoke the module's init function

    ……

    return 0;

}

  模块加载

系统调用init_module由SYSCALL_DEFINE3(init_module...)实现,其中有两个关键的函数调用。load_module用于模块加载,do_one_initcall用于回调模块的init函数。

/* This is where the real work happens */
/*
 * init_module system call: load a module image and run its init function.
 *
 * Steps, in order:
 *   - require CAP_SYS_MODULE (otherwise -EPERM);
 *   - take module_mutex interruptibly (otherwise -EINTR);
 *   - load_module() does the loading; an error pointer is returned as its errno;
 *   - drop the mutex, notify MODULE_STATE_COMING, then call mod->init (if any);
 *   - init returned < 0: mark the module GOING, drop the reference, notify,
 *     free the module and return that error;
 *   - init returned > 0: warn and continue loading;
 *   - otherwise mark the module LIVE, wake waiters, drop the initial
 *     reference and release the init-time region.
 *
 * Returns 0 on success or a negative errno.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
        unsigned long, len, const char __user *, uargs)

{
    struct module *mod;
    int ret = 0;

    /* Must have permission */
    if (!capable(CAP_SYS_MODULE))

        return -EPERM;

    /* Only one module load at a time, please */
    if (mutex_lock_interruptible(&module_mutex) != 0)
        return -EINTR;

    /* Do all the hard work */
    mod = load_module(umod, len, uargs);
    if (IS_ERR(mod)) {
        mutex_unlock(&module_mutex);
        return PTR_ERR(mod);
    }

    /* Drop lock so they can recurse */
    mutex_unlock(&module_mutex);

    blocking_notifier_call_chain(&module_notify_list,
            MODULE_STATE_COMING, mod);

    /*
     * Start the module.
     *
     * do_initcalls() also calls do_one_initcall(). Per the module_init()
     * documentation, the init function runs from do_initcalls() when the
     * code is built into the kernel, and at module insertion time (this
     * path) when it is built as a loadable module.
     */
    if (mod->init != NULL)
        ret = do_one_initcall(mod->init);
    if (ret < 0) {
        /* Init routine failed: abort.  Try to protect us from
                   buggy refcounters. */
        mod->state = MODULE_STATE_GOING;
        synchronize_sched();
        module_put(mod);
        blocking_notifier_call_chain(&module_notify_list,
                         MODULE_STATE_GOING, mod);
        mutex_lock(&module_mutex);
        free_module(mod);

        mutex_unlock(&module_mutex);
        wake_up(&module_wq);
        return ret;
    }
    if (ret > 0) {
        printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
                    "it should follow 0/-E convention\n"
               KERN_WARNING "%s: loading module anyway...\n",
               __func__, mod->name, ret,
               __func__);
        dump_stack();
    }

    /* Now it's a first class citizen!  Wake up anyone waiting for it. */
    mod->state = MODULE_STATE_LIVE;

    wake_up(&module_wq);

    mutex_lock(&module_mutex);
    /* Drop initial reference. */
    module_put(mod);
    unwind_remove_table(mod->unwind_info, 1);
    /* The init-time region is no longer needed once init has run. */
    module_free(mod, mod->module_init);
    mod->module_init = NULL;
    mod->init_size = 0;
    mod->init_text_size = 0;
    mutex_unlock(&module_mutex);

    return 0;
}

//////////////////////////////////////////////////

模块卸载由内核的系统调用delete_module完成。
 
/*
 * delete_module system call: unload the module named by name_user.
 *
 * Requires CAP_SYS_MODULE (-EPERM). The name is copied from user space
 * (-EFAULT on failure) and looked up under module_mutex (-EINTR if the lock
 * wait is interrupted, -ENOENT if no such module). Unloading is refused with
 * -EWOULDBLOCK while other modules use this one, and with -EBUSY if the
 * module is not LIVE or has an init function but no exit function (unless
 * try_force_unload() permits it). Otherwise the module is stopped, its exit
 * function is called, notifiers are told it is GOING, and it is freed.
 */
SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
        unsigned int, flags)

{
    struct module *mod;
    char name[MODULE_NAME_LEN];
    int ret, forced = 0;

    if (!capable(CAP_SYS_MODULE))
        return -EPERM;

    /* Copy at most MODULE_NAME_LEN-1 bytes, then force NUL termination. */
    if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
        return -EFAULT;
    name[MODULE_NAME_LEN-1] = '\0';

    if (mutex_lock_interruptible(&module_mutex) != 0)
        return -EINTR;

    mod = find_module(name);
    if (!mod) {
        ret = -ENOENT;
        goto out;
    }

    if (!list_empty(&mod->modules_which_use_me)) {
        /* Other modules depend on us: get rid of them first. */
        ret = -EWOULDBLOCK;
        goto out;
    }

    /* Doing init or already dying? */
    if (mod->state != MODULE_STATE_LIVE) {
        /* FIXME: if (force), slam module count and wake up
                   waiter --RR */
        DEBUGP("%s already dying\n", mod->name);
        ret = -EBUSY;
        goto out;
    }

    /* If it has an init func, it must have an exit func to unload */
    if (mod->init && !mod->exit) {
        forced = try_force_unload(flags);
        if (!forced) {
            /* This module can't be removed */
            ret = -EBUSY;
            goto out;
        }
    }

    /* Set this up before setting mod->state */
    mod->waiter = current;

    /* Stop the machine so refcounts can't move and disable module. */
    ret = try_stop_module(mod, flags, &forced);
    if (ret != 0)
        goto out;

    /* Never wait if forced. */
    if (!forced && module_refcount(mod) != 0)
        wait_for_zero_refcount(mod);

    /* The mutex is dropped around the exit call and the notifier chain. */
    mutex_unlock(&module_mutex);
    /* Final destruction now noone is using it. */
    if (mod->exit != NULL)
        mod->exit();// call the module's exit function
    blocking_notifier_call_chain(&module_notify_list,
                     MODULE_STATE_GOING, mod);
    mutex_lock(&module_mutex);
    /* Store the name of the last unloaded module for diagnostic purposes */
    strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
    unregister_dynamic_debug_module(mod->name);
    free_module(mod);// free (unload) the module

 out:
    mutex_unlock(&module_mutex);
    return ret;
}

函数在内核中的位置:linux-2.6.30/kernel/module.c

该函数的功能是将一个特定模块module的引用计数减1 ,这样当一个模块的引用计数因为不为0而不能从内核中卸载时,可以调用此函数一次或多次,实现对模块计数的清零,从而实现模块卸载。
/*
 * Drop one reference on @module; a NULL @module is ignored.
 *
 * The counter that is decremented is the entry of module->ref belonging to
 * the CPU returned by get_cpu(). If the module is no longer live afterwards,
 * the task recorded in module->waiter is woken up.
 */
void module_put(struct module *module)
{
 unsigned int this_cpu;

 if (!module)
  return;

 this_cpu = get_cpu();
 local_dec(&module->ref[this_cpu].count);

 /* Someone may be waiting for the reference to be dropped. */
 if (unlikely(!module_is_live(module)))
  wake_up_process(module->waiter);

 put_cpu();
}
//////////////////////////////////////
/*
 modprobe_path is set via /proc/sys.
*/
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";

/**
 * request_module - try to load a kernel module
 * @fmt:     printf style format string for the name of the module
 * @varargs: arguements as specified in the format string
 *
 * Load a module using the user mode module loader. The function returns
 * zero on success or a negative errno code on failure. Note that a
 * successful module load does not mean the module did not then unload
 * and exit on an error of its own. Callers must check that the service
 * they requested is now available not blindly invoke it.
 *
 * If module auto-loading support is disabled then this function
 * becomes a no-operation.
 */
kernel/kmod.c
/*
 * Format a module name from @fmt and run the user-mode helper at
 * modprobe_path to load it.
 *
 * Returns -ENAMETOOLONG if the formatted name does not fit in
 * MODULE_NAME_LEN bytes, -ENOMEM if too many requests are already in
 * flight, and otherwise the result of call_usermodehelper().
 */
int request_module(const char *fmt, ...)
{
 va_list args;
 char module_name[MODULE_NAME_LEN];
 unsigned int max_modprobes;
 int ret;
 /* Helper command line: <modprobe_path> -q -- <module_name> */
 char *argv[] = { modprobe_path, "-q", "--", module_name, NULL };
 /* Fixed environment handed to the helper. */
 static char *envp[] = { "HOME=/",
    "TERM=linux",
    "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
    NULL };
 /* Number of request_module() calls currently in progress. */
 static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
 /* Counts "runaway loop" messages so that only the first five are printed. */
 static int kmod_loop_msg;

 va_start(args, fmt);
 ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
 va_end(args);
 /* A result >= the buffer size means the name was truncated. */
 if (ret >= MODULE_NAME_LEN)
  return -ENAMETOOLONG;

 /* If modprobe needs a service that is in a module, we get a recursive
  * loop.  Limit the number of running kmod threads to max_threads/2 or
  * MAX_KMOD_CONCURRENT, whichever is the smaller.  A cleaner method
  * would be to run the parents of this process, counting how many times
  * kmod was invoked.  That would mean accessing the internals of the
  * process tables to get the command line, proc_pid_cmdline is static
  * and it is not worth changing the proc code just to handle this case.
  * KAO.
  *
  * "trace the ppid" is simple, but will fail if someone's
  * parent exits.  I think this is as good as it gets. --RR
  */
 max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
 atomic_inc(&kmod_concurrent);
 if (atomic_read(&kmod_concurrent) > max_modprobes) {
  /* We may be blaming an innocent here, but unlikely */
  if (kmod_loop_msg++ < 5)
   printk(KERN_ERR
          "request_module: runaway loop modprobe %s\n",
          module_name);
  /* Undo the increment above before bailing out. */
  atomic_dec(&kmod_concurrent);
  return -ENOMEM;
 }

 /* NOTE(review): the final argument 1 is presumably the "wait" mode -- confirm against call_usermodehelper(). */
 ret = call_usermodehelper(modprobe_path, argv, envp, 1);
 atomic_dec(&kmod_concurrent);
 return ret;
}
 
Linux把内核也看作一个模块。那么模块与模块之间如何进行交互呢,一种常用的方法就是共享变量和函数。但并不是模块中的每个变量和函数都能被共享,内核只把各个模块中主要的变量和函数放在一个特定的区段,这些变量和函数就统称为符号。到底哪些符号可以被共享? Linux内核有自己的规定。例如 EXPORT_SYMBOL_GPL(sysfs_create_link); 在fs/sysfs/symlink.c
其中宏定义EXPORT_SYMBOL()本身的含义是“移出符号”。为什么说是“移出”呢?因为这些符号本来是内核内部的符号,通过这个宏放在一个公开的地方,使得装入到内核中的其他模块可以引用它们。
实际上,仅仅知道这些符号的名字是不够的,还得知道它们在内核映像中的地址才有意义。因此,内核中定义了如下结构来描述模块的符号:
 
/* Describes one exported symbol: its address and its name. */
struct module_symbol

{

        unsigned long value; /* address of the symbol in the kernel image */

        const char *name;   /* pointer to the symbol's name string */

};

从后面对EXPORT_SYMBOL宏的定义可以看出,连接程序(ld)在连接内核映像时将这个结构存放在一个叫做“__ksymtab”的区段中,而这个区段中所有的符号就组成了模块对外“移出”的符号表,这些符号可供内核及已安装的模块来引用。而其他“对内”的符号则由连接程序自行生成,并仅供内部使用。
   
EXPORT_SYMBOL相关的定义在include/linux/module.h中:
   #define __MODULE_STRING_1(x)    #x
   #define __MODULE_STRING(x)      __MODULE_STRING_1(x)
   
#define __EXPORT_SYMBOL(sym, str)                       \

const char __kstrtab_##sym[]                            \

__attribute__((section(".kstrtab"))) = str;             \

const struct module_symbol __ksymtab_##sym              \

__attribute__((section("__ksymtab"))) =                 \

 { (unsigned long)&sym, __kstrtab_##sym }

 #if defined(MODVERSIONS) || !defined(CONFIG_MODVERSIONS)

 #define EXPORT_SYMBOL(var)  __EXPORT_SYMBOL(var, __MODULE_STRING(var)) 


 
下面我们以EXPORT_SYMBOL(schedule)为例,来看一下这个宏的结果是什么。首先EXPORT_SYMBOL(schedule)被展开成__EXPORT_SYMBOL(schedule, "schedule")。而__EXPORT_SYMBOL()定义了两个语句,第一个语句定义了一个名为__kstrtab_schedule的字符串,将字符串的内容初始化为“schedule”,并将其置于内核映像中的.kstrtab区段,注意这是一个专门存放符号名字符串的区段。第二个语句则定义了一个名为__ksymtab_schedule的module_symbol结构,将其初始化为{(unsigned long)&schedule, __kstrtab_schedule},并将其置于内核映像中的__ksymtab区段。这样,module_symbol结构中的域value的值就为schedule在内核映像中的地址,而指针name则指向字符串“schedule”。 

 


阅读(1614) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~