linux 2.6.28 kernel之module源码解析-文峰聊书斋-ChinaUnix博客

/*
 * struct module - in-kernel descriptor of one module.
 *
 * Holds the module's state and name, its sysfs objects, the exported symbol
 * tables (plain, GPL-only, unused, GPL-future), the exception table, the
 * init/core memory regions with their sizes, optional debugging tables, and,
 * when CONFIG_MODULE_UNLOAD is set, the bookkeeping needed to unload it.
 */
struct module
{
enum module_state state;// current module state
/* Member of list of modules */
struct list_head list;
// Links this module into the global module list; find_module() walks that
// list with list_for_each_entry(mod, &modules, list).
// NOTE(review): the original note says new modules are inserted at the head
// of that list — the insertion code is not shown here, confirm.

/* Unique handle for this module */
char name[MODULE_NAME_LEN];// module name, used as its handle

/* Sysfs stuff. */
struct module_kobject mkobj;
struct module_attribute *modinfo_attrs;
const char *version;
const char *srcversion;
struct kobject *holders_dir;

/* Exported symbols */
const struct kernel_symbol *syms;// exported symbols
const unsigned long *crcs;
unsigned int num_syms; // number of exported symbols

/* GPL-only exported symbols. */
unsigned int num_gpl_syms;
const struct kernel_symbol *gpl_syms;
const unsigned long *gpl_crcs;

#ifdef CONFIG_UNUSED_SYMBOLS
/* unused exported symbols. */
const struct kernel_symbol *unused_syms;
const unsigned long *unused_crcs;
unsigned int num_unused_syms;

/* GPL-only, unused exported symbols. */
unsigned int num_unused_gpl_syms;
const struct kernel_symbol *unused_gpl_syms;
const unsigned long *unused_gpl_crcs;
#endif

/* symbols that will be GPL-only in the near future. */
const struct kernel_symbol *gpl_future_syms;
const unsigned long *gpl_future_crcs;
unsigned int num_gpl_future_syms;

/* Exception table */
unsigned int num_exentries;
struct exception_table_entry *extable;

/* Startup function. */
int (*init)(void); // module initialisation (entry) function

/* If this is non-NULL, vfree after init() returns */
void *module_init;// init-section memory: init_module() releases it with
                  // module_free() and resets this pointer to NULL once
                  // init() has returned successfully

/* Here is the actual code + data, vfree'd on unload. */
void *module_core;

/* Here are the sizes of the init and core sections */
unsigned int init_size, core_size;

/* The size of the executable code in each section. */
unsigned int init_text_size, core_text_size;

/* The handle returned from unwind_add_table. */
void *unwind_info;

/* Arch-specific module values */
struct mod_arch_specific arch;// architecture-specific data

unsigned int taints; /* same bits as kernel:tainted */

#ifdef CONFIG_GENERIC_BUG
/* Support for BUG */
unsigned num_bugs;
struct list_head bug_list;
struct bug_entry *bug_table;
#endif

#ifdef CONFIG_KALLSYMS
/* We keep the symbol and string tables for kallsyms. */
Elf_Sym *symtab;
unsigned int num_symtab;
char *strtab;

/* Section attributes */
struct module_sect_attrs *sect_attrs;

/* Notes attributes */
struct module_notes_attrs *notes_attrs;
#endif

/* Per-cpu data. */
void *percpu;// per-CPU data of this module

/* The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args; // command-line arguments
#ifdef CONFIG_MARKERS
struct marker *markers;
unsigned int num_markers;
#endif
#ifdef CONFIG_TRACEPOINTS
struct tracepoint *tracepoints;
unsigned int num_tracepoints;
#endif

#ifdef CONFIG_MODULE_UNLOAD
/* What modules depend on me? */
struct list_head modules_which_use_me;// list of modules that depend on THIS
                                      // module; delete_module() refuses to
                                      // unload while it is non-empty

/* Who is waiting for us to be unloaded */
struct task_struct *waiter; // task waiting for this module to be unloaded

/* Destruction function. */
void (*exit)(void); // module exit function

/* Reference counts */
struct module_ref ref[NR_CPUS];// one reference counter slot per CPU;
                               // module_put() decrements ref[cpu].count
#endif
};

include/linux/init.h

/* initcalls are now grouped by functionality into separate
* subsections. Ordering inside the subsections is determined
* by link order.
* For backwards compatibility, initcall() puts the call in
* the device init subsection.
*
* The `id' arg to __define_initcall() is needed so that multiple initcalls
* can point at the same handler without causing duplicate-symbol build errors.
*/

/*
 * __define_initcall(level, fn, id) - register fn as an initcall.
 *
 * Emits a static initcall_t variable named __initcall_<fn><id>, initialised
 * to fn, marked __used and placed in the section ".initcall<level>.init".
 * level is a string literal that is pasted into the section name; id only
 * contributes to the variable name.
 */
#define __define_initcall(level,fn,id) \
    static initcall_t __initcall_##fn##id __used \
    __attribute__((__section__(".initcall" level ".init"))) = fn
///////////////////////////
/* Backwards-compatible spelling: a plain __initcall() is a device_initcall(). */
#define __initcall(fn) device_initcall(fn)
/**
* module_init() - driver initialization entry point
* @x: function to be run at kernel boot time or module insertion
*
* module_init() will either be called during do_initcalls() (if
* builtin) or at module insertion time (if a module). There can only
* be one per module.
*/
/include/linux/init.h
#define module_init(x)    __initcall(x);
#define device_initcall(fn)        __define_initcall("6",fn,6)
可以发现这些*_initcall(fn)最终都是通过__define_initcall(level,fn)宏定义生成的。//这个版本少了id项
__define_initcall宏定义如下：
/*
 * Older two-argument form of __define_initcall(): the variable is named
 * __initcall_<fn> (no id suffix) and uses __attribute_used__; it is still
 * placed in the section ".initcall<level>.init".
 */
#define __define_initcall(level,fn) \
   static initcall_t __initcall_##fn __attribute_used__ \
   __attribute__((__section__(".initcall" level ".init"))) = fn

这句话的意思是：定义一个initcall_t类型的函数指针变量，并将其初始化为fn；这个指针变量被存放在名为.initcall"level".init的section内（存放的是函数指针，而不是函数本身）。.initcall"level".init section定义在vmlinux.lds内。
/* arch/arm/kernel/vmlinux.lds */
/*
 * Layout of the initcall pointer table: __initcall_start marks its beginning,
 * the early section comes first and is closed by __early_initcall_end, then
 * levels 0..7 follow in order (each with its "s" variant, and the rootfs
 * section between 5s and 6), and __initcall_end marks the end.
 * do_initcalls() iterates from __early_initcall_end up to __initcall_end.
 */
__initcall_start = .;
   *(.initcallearly.init) __early_initcall_end = .; *(.initcall0.init) *(.initcall0s.init) *(.initcall1.init) *(.initcall1s.init) *(.initcall2.init) *(.initcall2s.init) *(.initcall3.init) *(.initcall3s.init) *(.initcall4.init) *(.initcall4s.init) *(.initcall5.init) *(.initcall5s.init) *(.initcallrootfs.init) *(.initcall6.init) *(.initcall6s.init) *(.initcall7.init) *(.initcall7s.init)
__initcall_end = .;

正好包括了上面init.h里定义的从core_initcall到late_initcall等7个level等级的.initcall”level”.init section. 因此通过不同的*_initcall声明的函数指针最终都会存放不同level等级的.initcall”level”.init section内。这些不同level的section按level等级高低依次存放。

下面我们再来看看，内核是什么时候调用存储在.initcall”level”.init section内的函数的。

内核是通过do_initcalls函数循环调用执行initcall.init section内的函数的，流程如下：
main.c
start_kernel -> rest_init -> kernel_thread -> kernel_init -> do_basic_setup -> do_initcalls
////////////////////////////
init/main.c
extern initcall_t __initcall_start[], __initcall_end[], __early_initcall_end[];

/*
 * Run every initcall whose pointer lies between __early_initcall_end and
 * __initcall_end, in table order, then flush scheduled work.
 *
 * Each entry is invoked through do_one_initcall(); the init_module system
 * call in kernel/module.c uses the same helper for a module's init function.
 */
static void __init do_initcalls(void)
{
    initcall_t *entry = __early_initcall_end;

    while (entry < __initcall_end) {
        do_one_initcall(*entry);
        entry++;
    }

    /* Make sure there is no pending stuff from the initcall sequence */
    flush_scheduled_work();
}
/////////////////////////////////////////////////
/* Search for module by name: must hold module_mutex. */
static struct module *find_module(const char *name)
{
    struct module *mod;

    list_for_each_entry(mod, &modules, list) {
        if (strcmp(mod->name, name) == 0)
            return mod;
    }
    return NULL;
}
///////////////////////
static int percpu_modinit(void)
{
    pcpu_num_used = 2;
    pcpu_num_allocated = 2;
    pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated,
                GFP_KERNEL);
    /* Static in-kernel percpu data (used). */
    pcpu_size[0] = -(__per_cpu_end-__per_cpu_start);
    /* Free room. */
    pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
    if (pcpu_size[1] < 0) {
        printk(KERN_ERR "No per-cpu room for modules.\n");
        pcpu_num_used = 1;
    }

    return 0;
}
__initcall(percpu_modinit);

模块加载由内核的系统调用init_module完成。

linux3.5.2/kernel/module.c:3009

/* This is where the real work happens */

SYSCALL_DEFINE3(init_module, void __user *, umod,

unsigned long, len, const char __user *, uargs)

{

struct module *mod;

int ret = 0;

……

/* Do all the hard work */

mod = load_module(umod, len, uargs);//模块加载

……

/* Start the module */

if (mod->init != NULL)

ret = do_one_initcall(mod->init);//模块init函数调用

……

return 0;

}

模块加载

系统调用init_module由SYSCALL_DEFINE3(init_module...)实现，其中有两个关键的函数调用。load_module用于模块加载，do_one_initcall用于回调模块的init函数。

/*
 * init_module system call: load a module image and run its init function.
 *
 * umod/len describe the user-space module image and uargs its argument
 * string; all three are handed unchanged to load_module().
 *
 * Returns:
 *   -EPERM   caller lacks CAP_SYS_MODULE
 *   -EINTR   interrupted while waiting for module_mutex
 *   PTR_ERR  of load_module() if loading failed
 *   <0       the value returned by the module's init function, after the
 *            module has been torn down again
 *   0        otherwise (a positive init return is only warned about)
 */
/* This is where the real work happens */
SYSCALL_DEFINE3(init_module, void __user *, umod,
        unsigned long, len, const char __user *, uargs)
{
    struct module *mod;
    int ret = 0;

    /* Must have permission */
    if (!capable(CAP_SYS_MODULE))
        return -EPERM;

    /* Only one module load at a time, please */
    if (mutex_lock_interruptible(&module_mutex) != 0)
        return -EINTR;

    /* Do all the hard work */
    mod = load_module(umod, len, uargs);
    if (IS_ERR(mod)) {
        mutex_unlock(&module_mutex);
        return PTR_ERR(mod);
    }

    /* Drop lock so they can recurse */
    mutex_unlock(&module_mutex);

    blocking_notifier_call_chain(&module_notify_list,
            MODULE_STATE_COMING, mod);

    /* Start the module */
    if (mod->init != NULL)
        ret = do_one_initcall(mod->init);/* do_initcalls() uses do_one_initcall() too */
/*
 * This is the insertion-time path for loadable modules. Code built into the
 * kernel has its initcalls run from do_initcalls() instead (see the
 * module_init() description: "during do_initcalls() (if builtin) or at
 * module insertion time (if a module)").
 */
    if (ret < 0) {
        /* Init routine failed: abort. Try to protect us from
                   buggy refcounters. */
        mod->state = MODULE_STATE_GOING;
        synchronize_sched();
        module_put(mod);
        blocking_notifier_call_chain(&module_notify_list,
                         MODULE_STATE_GOING, mod);
        mutex_lock(&module_mutex);
        free_module(mod);
        mutex_unlock(&module_mutex);
        wake_up(&module_wq);
        return ret;
    }
    if (ret > 0) {
        /* Positive returns are not an error here: warn, dump the stack, go on. */
        printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
                    "it should follow 0/-E convention\n"
               KERN_WARNING "%s: loading module anyway...\n",
               __func__, mod->name, ret,
               __func__);
        dump_stack();
    }

    /* Now it's a first class citizen! Wake up anyone waiting for it. */
    mod->state = MODULE_STATE_LIVE;
    wake_up(&module_wq);

    mutex_lock(&module_mutex);
    /* Drop initial reference. */
    module_put(mod);
    /* The init region is no longer needed: release it and clear its bookkeeping. */
    unwind_remove_table(mod->unwind_info, 1);
    module_free(mod, mod->module_init);
    mod->module_init = NULL;
    mod->init_size = 0;
    mod->init_text_size = 0;
    mutex_unlock(&module_mutex);

    return 0;
}
//////////////////////////////////////////////////

模块卸载由内核的系统调用delete_module完成。
/*
 * delete_module system call: unload the module named by name_user.
 *
 * name_user is copied from user space (at most MODULE_NAME_LEN-1 bytes) and
 * looked up with find_module(); flags is passed to try_force_unload() and
 * try_stop_module().
 *
 * Returns:
 *   -EPERM        caller lacks CAP_SYS_MODULE
 *   -EFAULT       the name could not be copied from user space
 *   -EINTR        interrupted while waiting for module_mutex
 *   -ENOENT       no module of that name
 *   -EWOULDBLOCK  other modules still use this one
 *   -EBUSY        module is not in MODULE_STATE_LIVE, or it has an init but
 *                 no exit function and a forced unload was not granted
 *   otherwise the result of try_stop_module(), which is 0 when the module
 *   was removed
 */
SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
        unsigned int, flags)
{
    struct module *mod;
    char name[MODULE_NAME_LEN];
    int ret, forced = 0;

    if (!capable(CAP_SYS_MODULE))
        return -EPERM;

    if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0)
        return -EFAULT;
    /* Force termination: only MODULE_NAME_LEN-1 bytes were requested above. */
    name[MODULE_NAME_LEN-1] = '\0';

    if (mutex_lock_interruptible(&module_mutex) != 0)
        return -EINTR;

    mod = find_module(name);
    if (!mod) {
        ret = -ENOENT;
        goto out;
    }

    if (!list_empty(&mod->modules_which_use_me)) {
        /* Other modules depend on us: get rid of them first. */
        ret = -EWOULDBLOCK;
        goto out;
    }

    /* Doing init or already dying? */
    if (mod->state != MODULE_STATE_LIVE) {
        /* FIXME: if (force), slam module count and wake up
                   waiter --RR */
        DEBUGP("%s already dying\n", mod->name);
        ret = -EBUSY;
        goto out;
    }

    /* If it has an init func, it must have an exit func to unload */
    if (mod->init && !mod->exit) {
        forced = try_force_unload(flags);
        if (!forced) {
            /* This module can't be removed */
            ret = -EBUSY;
            goto out;
        }
    }

    /* Set this up before setting mod->state */
    mod->waiter = current;

    /* Stop the machine so refcounts can't move and disable module. */
    ret = try_stop_module(mod, flags, &forced);
    if (ret != 0)
        goto out;

    /* Never wait if forced. */
    if (!forced && module_refcount(mod) != 0)
        wait_for_zero_refcount(mod);

    /* The exit function and the notifier chain run without module_mutex held. */
    mutex_unlock(&module_mutex);
    /* Final destruction now no one is using it. */
    if (mod->exit != NULL)
        mod->exit();/* call the module's exit function */
    blocking_notifier_call_chain(&module_notify_list,
                     MODULE_STATE_GOING, mod);
    mutex_lock(&module_mutex);
    /* Store the name of the last unloaded module for diagnostic purposes */
    strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
    unregister_dynamic_debug_module(mod->name);
    free_module(mod);/* release the module itself */

out:
    mutex_unlock(&module_mutex);
    return ret;
}

函数在内核中的位置：linux-2.6.30/kernel/module.c

该函数的功能是将一个特定模块module的引用计数减1 ，这样当一个模块的引用计数因为不为0而不能从内核中卸载时，可以调用此函数一次或多次，实现对模块计数的清零，从而实现模块卸载。
/*
 * module_put() - drop one reference on @module.
 * @module: module whose count is decremented; NULL is accepted and ignored.
 *
 * Decrements the reference counter slot of the CPU returned by get_cpu().
 * If the module is no longer live, the task recorded in module->waiter is
 * woken, since it may be waiting for the references to go away.
 */
void module_put(struct module *module)
{
	unsigned int this_cpu;

	if (!module)
		return;

	this_cpu = get_cpu();
	local_dec(&module->ref[this_cpu].count);

	/* Maybe they're waiting for us to drop reference? */
	if (unlikely(!module_is_live(module)))
		wake_up_process(module->waiter);

	put_cpu();
}
//////////////////////////////////////
/*
 * Path of the user-mode module loader that request_module() executes
 * (it is argv[0] of the helper). Defaults to "/sbin/modprobe".
 * modprobe_path is set via /proc/sys.
 */
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";

/**
* request_module - try to load a kernel module
* @fmt:     printf style format string for the name of the module
* @varargs: arguments as specified in the format string
*
* Load a module using the user mode module loader. The function returns
* zero on success or a negative errno code on failure. Note that a
* successful module load does not mean the module did not then unload
* and exit on an error of its own. Callers must check that the service
* they requested is now available not blindly invoke it.
*
* If module auto-loading support is disabled then this function
* becomes a no-operation.
*/
kernel/kmod.c
int request_module(const char *fmt, ...)
{
	static char *envp[] = { "HOME=/",
				"TERM=linux",
				"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
				NULL };
	static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
	static int kmod_loop_msg;
	char module_name[MODULE_NAME_LEN];
	char *argv[] = { modprobe_path, "-q", "--", module_name, NULL };
	unsigned int max_modprobes;
	va_list args;
	int name_len;
	int ret;

	/* Build the module name; refuse names that would have been truncated. */
	va_start(args, fmt);
	name_len = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
	va_end(args);
	if (name_len >= MODULE_NAME_LEN)
		return -ENAMETOOLONG;

	/*
	 * modprobe may itself need a service that lives in a module, which
	 * would recurse back into this function. Guard against that by
	 * capping the number of concurrent requests at the smaller of
	 * max_threads/2 and MAX_KMOD_CONCURRENT.
	 *
	 * Walking the parents of this process and counting kmod invocations
	 * would be cleaner, but needs the process command lines, and
	 * proc_pid_cmdline is static; not worth changing proc for. KAO.
	 *
	 * "trace the ppid" is simple, but will fail if someone's
	 * parent exits. I think this is as good as it gets. --RR
	 */
	max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
	atomic_inc(&kmod_concurrent);

	if (atomic_read(&kmod_concurrent) <= max_modprobes) {
		ret = call_usermodehelper(modprobe_path, argv, envp, 1);
	} else {
		/* We may be blaming an innocent here, but unlikely */
		if (kmod_loop_msg++ < 5)
			printk(KERN_ERR
			       "request_module: runaway loop modprobe %s\n",
			       module_name);
		ret = -ENOMEM;
	}

	/* Both outcomes give back the slot taken above. */
	atomic_dec(&kmod_concurrent);
	return ret;
}

Linux把内核也看作一个模块。那么模块与模块之间如何进行交互呢，一种常用的方法就是共享变量和函数。但并不是模块中的每个变量和函数都能被共享，内核只把各个模块中主要的变量和函数放在一个特定的区段，这些变量和函数就统称为符号。到底哪些符号可以被共享？ Linux内核有自己的规定。例如 fs/sysfs/symlink.c 中的 EXPORT_SYMBOL_GPL(sysfs_create_link);
其中宏定义EXPORT_SYMBOL（）本身的含义是“移出符号”。为什么说是“移出”呢？因为这些符号本来是内核内部的符号，通过这个宏放在一个公开的地方，使得装入到内核中的其他模块可以引用它们。
实际上，仅仅知道这些符号的名字是不够的，还得知道它们在内核映像中的地址才有意义。因此，内核中定义了如下结构来描述模块的符号：
/*
 * struct module_symbol - one exported symbol: its address and its name.
 */
struct module_symbol
{
	unsigned long value; /* address of the symbol in the kernel image */
	const char *name;    /* pointer to the symbol's name string */
};

从后面对EXPORT_SYMBOL宏的定义可以看出，连接程序（ld）在连接内核映像时将这个结构存放在一个叫做“__ksymtab”的区段中，而这个区段中所有的符号就组成了模块对外“移出”的符号表，这些符号可供内核及已安装的模块来引用。而其他“对内”的符号则由连接程序自行生成，并仅供内部使用。
   与EXPORT_SYMBOL相关的定义在include/linux/module.h中：
/* Two-step stringification, so that macro arguments are expanded first. */
#define __MODULE_STRING_1(x)    #x
#define __MODULE_STRING(x)      __MODULE_STRING_1(x)

/*
 * __EXPORT_SYMBOL(sym, str) - export sym under the name str.
 *
 * Emits two objects:
 *   __kstrtab_<sym>  the name string, placed in section ".kstrtab";
 *   __ksymtab_<sym>  a struct module_symbol { address of sym, name },
 *                    placed in section "__ksymtab".
 *
 * Every line of the definition must end in a backslash with no blank line in
 * between, otherwise the macro body ends early.
 */
#define __EXPORT_SYMBOL(sym, str)                       \
const char __kstrtab_##sym[]                            \
__attribute__((section(".kstrtab"))) = str;             \
const struct module_symbol __ksymtab_##sym              \
__attribute__((section("__ksymtab"))) =                 \
{ (unsigned long)&sym, __kstrtab_##sym }

#if defined(MODVERSIONS) || !defined(CONFIG_MODVERSIONS)

#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var))

下面我们以EXPORT_SYMBOL(schedule)为例，来看一下这个宏的结果是什么。首先EXPORT_SYMBOL(schedule)被展开成__EXPORT_SYMBOL(schedule, "schedule")。而__EXPORT_SYMBOL()定义了两个语句，第一个语句定义了一个名为__kstrtab_schedule的字符串，将字符串的内容初始化为"schedule"，并将其置于内核映像中的.kstrtab区段，注意这是一个专门存放符号名字符串的区段。第二个语句则定义了一个名为__ksymtab_schedule的module_symbol结构，将其初始化为{ (unsigned long)&schedule, __kstrtab_schedule }，并将其置于内核映像中的__ksymtab区段。这样，module_symbol结构中的域value的值就为schedule在内核映像中的地址，而指针name则指向字符串"schedule"。

阅读(1835) | 评论(0) | 转发(0) |

上一篇：linux3.3之platform之DMA板端

下一篇：linux3.3之platform之DMA之PL330控制器0

给主人留下些什么吧！~~

感谢所有关心和支持过ChinaUnix的朋友们

16024965号-6