Bug描述:Linux内核启动过程中,ramdisk加载失败,系统崩溃
日志信息:
-
RAMDISK: Couldn't find valid RAM disk image starting at 0.
-
UDF-fs: No partition found (1)
-
NILFS: Can't find nilfs on dev ram0.
-
(1,15):ocfs2_fill_super:1001 ERROR: superblock probe
-
VFS: Cannot open root device "ram0" or unknown-block(1,0)
-
Please append a correct "root=" boot option; here are the available partitions:
-
0800 8003520 sda driver: sd
-
0801 14048 sda1
-
0804 1 sda4
-
0805 393057 sda5
-
0806 102400 sda6
-
0807 7488690 sda7
-
Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(1,0)
-
Unable to load '/system/dump '.
-
Pid: 1, comm: swapper Not tainted 2.6.32.15-hermes-1 #23
-
Call Trace:
-
-
UTC time : 2005-1-1 0:14:52
-
[<ffffffff8151f1d5>] panic+0x7a/0x12d
-
[<ffffffff818780ba>] mount_block_root+0x257/0x275
-
[<ffffffff8187812e>] mount_root+0x56/0x5a
-
[<ffffffff8187829d>] prepare_namespace+0x16b/0x198
-
[<ffffffff81877685>] kernel_init+0x178/0x188
-
[<ffffffff8102e8fa>] child_rip+0xa/0x20
-
[<ffffffff8187750d>] ? kernel_init+0x0/0x188
-
[<ffffffff8102e8f0>] ? child_rip+0x0/0x20
首先看到
Couldn't find valid RAM disk image starting at 0.
顺藤摸瓜,找到打印此信息的代码:
-
static int __init
-
identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
-
{
-
const int size = 512;
-
struct minix_super_block *minixsb;
-
struct ext2_super_block *ext2sb;
-
struct romfs_super_block *romfsb;
-
struct cramfs_super *cramfsb;
-
struct squashfs_super_block *squashfsb;
-
int nblocks = -1;
-
unsigned char *buf;
-
const char *compress_name;
-
int i = 0;
-
-
buf = kmalloc(size, GFP_KERNEL);
-
if (!buf)
-
return -1;
-
-
minixsb = (struct minix_super_block *) buf;
-
ext2sb = (struct ext2_super_block *) buf;
-
romfsb = (struct romfs_super_block *) buf;
-
cramfsb = (struct cramfs_super *) buf;
-
squashfsb = (struct squashfs_super_block *) buf;
-
memset(buf, 0xe5, size);
-
-
/*
-
* Read block 0 to test for compressed kernel
-
*/
-
sys_lseek(fd, start_block * BLOCK_SIZE, 0);
-
sys_read(fd, buf, size);
-
// Eric Ju Jul 27th 2016
-
printk("start_block:%d\n",start_block);
-
for(i=0;i<size;i++)
-
printk("0x%x ",*(buf+i));
-
printk("\n");
-
-
*decompressor = decompress_method(buf, size, &compress_name);
-
if (compress_name) {
-
printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n",
-
compress_name, start_block);
-
if (!*decompressor)
-
printk(KERN_EMERG
-
"RAMDISK: %s decompressor not configured!\n",
-
compress_name);
-
nblocks = 0;
-
goto done;
-
}
-
-
/* romfs is at block zero too */
-
if (romfsb->word0 == ROMSB_WORD0 &&
-
romfsb->word1 == ROMSB_WORD1) {
-
printk(KERN_NOTICE
-
"RAMDISK: romfs filesystem found at block %d\n",
-
start_block);
-
nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS;
-
goto done;
-
}
-
-
if (cramfsb->magic == CRAMFS_MAGIC) {
-
printk(KERN_NOTICE
-
"RAMDISK: cramfs filesystem found at block %d\n",
-
start_block);
-
nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
-
goto done;
-
}
-
-
/* squashfs is at block zero too */
-
if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) {
-
printk(KERN_NOTICE
-
"RAMDISK: squashfs filesystem found at block %d\n",
-
start_block);
-
nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1)
-
>> BLOCK_SIZE_BITS;
-
goto done;
-
}
-
-
/*
-
* Read block 1 to test for minix and ext2 superblock
-
*/
-
sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0);
-
sys_read(fd, buf, size);
-
-
/* Try minix */
-
if (minixsb->s_magic == MINIX_SUPER_MAGIC ||
-
minixsb->s_magic == MINIX_SUPER_MAGIC2) {
-
printk(KERN_NOTICE
-
"RAMDISK: Minix filesystem found at block %d\n",
-
start_block);
-
nblocks = minixsb->s_nzones << minixsb->s_log_zone_size;
-
goto done;
-
}
-
-
/* Try ext2 */
-
if (ext2sb->s_magic == cpu_to_le16(EXT2_SUPER_MAGIC)) {
-
printk(KERN_NOTICE
-
"RAMDISK: ext2 filesystem found at block %d\n",
-
start_block);
-
nblocks = le32_to_cpu(ext2sb->s_blocks_count) <<
-
le32_to_cpu(ext2sb->s_log_block_size);
-
goto done;
-
}
-
-
printk(KERN_NOTICE
-
"RAMDISK: Couldn't find valid RAM disk image starting at %d.\n",
-
start_block);
-
-
done:
-
sys_lseek(fd, start_block * BLOCK_SIZE, 0);
-
kfree(buf);
-
return nblocks;
-
}
可以看到,打印此日志,是由于本函数中所有的分支都没有成功匹配。正常情况下,该函数应该走入第一个分支,并跳转至done处。
为什么没有走入第一个分支呢?猜测fd应该是指向initrd的文件描述符,第一个分支之前的read应该为读取initrd的第一个扇区内容,并进行magic比较,当匹配成功,说明initrd为正确的镜像文件,并调用相应解压函数进行解压缩。通过打印读取出的buf内容来确认,initrd文件是否正确。经过实验,打印内容全部为0xFF,证明该initrd文件错误。
为什么initrd文件会错误呢?磁盘上的initrd.img文件是正确的。继续跟踪identify_ramdisk_image的调用处,看看fd到底是什么?经过跟踪,发现以下函数,位于内核源码/init/do_mounts_initrd.c中。
-
/*
 * initrd_load() - load "/initrd.image" into the RAM disk and, where
 * appropriate, run it as an initrd.
 *
 * When mount_initrd is set, "/dev/ram" is created for Root_RAM0 and
 * rd_load_image() is asked to load "/initrd.image".  The image is executed
 * as an initrd through handle_initrd() unless /dev/ram0 is supposed to be
 * the actual root device (ROOT_DEV == Root_RAM0); in that case the RAM disk
 * is only set up here and gets mounted in the normal path.
 *
 * "/initrd.image" is unlinked on every path.
 *
 * Returns 1 if handle_initrd() was run, 0 otherwise.
 */
int __init initrd_load(void)
{
	int run_as_initrd = 0;

	if (mount_initrd) {
		create_dev("/dev/ram", Root_RAM0);
		/* ROOT_DEV is only consulted once the image has been loaded. */
		if (rd_load_image("/initrd.image"))
			run_as_initrd = (ROOT_DEV != Root_RAM0);
	}

	/* The temporary image file is no longer needed, whatever happened. */
	sys_unlink("/initrd.image");

	if (!run_as_initrd)
		return 0;

	handle_initrd();
	return 1;
}
initrd.image文件?不对啊,我们磁盘上的initrd文件名为initrd.img,怎么会变为initrd.image呢?而且路径也不对。猜测,initrd.image文件应该是由启动部分的代码创建的、指向initrd.img的符号链接。继续查找initrd.image是在哪里创建的,找到如下代码,位于/init/initramfs.c
-
/*
 * populate_rootfs() - fill rootfs from the built-in initramfs and, if one
 * was supplied (initrd_start != 0), from the external initrd image.
 *
 * The built-in archive (__initramfs_start .. __initramfs_end) is unpacked
 * first; failure to unpack it is fatal.
 *
 * With CONFIG_BLK_DEV_RAM the external image is first tried as an
 * initramfs archive.  If that fails, rootfs is cleaned, the built-in
 * archive is unpacked again, and the raw image is written to
 * "/initrd.image".
 *
 * Without CONFIG_BLK_DEV_RAM the external image is unpacked as an initramfs
 * and a failure is only logged.
 *
 * Always returns 0.
 */
static int __init populate_rootfs(void)
{
	char *err = unpack_to_rootfs(__initramfs_start,
			 __initramfs_end - __initramfs_start);
	if (err)
		/*
		 * Failed to decompress INTERNAL initramfs.  The message is
		 * passed as an argument, never as the format string, so a
		 * '%' inside it cannot be interpreted as a conversion.
		 */
		panic("%s", err);
	if (initrd_start) {
#ifdef CONFIG_BLK_DEV_RAM
		int fd;
		printk(KERN_INFO "Trying to unpack rootfs image as initramfs...\n");
		err = unpack_to_rootfs((char *)initrd_start,
			initrd_end - initrd_start);
		if (!err) {
			free_initrd();
			return 0;
		} else {
			/*
			 * Drop whatever the failed attempt left behind and
			 * restore the built-in contents.
			 */
			clean_rootfs();
			unpack_to_rootfs(__initramfs_start,
				 __initramfs_end - __initramfs_start);
		}
		printk(KERN_INFO "rootfs image is not initramfs (%s)"
				"; looks like an initrd\n", err);
		/* Save the raw image as a file in rootfs. */
		fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700);

		if (fd >= 0) {
			sys_write(fd, (char *)initrd_start,
					initrd_end - initrd_start);
			sys_close(fd);
			free_initrd();
		}
#else
		printk(KERN_INFO "Unpacking initramfs...\n");
		err = unpack_to_rootfs((char *)initrd_start,
			initrd_end - initrd_start);
		if (err)
			printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
		free_initrd();
#endif
	}
	return 0;
}
在这里创建了initrd.image文件。另外还有一句:sys_write(fd, (char *)initrd_start, initrd_end - initrd_start); 看来是内核从内存中将相关数据写到/initrd.image中的,并非软链接。那initrd_start又是哪里呢?哪里来的数据呢?将initrd_start尝试打印后,发现initrd_start为0xffff880000100000,估计是已经转换完的虚拟地址。既然知道initrd.image是从内存写入根文件系统的,那么一定有其他程序将我们的initrd.img读入内存。initrd.img是在哪里被读入内存的呢?这个文件的路径是在哪里被提供的呢?想起来,lilo.multi.conf文件中有指定initrd.img文件的路径。那一定是lilo在启动时,将initrd.img读入内存,并将地址传递给内核。那就继续查看串口日志。在刚刚启动的日志中有如下信息:
-
RAMDISK: 7fa36000 - 7ffff40e
-
Allocated new RAMDISK: 00100000 - 006c940e
-
Move RAMDISK from 000000007fa36000 - 000000007ffff40d to 00100000 - 006c940d
可以看到,RAMDISK的起始地址为0x7fa36000,那是不是刚刚的虚拟地址就是从这个物理地址转换过去的呢?仔细看第三行,内核将RAMDISK的内容移动到了0x00100000地址处。再对比刚才的虚拟地址0xffff880000100000,可以确定该虚拟地址一定是从0x00100000处映射的。因为内核映射物理低端内存的习惯是:以一个固定的高端虚拟地址为基址,再加上实际的物理地址作为偏移。那么打印一下RAMDISK被移动之前、移动之后的内容,看看是不是移动时出了错误。结果发现RAMDISK在被移动之前就全是0xFF。可以断定,LILO将initrd.img读入内存时就已经错了。从0xFF上看,这段物理内存应该是没有被写过,是上电后的初始状态。突然想到还有另外一个信息,在替换内核后,执行lilo64 -C lilo.multi.conf -s `pwd`时,lilo报了一个警告信息:
-
Normally any initial ramdisk (initrd) loaded with a kernel is loaded as
-
high in memory as possible, but never above 15Mb. This is due to a BIOS
-
limitation on older systems. On newer systems, this option enables using
-
memory above 15Mb (up to a kernel imposed limit, around 768Mb) for
-
passing the initrd to the kernel. The presence of this option merely
-
indicates that your system does not have the old BIOS limitation.
再看RAMDISK的初始起始地址:0x7fa36000,很显然该地址高于15MB处地址,说明LILO认为内核和initrd.img的大小超过了某个固定限制,将initrd放在了高端内存中。那为什么LILO写入内存失败呢?从上面的提示信息来看,应该是在刚上电,启动时BIOS不支持访问高端内存,所以LILO在调用BIOS的写入程序时发生了错误,但LILO并没有关心这个错误。
在LILO的HomePage上找到一篇技术文档,其中很明确的表述了LILO会将initrd.img加载在内存的低端地址的尾部处(16MB以下)。16MB的限制是因为BIOS只使用24位的地址空间来传输数据。后经过阅读LILO的代码,LILO会按照内核镜像大小的3倍与initrd.img的大小计算总和,当总和大于14MB,LILO认为14MB以下的低地址空间无法放入内核和initrd镜像文件,便认为BIOS是支持16MB以上地址空间的,于是LILO在加载initrd镜像时,将initrd放在高地址空间中。
LILO的技术文档:,感兴趣的同学可以拿来看看哦。
阅读(9892) | 评论(0) | 转发(0) |