首先了解一下Linux的文件操作,接下来看一下qemu对于ide设备的虚拟化过程。
一)学习Linux的文件操作涉及分层
-----------
虚拟文件层vfs
-----------
磁盘设备缓存(directio不存在)
----------
块设备层(包括映射,调度)
---------
块设备驱动
---------
硬件设备
----------
二)采用ide虚拟化设备后,Linux客户机文件操作涉及分层
1.客户机
-----------
虚拟文件层vfs
-----------
磁盘设备缓存(directio不存在)
----------
块设备层(包括映射,调度)
---------
块设备驱动
----------
2.Qemu
----------
硬件设备模拟
----------
映像文件格式解析
------------
3.宿主机
-----------
虚拟文件层vfs
-----------
磁盘设备缓存(directio不存在)
----------
块设备层(包括映射,调度)
---------
块设备驱动
----------
硬件设备
----------
三)采用半虚拟化virtio-blk设备后,Linux客户机文件操作涉及分层
1.客户机
-----------
虚拟文件层vfs
-----------
磁盘设备缓存(directio不存在)
-----------
虚拟化设备模型前端
----------
2.Qemu
----------
虚拟化设备模型后端
----------
映像文件格式解析
------------
3.宿主机
-----------
虚拟文件层vfs
-----------
磁盘设备缓存(directio不存在)
----------
块设备层(包括映射,调度)
---------
块设备驱动
----------
硬件设备
----------
分析qemu的i/o虚拟化
1.ide驱动模拟操作流程
读取相应扇区到客户机地址空间
写入0x1f0-0x1f7,保存需要读取的扇区
写入0xc100-0xc10f发起DMA操作
2.qemu设备模型截获操作
虚拟机退出时,分析vm退出原因
/*
 * Excerpt of kvm_run(): only the KVM_EXIT_IO case is reproduced here.
 * NOTE(review): the enclosing switch (presumably on the exit reason) and the
 * declarations of `run` and `r` are not part of this excerpt.
 */
int kvm_run(CPUState *env)
{
/* Port I/O exit: hand the request described by run->io to kvm_handle_io(). */
case KVM_EXIT_IO:
r = kvm_handle_io(run->io.port,
/* the data pointer is computed as data_offset bytes past the start of *run */
(uint8_t *)run + run->io.data_offset,
run->io.direction,
run->io.size,
run->io.count);
}
处理i/o读操作
/*
 * Excerpt of kvm_handle_io(): only the branch labelled `case 2` is shown.
 * NOTE(review): the switch statement, the declaration of `ptr` and the
 * return statement are elided; presumably the switch is on `size` - confirm
 * against the full source.
 */
static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
uint32_t count)
{
case 2:
/*
 * Read a 16-bit value from the port with cpu_inw() and pass it to stw_p()
 * together with ptr (presumably a 16-bit store through ptr - confirm).
 */
stw_p(ptr, cpu_inw(port));
break;
}
/*
 * Read a 16-bit value from I/O port `addr`.
 *
 * The read is dispatched through ioport_read() with table index 1; the
 * result is handed to the trace hook and the I/O-port log macro before it
 * is returned to the caller.
 */
uint16_t cpu_inw(pio_addr_t addr)
{
    const uint16_t word = ioport_read(1, addr);

    trace_cpu_in(addr, word);
    LOG_IOPORT("inw : %04"FMT_pioaddr" %04"PRIx16"\n", addr, word);

    return word;
}
static uint32_t ioport_read(int index, uint32_t address)
{
static IOPortReadFunc * const default_func[3] = {
default_ioport_readb,
default_ioport_readw,
default_ioport_readl
};
IOPortReadFunc *func = ioport_read_table[index][address];
if (!func)
func = default_func[index];
return func(ioport_opaque[address], address);
}
/*
 * ATAPI read command handler.
 *
 * buf[0] is the opcode.  For GPCMD_READ_10 the sector count is decoded from
 * the 16-bit field at offset 7; for any other opcode routed here it is
 * decoded from the 32-bit field at offset 6.  The LBA is always the 32-bit
 * field at offset 2.
 *
 * A zero-length request completes immediately with ide_atapi_cmd_ok();
 * otherwise the read is started with a sector size of 2048 bytes.
 */
static void cmd_read(IDEState *s, uint8_t* buf)
{
    int nb_sectors;
    int lba;

    nb_sectors = (buf[0] == GPCMD_READ_10) ? (int)ube16_to_cpu(buf + 7)
                                           : (int)ube32_to_cpu(buf + 6);
    lba = ube32_to_cpu(buf + 2);

    if (nb_sectors != 0) {
        ide_atapi_cmd_read(s, lba, nb_sectors, 2048);
        return;
    }

    /* Nothing to transfer: report success right away. */
    ide_atapi_cmd_ok(s);
}
/*
 * Start an ATAPI read of `nb_sectors` sectors of `sector_size` bytes at
 * `lba`, using the DMA path when s->atapi_dma is set and the PIO path
 * otherwise.
 */
static void ide_atapi_cmd_read(IDEState *s, int lba, int nb_sectors,
                               int sector_size)
{
#ifdef DEBUG_IDE_ATAPI
    printf("read %s: LBA=%d nb_sectors=%d\n", s->atapi_dma ? "dma" : "pio",
           lba, nb_sectors);
#endif

    if (!s->atapi_dma) {
        ide_atapi_cmd_read_pio(s, lba, nb_sectors, sector_size);
        return;
    }

    ide_atapi_cmd_read_dma(s, lba, nb_sectors, sector_size);
}
/*
 * Prepare the device state for an ATAPI DMA read and kick off the DMA.
 *
 * Records the starting LBA, sector size and total transfer size
 * (nb_sectors * sector_size), resets the I/O buffer bookkeeping, starts
 * read accounting for the transfer, updates the status register and then
 * starts DMA with ide_atapi_cmd_read_dma_cb as the callback.
 */
static void ide_atapi_cmd_read_dma(IDEState *s, int lba, int nb_sectors,
                                   int sector_size)
{
    /* Describe the requested transfer. */
    s->cd_sector_size = sector_size;
    s->lba = lba;
    s->packet_transfer_size = nb_sectors * sector_size;

    /* The I/O buffer bookkeeping starts from zero. */
    s->io_buffer_size = 0;
    s->io_buffer_index = 0;

    bdrv_acct_start(s->bs, &s->acct, s->packet_transfer_size, BDRV_ACCT_READ);

    /* XXX: check if BUSY_STAT should be set */
    s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT;

    ide_dma_start(s, ide_atapi_cmd_read_dma_cb);
}
/* ATAPI DMA support */
/* XXX: handle read errors */
/*
 * Excerpt of the ATAPI DMA read callback: only the part that submits an
 * asynchronous read is shown.
 * NOTE(review): the declarations of `bm`, `s`, `n` and `data_offset` and the
 * `eot` label are not part of this excerpt.
 */
static void ide_atapi_cmd_read_dma_cb(void *opaque, int ret)
{
/* single-element I/O vector pointing into s->io_buffer at data_offset */
bm->iov.iov_base = (void *)(s->io_buffer + data_offset);
/* presumably n counts 2048-byte sectors, i.e. 4 x 512 bytes each - confirm */
bm->iov.iov_len = n * 4 * 512;
qemu_iovec_init_external(&bm->qiov, &bm->iov, 1);
/*
 * Submit the read starting at sector (s->lba << 2) for n * 4 sectors; this
 * same function is passed as the completion callback, with bm as its opaque.
 */
bm->aiocb = bdrv_aio_readv(s->bs, (int64_t)s->lba << 2, &bm->qiov,
n * 4, ide_atapi_cmd_read_dma_cb, bm);
if (!bm->aiocb) {
/* Note: media not present is the most likely case */
ide_atapi_cmd_error(s, SENSE_NOT_READY,
ASC_MEDIUM_NOT_PRESENT);
goto eot;
}
return;
}
设备模型使用异步i/o操作模拟
/**************************************************************/
/* async I/Os */
/*
 * Submit an asynchronous vectored read on block device `bs`.
 *
 * The request is traced first.  NULL is returned when the device has no
 * driver attached or when bdrv_check_request() rejects the sector range;
 * otherwise the request is forwarded to the driver's bdrv_aio_readv
 * callback and its result is returned.
 */
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *driver = bs->drv;

    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    /* Short-circuit keeps the range check from running without a driver. */
    if (!driver || bdrv_check_request(bs, sector_num, nb_sectors)) {
        return NULL;
    }

    return driver->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors, cb,
                                  opaque);
}
启动映像文件格式的读操作
/*
 * Block driver descriptor for the "qcow2" image format.
 *
 * Binds the BlockDriver callbacks listed below to their qcow2
 * implementations; in particular, asynchronous reads issued through
 * drv->bdrv_aio_readv end up in qcow2_aio_readv.
 */
static BlockDriver bdrv_qcow2 = {
    .format_name       = "qcow2",
    .instance_size     = sizeof(BDRVQcowState),
    .bdrv_probe        = qcow2_probe,
    .bdrv_open         = qcow2_open,
    .bdrv_close        = qcow2_close,
    .bdrv_create       = qcow2_create,
    .bdrv_flush        = qcow2_flush,
    .bdrv_is_allocated = qcow2_is_allocated,
    .bdrv_set_key      = qcow2_set_key,
    .bdrv_make_empty   = qcow2_make_empty,
    .bdrv_aio_readv    = qcow2_aio_readv,
    .bdrv_aio_writev   = qcow2_aio_writev,
    .bdrv_aio_flush    = qcow2_aio_flush,
};
/*
 * qcow2 implementation of the asynchronous vectored read callback.
 *
 * Builds a request with qcow2_aio_setup() (last argument 0) and, if that
 * succeeds, starts it by calling qcow2_aio_read_cb() with a result of 0.
 * Returns the request's embedded common AIOCB, or NULL when the setup
 * fails.
 */
static BlockDriverAIOCB *qcow2_aio_readv(BlockDriverState *bs,
                                         int64_t sector_num,
                                         QEMUIOVector *qiov, int nb_sectors,
                                         BlockDriverCompletionFunc *cb,
                                         void *opaque)
{
    QCowAIOCB *request = qcow2_aio_setup(bs, sector_num, qiov, nb_sectors,
                                         cb, opaque, 0);

    if (request) {
        qcow2_aio_read_cb(request, 0);
        return &request->common;
    }

    return NULL;
}
3.qemu设备模型截获操作注册
1)注册要截获的端口
/*
 * Register `func` as the read handler for the port range
 * [start, start + length), stepping by `size`.
 *
 * `size` is the word size in bytes; ioport_bsize() converts it to the
 * handler-table index and reports an error for an invalid size.  Each port
 * also records `opaque`; a port that already holds a different non-NULL
 * opaque is reported through hw_error().
 *
 * Returns 0 on success, -1 when `size` is invalid.
 */
int register_ioport_read(pio_addr_t start, int length, int size,
                         IOPortReadFunc *func, void *opaque)
{
    int bsize;
    int port;

    if (ioport_bsize(size, &bsize) != 0) {
        hw_error("register_ioport_read: invalid size");
        return -1;
    }

    port = start;
    while (port < start + length) {
        ioport_read_table[bsize][port] = func;

        /* All handlers registered on one port must share the same opaque. */
        if (ioport_opaque[port] != NULL && ioport_opaque[port] != opaque) {
            hw_error("register_ioport_read: invalid opaque");
        }
        ioport_opaque[port] = opaque;

        port += size;
    }

    return 0;
}
/*
 * Register the port I/O handlers of one IDE bus.
 *
 * - 8 byte-wide ports starting at `iobase` go to ide_ioport_write/read.
 * - When `iobase2` is non-zero, its single byte-wide port goes to
 *   ide_status_read / ide_cmd_write.
 * - Word and long accesses at `iobase` go to the ide_data_* handlers.
 *
 * `bus` is stored as the opaque pointer for every registration.
 */
void ide_init_ioport(IDEBus *bus, int iobase, int iobase2)
{
    /* byte-wide ports at iobase */
    register_ioport_write(iobase, 8, 1, ide_ioport_write, bus);
    register_ioport_read(iobase, 8, 1, ide_ioport_read, bus);

    /* optional second base */
    if (iobase2 != 0) {
        register_ioport_read(iobase2, 1, 1, ide_status_read, bus);
        register_ioport_write(iobase2, 1, 1, ide_cmd_write, bus);
    }

    /* data ports */
    register_ioport_write(iobase, 2, 2, ide_data_writew, bus);
    register_ioport_read(iobase, 2, 2, ide_data_readw, bus);
    register_ioport_write(iobase, 4, 4, ide_data_writel, bus);
    register_ioport_read(iobase, 4, 4, ide_data_readl, bus);
}
/*
 * Table of the end-of-transfer callbacks, in a fixed order.
 * The explicit indices below spell out the positions the entries occupy.
 */
static EndTransferFunc* transfer_end_table[] = {
    [0] = ide_sector_read,
    [1] = ide_sector_write,
    [2] = ide_transfer_stop,
    [3] = ide_atapi_cmd_reply_end,
    [4] = ide_atapi_cmd,
    [5] = ide_dummy_transfer_stop,
};
/*
 * 16-bit read handler for the IDE data port.
 *
 * `opaque` is the IDEBus the port belongs to; the read is served by the
 * bus's active interface.  Returns 0 when the DRQ bit is not set in the
 * status register.  Otherwise returns the next 16-bit word at data_ptr,
 * advances data_ptr by two bytes and, once data_end is reached, invokes
 * the end-of-transfer callback.
 */
uint32_t ide_data_readw(void *opaque, uint32_t addr)
{
    IDEBus *bus = opaque;
    IDEState *s = idebus_active_if(bus);
    uint8_t *cursor;
    int word;

    /* PIO data access allowed only when DRQ bit is set */
    if ((s->status & DRQ_STAT) == 0) {
        return 0;
    }

    cursor = s->data_ptr;
    word = cpu_to_le16(*(uint16_t *)cursor);

    /* Consume the two bytes just read. */
    cursor += 2;
    s->data_ptr = cursor;

    if (cursor >= s->data_end) {
        s->end_transfer_func(s);
    }

    return word;
}
static const struct {
void (*handler)(IDEState *s, uint8_t *buf);
int flags;
} atapi_cmd_table[0x100] = {
[ 0x00 ] = { cmd_test_unit_ready, CHECK_READY },
[ 0x03 ] = { cmd_request_sense, ALLOW_UA },
[ 0x12 ] = { cmd_inquiry, ALLOW_UA },
[ 0x1a ] = { cmd_mode_sense, /* (6) */ 0 },
[ 0x1b ] = { cmd_start_stop_unit, 0 }, /* [1] */
[ 0x1e ] = { cmd_prevent_allow_medium_removal, 0 },
[ 0x25 ] = { cmd_read_cdvd_capacity, CHECK_READY },
[ 0x28 ] = { cmd_read, /* (10) */ CHECK_READY },
[ 0x2b ] = { cmd_seek, CHECK_READY },
[ 0x43 ] = { cmd_read_toc_pma_atip, CHECK_READY },
[ 0x46 ] = { cmd_get_configuration, ALLOW_UA },
[ 0x4a ] = { cmd_get_event_status_notification, ALLOW_UA },
[ 0x5a ] = { cmd_mode_sense, /* (10) */ 0 },
[ 0xa8 ] = { cmd_read, /* (12) */ CHECK_READY },
[ 0xad ] = { cmd_read_dvd_structure, CHECK_READY },
[ 0xbb ] = { cmd_set_speed, 0 },
[ 0xbd ] = { cmd_mechanism_status, 0 },
[ 0xbe ] = { cmd_read_cd, CHECK_READY },
/* [1] handler detects and reports not ready condition itself */
};
/*
 * Dispatch the ATAPI packet command held in s->io_buffer.
 *
 * The first byte of the buffer is the opcode and indexes atapi_cmd_table.
 * When a handler is registered for that opcode it is called with the device
 * state and the command buffer.  Opcodes without a handler are not acted
 * upon by the code shown here.
 */
void ide_atapi_cmd(IDEState *s)
{
    /*
     * The handlers receive the raw command packet (they read the opcode
     * from buf[0]), so buf must refer to the same buffer that is used to
     * index the table.
     */
    uint8_t *buf = s->io_buffer;

    /* Execute the command */
    if (atapi_cmd_table[buf[0]].handler) {
        atapi_cmd_table[buf[0]].handler(s, buf);
        return;
    }
}
http://blog.csdn.net/zhuriyuxiao/article/details/8819002