TUN/TAP 为用户空间程序提供数据包的接收和发送,它可以看成是简单的点对点设备或是以太网设备。它不是从物理设备接收包,而是从用户空间程序接收包;它也不是通过物理设备发送包,而是把这些包交给用户空间程序来发送。 为了使用这个驱动,应用程序需要打开 /dev/net/tun 设备(字符设备),然后发出一个控制调用(ioctl)来注册一个网卡设备。该网络设备将被命名为 tunXX 或 tapXX,具体取决于所设定的标志位。当应用程序关闭文件描述符的时候,网络设备和其他相关的路由将会消失。 根据所选择的设备类型,用户空间的应用程序需要读写 IP 包(用 tun 设备)或以太网包(用 tap 设备)。至于具体用哪种设备,取决于传递给 ioctl 函数的标志参数。 Tun/tap 设备的源码包地址是 http://vtun.sourceforge.net/tun
包含两个简单的例子,用于演示如何使用 tun 设备和 tap 设备。这两个程序相当于两个网络设备接口之间的网桥。 br_select.c - bridge based on select system call. br_sigio.c - bridge based on async io and SIGIO signal. 当然,最好的例子是 VTun:http://vtun.sourceforge.net :))
/*
 * Module entry and exit hooks. tun_init() (shown later in this text)
 * registers the misc character device; tun_cleanup() is not part of this
 * excerpt.
 */
module_init(tun_init);
module_exit(tun_cleanup);

/* Network device part of the driver */

/* List head named tun_dev_list; struct tun_struct carries a matching `list`
 * member, presumably linked here - confirm against the full driver. */
static LIST_HEAD(tun_dev_list);
/* Forward declaration; the initializer is not visible in this excerpt. */
static const struct ethtool_ops tun_ethtool_ops;
主要的数据结构
struct miscdevice
/*
 * Descriptor handed to misc_register() to publish a driver as a "misc"
 * character device.
 */
struct miscdevice {
	int minor;				/* minor number; MISC_DYNAMIC_MINOR asks misc_register() to pick one */
	const char *name;			/* device name passed to device_create() */
	const struct file_operations *fops;	/* file operations of the character device */
	struct list_head list;			/* link in the global misc_list */
	struct device *parent;			/* parent device passed to device_create() */
	struct device *this_device;		/* device returned by device_create() */
};
struct tun_struct
/*
 * Per-interface state of the TUN/TAP driver, reached from the character
 * device through file->private_data.
 */
struct tun_struct {
	struct list_head list;
	unsigned long flags;		/* distinguishes tun from tap devices; also holds the TUN_* option bits */
	int attached;
	uid_t owner;			/* set by the TUNSETOWNER ioctl */
	wait_queue_head_t read_wait;	/* wait queue that blocked readers sleep on */
	struct sk_buff_head readq;	/* queue of network buffers waiting to be read */
	struct net_device *dev;		/* Linux abstract network device (the kernel's unified
					 * net-device structure, which defines the common
					 * access interface) */
	struct net_device_stats stats;	/* interface statistics */
	struct fasync_struct *fasync;	/* asynchronous file notification state */
	unsigned long if_flags;		/* character-device side interface flags (SIOCSIFFLAGS) */
	u8 dev_addr[ETH_ALEN];		/* hardware address used when filtering packets for readers */
	u32 chr_filter[2];		/* multicast hash filter consulted on read */
	u32 net_filter[2];
#ifdef TUN_DEBUG
	int debug;			/* set by the TUNSETDEBUG ioctl */
#endif
};
Struct ifreq
/*
 * Interface request structure used for socket
 * ioctl's. All interface ioctl's must have parameter
 * definitions which begin with ifr_name. The
 * remainder may be interface specific.
 */
struct ifreq {
#define IFHWADDRLEN 6
	union {
		char ifrn_name[IFNAMSIZ];	/* if name, e.g. "en0" */
	} ifr_ifrn;
	union {
		struct sockaddr ifru_addr;
		struct sockaddr ifru_dstaddr;
		struct sockaddr ifru_broadaddr;
		struct sockaddr ifru_netmask;
		struct sockaddr ifru_hwaddr;
		short ifru_flags;
		int ifru_ivalue;
		int ifru_mtu;
		struct ifmap ifru_map;
		char ifru_slave[IFNAMSIZ];	/* Just fits the size */
		char ifru_newname[IFNAMSIZ];
		void __user * ifru_data;
		struct if_settings ifru_settings;
	} ifr_ifru;
};
模块的初始化(tun_init)
/*
 * Module initialisation: print the driver banner and register the misc
 * character device described by tun_miscdev.
 *
 * Returns the result of misc_register(): 0 on success, otherwise its error
 * code (an error line is logged in that case).
 */
static int __init tun_init(void)
{
	int ret = 0;

	printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
	printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);

	ret = misc_register(&tun_miscdev);
	if (ret)
		printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
	return ret;
}

/* Misc device descriptor: fixed minor TUN_MINOR, name "tun", served by tun_fops. */
static struct miscdevice tun_miscdev = {
	.minor = TUN_MINOR,
	.name = "tun",
	.fops = &tun_fops,
};

/*
 * File operations of the tun character device. Synchronous read/write use
 * the generic do_sync_* helpers; the driver supplies the aio variants.
 */
static const struct file_operations tun_fops = {
	.owner = THIS_MODULE,
	.llseek = no_llseek,
	.read = do_sync_read,
	.aio_read = tun_chr_aio_read,
	.write = do_sync_write,
	.aio_write = tun_chr_aio_write,
	.poll = tun_chr_poll,
	.ioctl = tun_chr_ioctl,
	.open = tun_chr_open,
	.release = tun_chr_close,
	.fasync = tun_chr_fasync
};
misc_register //在内核中利用misc_register() 函数将该驱动注册为非标准字符设备驱动,提供字符设备具
有的各种程序接口。 int misc_register(struct miscdevice * misc) { struct miscdevice *c; dev_t dev; int err = 0; INIT_LIST_HEAD(&misc‐>list); mutex_lock(&misc_mtx); list_for_each_entry(c, &misc_list, list) { if (c‐>minor == misc‐>minor) { mutex_unlock(&misc_mtx); return ‐EBUSY; } } if (misc‐>minor == MISC_DYNAMIC_MINOR) { int i = DYNAMIC_MINORS; while (‐‐i >= 0) if ( (misc_minors[i>>3] & (1 << (i&7))) == 0) break; if (i<0) { mutex_unlock(&misc_mtx); return ‐EBUSY; } misc‐>minor = i; } if (misc‐>minor < DYNAMIC_MINORS) misc_minors[misc‐>minor >> 3] |= 1 << (misc‐>minor & 7); dev = MKDEV(MISC_MAJOR, misc‐>minor); misc‐>this_device = device_create(misc_class, misc‐>parent, dev, "%s", misc‐>name); if (IS_ERR(misc‐>this_device)) { err = PTR_ERR(misc‐>this_device); goto out; } /* * Add it to the front, so that later devices can "override" * earlier defaults */ list_add(&misc‐>list, &misc_list); out: mutex_unlock(&misc_mtx); return err; } tun 设备的操作(系统调用) tun_chr_open(打开设备时调用) 当打开一个tun/tap 设备时,open 函数将调用tun_chr_open()函数,其中将完成一些重要的初始化过 程, 初始化函数以及网络缓冲区链表的初始化和等待队列的初始化 static int tun_chr_open(struct inode *inode, struct file * file) { DBG1(KERN_INFO "tunX: tun_chr_open\n"); file‐>private_data = NULL;//初始化设备文件的内容
return 0; } tun_chr_ioctl(设备的控制调用接口) 控制调用接口: Cmd= .. TUNSETIFF .. _IOC_TYPE(cmd) == 0x89 .. TUNSETNOCSUM .. TUNSETPERSIST .. TUNSETOWNER .. TUNSETLINK .. TUNSETDEBUG .. SIOCGIFFLAGS .. SIOCSIFFLAGS .. SIOCGIFHWADDR .. SIOCSIFHWADDR .. SIOCADDMULTI .. SIOCDELMULTI Tun/tap 驱动中网卡的注册被嵌入了字符驱动的ioctl 例程中,它是通过对字符设备文件描述符利用自 定义的ioctl 设置标志 TUNSETIFF 完成网卡的注册的。 static int tun_chr_ioctl(struct inode *inode, struct file *file,unsigned int cmd, unsigned long arg) { struct tun_struct *tun = file‐>private_data; void __user* argp = (void __user*)arg; struct ifreq ifr; if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) if (copy_from_user(&ifr, argp, sizeof ifr))//拷贝用户区的网络设备配置。在用户区已
经分配了ifreq 结构的值和配置值, return ‐EFAULT; if (cmd == TUNSETIFF && !tun) {//字符设备文件的数据不是空的则
int err; ifr.ifr_name[IFNAMSIZ‐] = '\0'; rtnl_lock();//在中定义
err = tun_set_iff(file, &ifr); rtnl_unlock(); if (err) return err; if (copy_to_user(argp, &ifr, sizeof(ifr)))//把配置数据拷贝到用户区
return ‐EFAULT; return 0; } if (!tun)//tun 设备错误
return ‐EBADFD; DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun‐>dev‐>name, cmd); switch (cmd) { case TUNSETNOCSUM: /* Disable/Enable checksum */ if (arg) tun‐>flags |= TUN_NOCHECKSUM; else tun‐>flags &= ~TUN_NOCHECKSUM; DBG(KERN_INFO "%s: checksum %s\n", tun‐>dev‐>name, arg ? "disabled" : "enabled"); break; case TUNSETPERSIST: /* Disable/Enable persist mode */ if (arg) tun‐>flags |= TUN_PERSIST; else tun‐>flags &= ~TUN_PERSIST; DBG(KERN_INFO "%s: persist %s\n", tun‐>dev‐>name, arg ? "disabled" : "enabled"); break; case TUNSETOWNER: /* Set owner of the device */ tun‐>owner = (uid_t) arg; DBG(KERN_INFO "%s: owner set to %d\n", tun‐>dev‐>name, tun‐>owner); break; case TUNSETLINK: /* Only allow setting the type when the interface is down */ if (tun‐>dev‐>flags & IFF_UP) { DBG(KERN_INFO "%s: Linktype set failed because interface is up\n", tun‐>dev‐>name); return ‐EBUSY; } else { tun‐>dev‐>type = (int) arg; DBG(KERN_INFO "%s: linktype set to %d\n", tun‐>dev‐>name, tun‐>dev‐>type); } break; #ifdef TUN_DEBUG case TUNSETDEBUG: tun‐>debug = arg; break; #endif case SIOCGIFFLAGS: ifr.ifr_flags = tun‐>if_flags; if (copy_to_user( argp, &ifr, sizeof ifr)) return ‐EFAULT; return 0; case SIOCSIFFLAGS: /** Set the character device's interface flags. Currently only * IFF_PROMISC and IFF_ALLMULTI are used. */ tun‐>if_flags = ifr.ifr_flags; DBG(KERN_INFO "%s: interface flags 0x%lx\n", tun‐>dev‐>name, tun‐>if_flags); return 0; case SIOCGIFHWADDR: /* Note: the actual net device's address may be different */ memcpy(ifr.ifr_hwaddr.sa_data, tun‐>dev_addr, min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun‐>dev_addr)); if (copy_to_user( argp, &ifr, sizeof ifr)) return ‐EFAULT; return 0; case SIOCSIFHWADDR: { /* try to set the actual net device's hw address */ int ret = dev_set_mac_address(tun‐>dev, &ifr.ifr_hwaddr); if (ret == 0) { /** Set the character device's hardware address. This is used when * filtering packets being sent from the network device to the character * device. 
*/ memcpy(tun‐>dev_addr, ifr.ifr_hwaddr.sa_data, min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun‐>dev_addr)); DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n", tun‐>dev‐>name, tun‐>dev_addr[0], tun‐>dev_addr[1], tun‐>dev_addr[2], tun‐>dev_addr[3], tun‐>dev_addr[4], tun‐>dev_addr[5]); } return ret; } case SIOCADDMULTI: /** Add the specified group to the character device's multicast filter * list. */ add_multi(tun‐>chr_filter, ifr.ifr_hwaddr.sa_data); DBG(KERN_DEBUG "%s: add multi: %x:%x:%x:%x:%x:%x\n", tun‐>dev‐>name, (u8)ifr.ifr_hwaddr.sa_data[0], (u8)ifr.ifr_hwaddr.sa_data[1], (u8)ifr.ifr_hwaddr.sa_data[2], (u8)ifr.ifr_hwaddr.sa_data[3], (u8)ifr.ifr_hwaddr.sa_data[4], (u8)ifr.ifr_hwaddr.sa_data[5]); return 0; case SIOCDELMULTI: /** Remove the specified group from the character device's multicast * filter list. */ del_multi(tun‐>chr_filter, ifr.ifr_hwaddr.sa_data); DBG(KERN_DEBUG "%s: del multi: %x:%x:%x:%x:%x:%x\n", tun‐>dev‐>name, (u8)ifr.ifr_hwaddr.sa_data[0], (u8)ifr.ifr_hwaddr.sa_data[1], (u8)ifr.ifr_hwaddr.sa_data[2], (u8)ifr.ifr_hwaddr.sa_data[3], (u8)ifr.ifr_hwaddr.sa_data[4], (u8)ifr.ifr_hwaddr.sa_data[5]); return 0; default: return ‐EINVAL; }; return 0; } tun_chr_aio_read(异步读)(从tun 设备中读取数据) static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { struct file *file = iocb‐>ki_filp; struct tun_struct *tun = file‐>private_data; DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; ssize_t len, ret = 0; if (!tun) return ‐EBADFD; DBG(KERN_INFO "%s: tun_chr_read\n", tun‐>dev‐>name); len = iov_total(iv, count); if (len < 0) return ‐EINVAL; add_wait_queue(&tun‐>read_wait, &wait); while (len) { const u8 ones[ ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; u8 addr[ ETH_ALEN]; int bit_nr; current‐>state = TASK_INTERRUPTIBLE; /* Read frames from the queue */ if (!(skb=skb_dequeue(&tun‐>readq))) { if (file‐>f_flags & O_NONBLOCK) { ret = ‐EAGAIN; break; } if (signal_pending(current)) { ret = 
‐ERESTARTSYS; break; } /* Nothing to read, let's sleep */ schedule(); continue; } netif_wake_queue(tun‐>dev); /** Decide whether to accept this packet. This code is designed to * behave identically to an Ethernet interface. Accept the packet if * ‐ we are promiscuous. * ‐ the packet is addressed to us. * ‐ the packet is broadcast. * ‐ the packet is multicast and * ‐ we are multicast promiscous. * ‐ we belong to the multicast group. */ skb_copy_from_linear_data(skb, addr, min_t(size_t, sizeof addr, skb‐>len)); bit_nr = ether_crc(sizeof addr, addr) >> 26; if ((tun‐>if_flags & IFF_PROMISC) || memcmp(addr, tun‐>dev_addr, sizeof addr) == 0 || memcmp(addr, ones, sizeof addr) == 0 || (((addr[0] == 1 && addr[1] == 0 && addr[2] == 0x5e) || (addr[0] == 0x33 && addr[1] == 0x33)) && ((tun‐>if_flags & IFF_ALLMULTI) || (tun‐>chr_filter[bit_nr >> 5] & (1 << (bit_nr & 31)))))) { DBG(KERN_DEBUG "%s: tun_chr_readv: accepted: %x:%x:%x:%x:%x:%x\n", tun‐>dev‐>name, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); ret = tun_put_user(tun, skb, (struct iovec *) iv, len); kfree_skb(skb); break; } else { DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n", tun‐>dev‐>name, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); kfree_skb(skb); continue; } } current‐>state = TASK_RUNNING; remove_wait_queue(&tun‐>read_wait, &wait); return ret; } skb_dequeue(src/net/core/skbuff.c) /** * skb_dequeue ‐ remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. The list lock is taken so the function * may be used safely with other locking list functions. The head item is * returned or %NULL if the list is empty. 
*/ struct sk_buff *skb_dequeue(struct sk_buff_head *list) { unsigned long flags; struct sk_buff *result; spin_lock_irqsave(&list‐>lock, flags); result = __skb_dequeue(list); spin_unlock_irqrestore(&list‐>lock, flags); return result; } __skb_dequeue /** * __skb_dequeue ‐ remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. This function does not take any locks * so must be used with appropriate locks held only. The head item is * returned or %NULL if the list is empty. */ extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { struct sk_buff *next, *prev, *result; prev = (struct sk_buff *) list; next = prev‐>next; result = NULL; if (next != prev) { result = next; next = next‐>next; list‐>qlen‐‐; next‐>prev = prev; prev‐>next = next; result‐>next = result‐>prev = NULL; } return result; } tun_put_user /* Put packet to the user space buffer */ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, struct sk_buff *skb, struct iovec *iv, int len) { struct tun_pi pi = { 0, skb‐>protocol }; ssize_t total = 0; if (!(tun‐>flags & TUN_NO_PI)) { if ((len ‐= sizeof(pi)) < 0) return ‐EINVAL; if (len < skb‐>len) { /* Packet will be striped */ pi.flags |= TUN_PKT_STRIP; } if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi))) return ‐EFAULT; total += sizeof(pi); } len = min_t(int, skb‐>len, len); skb_copy_datagram_iovec(skb, 0, iv, len); total += len; tun‐>stats.tx_packets++; tun‐>stats.tx_bytes += len; return total; } tun_chr_aio_write(把数据写入到tun 设备中) static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { struct tun_struct *tun = iocb‐>ki_filp‐>private_data; if (!tun) return ‐EBADFD; DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun‐>dev‐>name, count); return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count)); } tun_get_user /* Get packet from user space buffer */ static __inline__ ssize_t 
tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count) { struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) }; struct sk_buff *skb; size_t len = count, align = 0; if (!(tun‐>flags & TUN_NO_PI)) { if ((len ‐= sizeof(pi)) > count) return ‐EINVAL; if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi))) return ‐EFAULT; } if ((tun‐>flags & TUN_TYPE_MASK) == TUN_TAP_DEV) align = NET_IP_ALIGN; if (!(skb = alloc_skb(len + align, GFP_KERNEL))) { tun‐>stats.rx_dropped++; return ‐ENOMEM; } if (align) skb_reserve(skb, align); if (memcpy_fromiovec(skb_put(skb, len), iv, len)) { tun‐>stats.rx_dropped++; kfree_skb(skb); return ‐EFAULT; } switch (tun‐>flags & TUN_TYPE_MASK) { case TUN_TUN_DEV: skb_reset_mac_header(skb); skb‐>protocol = pi.proto; skb‐>dev = tun‐>dev; break; case TUN_TAP_DEV: skb‐>protocol = eth_type_trans(skb, tun‐>dev); break; }; if (tun‐>flags & TUN_NOCHECKSUM) skb‐>ip_summed = CHECKSUM_UNNECESSARY; netif_rx_ni(skb); tun‐>dev‐>last_rx = jiffies; tun‐>stats.rx_packets++; tun‐>stats.rx_bytes += len; return count; }
|