网络设备的IP地址结构 ====================
(1) 在TCP/IP协议环境下, 网络设备结构(net_device)具有一个ip_ptr指针指向IP协议的设备参数块(in_device), 它包含设备IP地址结构(in_ifaddr)的链表指针(ifa_list). IP地址结构链可以为一个网络设备配置多个IP地址, 使得局域网中的单台主机能模拟多台主机的作用.
(2) 设备IP地址的配置由应用程序通过ioctl()系统调用使用ifreq参数结构来完成. 同一设备的不同IP地址用不同的设备别名来标识, 例如"eth0:1"和"eth0:2"分别代表设备eth0的两个地址. 当增加一个别名设备时, 如果它的地址与已有地址属于同一子网, 则它的地址被标记为"从属"(IFA_F_SECONDARY). 当设备最后一个别名被删除时, 设备的IP参数块将被释放. 设备地址参数发生改变时, 将通过地址消息链(inetaddr_chain)向有关子系统发送通知消息, 例如路由子系统利用该消息来刷新转发表和路由缓冲表.
struct net_device { ... void *ip_ptr; /* IPv4 specific data */ ... } struct in_device { struct net_device *dev; atomic_t refcnt; rwlock_t lock; int dead; struct in_ifaddr *ifa_list; /* IP ifaddr chain */ struct ip_mc_list *mc_list; /* IP multicast filter chain */ unsigned long mr_v1_seen; struct neigh_parms *arp_parms; struct ipv4_devconf cnf; }; struct in_ifaddr { struct in_ifaddr *ifa_next; struct in_device *ifa_dev; u32 ifa_local; 设备地址 u32 ifa_address; 点对点设备的对端地址 u32 ifa_mask; 网络地址掩码 u32 ifa_broadcast; 设备的广播地址 u32 ifa_anycast; unsigned char ifa_scope; 设备地址的寻址范围 unsigned char ifa_flags; 地址标志 unsigned char ifa_prefixlen; 设备网络地址长度 char ifa_label[IFNAMSIZ]; 设备IP地址标签 };
/* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */
/*
 * struct ifreq - parameter block for interface ioctl requests.
 *
 * Two unions back to back: ifr_ifrn holds the interface name and ifr_ifru
 * holds the request payload.  Because the payload is a union, only one of
 * its members is meaningful for any given request.
 *
 * NOTE(review): code that uses this structure refers to members through
 * short names such as ifr_name, ifr_addr and ifr_flags; those are presumably
 * accessor macros for ifr_ifrn.* / ifr_ifru.* defined next to this structure
 * in the original header - confirm there.
 */
struct ifreq
{
#define IFHWADDRLEN	6	/* not referenced inside this structure */
#define IFNAMSIZ	16	/* size of the name buffers below */
	union
	{
		char	ifrn_name[IFNAMSIZ];	/* if name, e.g. "en0" */
	} ifr_ifrn;

	union {
		/* address-valued payloads, one struct sockaddr each */
		struct	sockaddr ifru_addr;
		struct	sockaddr ifru_dstaddr;
		struct	sockaddr ifru_broadaddr;
		struct	sockaddr ifru_netmask;
		struct	sockaddr ifru_hwaddr;
		/* scalar payloads */
		short	ifru_flags;
		int	ifru_ivalue;
		int	ifru_mtu;
		struct	ifmap ifru_map;
		/* name-sized payloads, same length as ifrn_name */
		char	ifru_slave[IFNAMSIZ];	/* Just fits the size */
		char	ifru_newname[IFNAMSIZ];
		/* opaque pointer payload */
		char *	ifru_data;
	} ifr_ifru;
};
; net/ipv4/devinet.c:
int devinet_ioctl(unsigned int cmd, void *arg) { struct ifreq ifr; struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; struct in_device *in_dev; struct in_ifaddr **ifap = NULL; struct in_ifaddr *ifa = NULL; struct net_device *dev; char *colon; int ret = 0;
/* * Fetch the caller's info block into kernel space */
if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; ifr.ifr_name[IFNAMSIZ-1] = 0;
colon = strchr(ifr.ifr_name, ':'); 从设备地址标签中取设备名称 if (colon) *colon = 0;
#ifdef CONFIG_KMOD dev_load(ifr.ifr_name); 加载相应名称的设备驱动模块 #endif
switch(cmd) { case SIOCGIFADDR: /* Get interface address */ case SIOCGIFBRDADDR: /* Get the broadcast address */ case SIOCGIFDSTADDR: /* Get the destination address */ case SIOCGIFNETMASK: /* Get the netmask for the interface */ /* Note that this ioctls will not sleep, so that we do not impose a lock. One day we will be forced to put shlock here (I mean SMP) */ memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; break;
case SIOCSIFFLAGS: if (!capable(CAP_NET_ADMIN)) return -EACCES; break; case SIOCSIFADDR: /* Set interface address (and family) */ case SIOCSIFBRDADDR: /* Set the broadcast address */ case SIOCSIFDSTADDR: /* Set the destination address */ case SIOCSIFNETMASK: /* Set the netmask for the interface */ if (!capable(CAP_NET_ADMIN)) return -EACCES; if (sin->sin_family != AF_INET) return -EINVAL; break; default: return -EINVAL; }
dev_probe_lock(); rtnl_lock();
if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) { 取设备结构 ret = -ENODEV; goto done; }
if (colon) *colon = ':'; 恢复用户地址标签
if ((in_dev=__in_dev_get(dev)) != NULL) { 取IP设备块 for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) if (strcmp(ifr.ifr_name, ifa->ifa_label) == 0) break; 取用户地址标签对应的设备地址结构 }
if (ifa == NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) { 除了设置地址和设置标志 ret = -EADDRNOTAVAIL; goto done; }
switch(cmd) { case SIOCGIFADDR: /* Get interface address */ sin->sin_addr.s_addr = ifa->ifa_local; 取设备IP地址 goto rarok;
case SIOCGIFBRDADDR: /* Get the broadcast address */ sin->sin_addr.s_addr = ifa->ifa_broadcast; 取设备IP广播地址 goto rarok;
case SIOCGIFDSTADDR: /* Get the destination address */ sin->sin_addr.s_addr = ifa->ifa_address; 取点对点设备的对端IP地址 goto rarok;
case SIOCGIFNETMASK: /* Get the netmask for the interface */ sin->sin_addr.s_addr = ifa->ifa_mask; 取设备的IP地址掩码 goto rarok;
case SIOCSIFFLAGS: 设置设备标志 if (colon) { if (ifa == NULL) { ret = -EADDRNOTAVAIL; break; } if (!(ifr.ifr_flags&IFF_UP)) 如果标志为关闭设备 inet_del_ifa(in_dev, ifap, 1); 破坏性删除该地址结构 break; } ret = dev_change_flags(dev, ifr.ifr_flags); break; case SIOCSIFADDR: /* Set interface address (and family) */ if (inet_abc_len(sin->sin_addr.s_addr) < 0) { 取网络地址位长 ret = -EINVAL; break; }
if (!ifa) { 如果设备尚无地址结构 if ((ifa = inet_alloc_ifa()) == NULL) { 分配地址结构 ret = -ENOBUFS; break; } if (colon) 如果地址标签为设备别名标签 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); } else { 如果修改设备地址 ret = 0; if (ifa->ifa_local == sin->sin_addr.s_addr) break; inet_del_ifa(in_dev, ifap, 0); 从链表中删除该地址结构 ifa->ifa_broadcast = 0; ifa->ifa_anycast = 0; }
ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr; 将设备地址和对端地址设置为新地址
if (!(dev->flags&IFF_POINTOPOINT)) { 如果非点对点设备 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 取地址的网络地址长度 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 求网络掩码 if ((dev->flags&IFF_BROADCAST) && ifa->ifa_prefixlen < 31) ifa->ifa_broadcast = ifa->ifa_address|~ifa->ifa_mask; 设置标准广播地址 } else { 如果是点对点设备 ifa->ifa_prefixlen = 32; 网络地址长度为32 ifa->ifa_mask = inet_make_mask(32); } ret = inet_set_ifa(dev, ifa); 添加设备地址 break;
case SIOCSIFBRDADDR: /* Set the broadcast address */ if (ifa->ifa_broadcast != sin->sin_addr.s_addr) { inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = sin->sin_addr.s_addr; inet_insert_ifa(ifa); } break; case SIOCSIFDSTADDR: /* Set the destination address */ if (ifa->ifa_address != sin->sin_addr.s_addr) { if (inet_abc_len(sin->sin_addr.s_addr) < 0) { ret = -EINVAL; break; } inet_del_ifa(in_dev, ifap, 0); ifa->ifa_address = sin->sin_addr.s_addr; inet_insert_ifa(ifa); } break;
case SIOCSIFNETMASK: /* Set the netmask for the interface */
/* * The mask we set must be legal. */ if (bad_mask(sin->sin_addr.s_addr, 0)) { ret = -EINVAL; break; }
if (ifa->ifa_mask != sin->sin_addr.s_addr) { inet_del_ifa(in_dev, ifap, 0); ifa->ifa_mask = sin->sin_addr.s_addr; ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask); inet_insert_ifa(ifa); } break; } done: rtnl_unlock(); dev_probe_unlock(); return ret;
rarok: rtnl_unlock(); dev_probe_unlock(); if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; return 0; } static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { struct in_device *in_dev = __in_dev_get(dev);
ASSERT_RTNL();
if (in_dev == NULL) { 如果IP设备块不存在 in_dev = inetdev_init(dev); 分配IP设备块 if (in_dev == NULL) { inet_free_ifa(ifa); return -ENOBUFS; } } if (ifa->ifa_dev != in_dev) { BUG_TRAP(ifa->ifa_dev==NULL); in_dev_hold(in_dev); ifa->ifa_dev=in_dev; 将地址结构绑定到IP设备块上 } if (LOOPBACK(ifa->ifa_local)) 如果设备地址是回送地址 ifa->ifa_scope = RT_SCOPE_HOST; 地址的寻址范围为主机内部 return inet_insert_ifa(ifa); } static int inet_insert_ifa(struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary;
ASSERT_RTNL();
if (ifa->ifa_local == 0) { inet_free_ifa(ifa); return 0; }
ifa->ifa_flags &= ~IFA_F_SECONDARY; 清除地址结构的从属标志 last_primary = &in_dev->ifa_list; 取IP设备块地址链表指针地址
for (ifap=&in_dev->ifa_list; (ifa1=*ifap)!=NULL; ifap=&ifa1->ifa_next) { 扫描IP设备块上的地址链 if (!(ifa1->ifa_flags&IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope) last_primary = &ifa1->ifa_next; if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) { ; 如果与链中某个地址具有相同的网络地址 if (ifa1->ifa_local == ifa->ifa_local) { 如果两者地址相同 inet_free_ifa(ifa); return -EEXIST; } if (ifa1->ifa_scope != ifa->ifa_scope) { 如果两者寻址范围不同 inet_free_ifa(ifa); return -EINVAL; } ifa->ifa_flags |= IFA_F_SECONDARY; 标记为从属地址 } }
if (!(ifa->ifa_flags&IFA_F_SECONDARY)) { net_srandom(ifa->ifa_local); ifap = last_primary; }
ifa->ifa_next = *ifap; write_lock_bh(&in_dev->lock); *ifap = ifa; write_unlock_bh(&in_dev->lock);
/* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ rtmsg_ifa(RTM_NEWADDR, ifa); notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 发布设备启动消息
return 0; } static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy) { struct in_ifaddr *ifa1 = *ifap; 取要删除地址结构的地址
ASSERT_RTNL();
/* 1. Deleting primary ifaddr forces deletion all secondaries */
if (!(ifa1->ifa_flags&IFA_F_SECONDARY)) { 如果删除的是设备主地址结构 struct in_ifaddr *ifa; struct in_ifaddr **ifap1 = &ifa1->ifa_next; 取下一地址指针的地址
while ((ifa=*ifap1) != NULL) { if (!(ifa->ifa_flags&IFA_F_SECONDARY) || 如果为主地址 ifa1->ifa_mask != ifa->ifa_mask || !inet_ifa_match(ifa1->ifa_address, ifa)) { ifap1 = &ifa->ifa_next; continue; } write_lock_bh(&in_dev->lock); *ifap1 = ifa->ifa_next; write_unlock_bh(&in_dev->lock);
rtmsg_ifa(RTM_DELADDR, ifa); notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); 发布设备停机消息 inet_free_ifa(ifa); } }
/* 2. Unlink it */
write_lock_bh(&in_dev->lock); *ifap = ifa1->ifa_next; 从设备地址链中删除该地址标签 write_unlock_bh(&in_dev->lock);
/* 3. Announce address deletion */
/* Send message first, then call notifier. At first sight, FIB update triggered by notifier will refer to already deleted ifaddr, that could confuse netlink listeners. It is not true: look, gated sees that route deleted and if it still thinks that ifaddr is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ rtmsg_ifa(RTM_DELADDR, ifa1); notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (destroy) { inet_free_ifa(ifa1);
if (in_dev->ifa_list == NULL) inetdev_destroy(in_dev); } } static void inetdev_destroy(struct in_device *in_dev) { struct in_ifaddr *ifa;
ASSERT_RTNL();
in_dev->dead = 1;
ip_mc_destroy_dev(in_dev);
while ((ifa = in_dev->ifa_list) != NULL) { inet_del_ifa(in_dev, &in_dev->ifa_list, 0); inet_free_ifa(ifa); }
#ifdef CONFIG_SYSCTL devinet_sysctl_unregister(&in_dev->cnf); #endif write_lock_bh(&inetdev_lock); in_dev->dev->ip_ptr = NULL; /* in_dev_put following below will kill the in_device */ write_unlock_bh(&inetdev_lock);
neigh_parms_release(&arp_tbl, in_dev->arp_parms); in_dev_put(in_dev); } struct in_device *inetdev_init(struct net_device *dev) { struct in_device *in_dev;
ASSERT_RTNL();
in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL); if (!in_dev) return NULL; memset(in_dev, 0, sizeof(*in_dev)); in_dev->lock = RW_LOCK_UNLOCKED; memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); in_dev->cnf.sysctl = NULL; in_dev->dev = dev; if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) { kfree(in_dev); return NULL; } inet_dev_count++; /* Reference in_dev->dev */ dev_hold(dev); #ifdef CONFIG_SYSCTL neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4"); #endif write_lock_bh(&inetdev_lock); dev->ip_ptr = in_dev; /* Account for reference dev->ip_ptr */ in_dev_hold(in_dev); write_unlock_bh(&inetdev_lock); #ifdef CONFIG_SYSCTL devinet_sysctl_register(in_dev, &in_dev->cnf); #endif if (dev->flags&IFF_UP) ip_mc_up(in_dev); return in_dev; } static __inline__ void inet_free_ifa(struct in_ifaddr *ifa) { if (ifa->ifa_dev) __in_dev_put(ifa->ifa_dev); kfree(ifa); inet_ifa_count--; } static struct in_ifaddr * inet_alloc_ifa(void) { struct in_ifaddr *ifa;
ifa = kmalloc(sizeof(*ifa), GFP_KERNEL); if (ifa) { memset(ifa, 0, sizeof(*ifa)); inet_ifa_count++; }
return ifa; } extern __inline__ struct in_device * in_dev_get(const struct net_device *dev) { struct in_device *in_dev;
read_lock(&inetdev_lock); in_dev = dev->ip_ptr; if (in_dev) atomic_inc(&in_dev->refcnt); read_unlock(&inetdev_lock); return in_dev; }
/*
 * __in_dev_get - return the IPv4 device block attached to @dev.
 *
 * Simply reads dev->ip_ptr and casts it to struct in_device *, so the
 * result is NULL when no block is attached.  Unlike in_dev_get(), this
 * variant takes no lock and does not increment in_dev->refcnt, so the
 * caller gets a borrowed pointer.  devinet_ioctl() in this file calls it
 * after rtnl_lock().
 */
extern __inline__ struct in_device *
__in_dev_get(const struct net_device *dev)
{
	return (struct in_device*)dev->ip_ptr;
}
/*
 * in_dev_put - drop one reference on @idev.
 *
 * Decrements idev->refcnt; when atomic_dec_and_test() reports that this was
 * the last reference, hands the block to in_dev_finish_destroy().
 */
extern __inline__ void
in_dev_put(struct in_device *idev)
{
	if (atomic_dec_and_test(&idev->refcnt))
		in_dev_finish_destroy(idev);
}

/*
 * in_dev_finish_destroy - final teardown of an in_device.
 *
 * Called from in_dev_put() once the reference count has dropped to zero.
 * Checks that both the address chain and the multicast chain are already
 * empty, releases the reference on the underlying net_device (the one
 * inetdev_init() took with dev_hold()), and then frees the block.
 *
 * The block is freed only if it was marked dead, which inetdev_destroy()
 * does.  If it is still "alive" a message is printed and the function
 * returns without freeing the memory or adjusting inet_dev_count.
 */
void in_dev_finish_destroy(struct in_device *idev)
{
	struct net_device *dev = idev->dev;

	/* Both chains must have been emptied before the last put. */
	BUG_TRAP(idev->ifa_list==NULL);
	BUG_TRAP(idev->mc_list==NULL);
#ifdef NET_REFCNT_DEBUG
	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", idev, dev ? dev->name : "NIL");
#endif
	/*
	 * NOTE(review): the debug printk above tolerates dev == NULL, but
	 * dev_put() is called unconditionally - confirm dev can never be
	 * NULL here.
	 */
	dev_put(dev);
	if (!idev->dead) {
		/* Not marked dead: report it and deliberately skip the free. */
		printk("Freeing alive in_device %p\n", idev);
		return;
	}
	inet_dev_count--;
	kfree(idev);
}
|