网络设备的IP地址结构
====================
(1) 在TCPIP协议环境下,
网络设备结构(net_device)具有一个ip_ptr指针指向IP协议的设备参数块(in_device),
它包含设备IP地址结构(in_ifaddr)的链表指针(ifa_list).
IP地址结构链可以为一个网络设备配置多个IP地址,
使得局域网中的单台主机能模拟多台主机的作用.
(2) 设备IP地址的配置由应用程序通过ioctl()系统调用使用ifreq参数结构来完成.
同一设备的不同IP地址用不同的设备别名来标识,
例如"eth0:1"和"eth0:2"分别代表设备eht0的两个地址. 当增加一个别名设备时,
如果它的地址与已有地址属于同一子网, 则它的地址被标记为"从属"(IFA_F_SECONDARY).
当设备{zh1}一个别名被删除时, 设备的IP参数块将被释放.设备地址参数发生改变时,
将通过地址消息链(inetaddr_chain)向有关子系统发送通知消息,
例如路由子系统用来刷新转发表和路由缓冲表.
struct net_device
{
...
void *ip_ptr; /* IPv4 specific data */
...
}
struct in_device
{
struct net_device *dev;
atomic_t refcnt;
rwlock_t lock;
int dead;
struct in_ifaddr *ifa_list; /* IP ifaddr chain */
struct ip_mc_list *mc_list; /* IP multicast filter chain */
unsigned long mr_v1_seen;
struct neigh_parms *arp_parms;
struct ipv4_devconf cnf;
};
struct in_ifaddr
{
struct in_ifaddr *ifa_next;
struct in_device *ifa_dev;
u32 ifa_local; 设备地址
u32 ifa_address; 点对点设备的对端地址
u32 ifa_mask; 网络地址掩码
u32 ifa_broadcast; 设备的广播地址
u32 ifa_anycast;
unsigned char ifa_scope; 设备地址的寻址范围
unsigned char ifa_flags; 地址标志
unsigned char ifa_prefixlen; 设备网络地址长度
char ifa_label[IFNAMSIZ]; 设备IP地址标签
};
/*
* Interface request structure used for socket
* ioctl's. All interface ioctl's must have parameter
* definitions which begin with ifr_name. The
* remainder may be interface specific.
*/
struct ifreq
{
#define IFHWADDRLEN 6
#define IFNAMSIZ 16
union
{
char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
} ifr_ifrn;
union {
struct sockaddr ifru_addr;
struct sockaddr ifru_dstaddr;
struct sockaddr ifru_broadaddr;
struct sockaddr ifru_netmask;
struct sockaddr ifru_hwaddr;
short ifru_flags;
int ifru_ivalue;
int ifru_mtu;
struct ifmap ifru_map;
char ifru_slave[IFNAMSIZ]; /* Just fits the size */
char ifru_newname[IFNAMSIZ];
char * ifru_data;
} ifr_ifru;
};
; net/ipv4/devinet.c:
int devinet_ioctl(unsigned int cmd, void *arg)
{
struct ifreq ifr;
struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
struct in_device *in_dev;
struct in_ifaddr **ifap = NULL;
struct in_ifaddr *ifa = NULL;
struct net_device *dev;
char *colon;
int ret = 0;
/*
* Fetch the caller's info block into kernel space
*/
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
ifr.ifr_name[IFNAMSIZ-1] = 0;
colon = strchr(ifr.ifr_name, ':'); 从设备地址标签中取设备名称
if (colon)
*colon = 0;
#ifdef CONFIG_KMOD
dev_load(ifr.ifr_name); 加载相应名称的设备驱动模块
#endif
switch(cmd) {
case SIOCGIFADDR: /* Get interface address */
case SIOCGIFBRDADDR: /* Get the broadcast address */
case SIOCGIFDSTADDR: /* Get the destination address */
case SIOCGIFNETMASK: /* Get the netmask for the interface */
/* Note that this ioctls will not sleep,
so that we do not impose a lock.
One day we will be forced to put shlock here (I mean SMP)
*/
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_INET;
break;
case SIOCSIFFLAGS:
if (!capable(CAP_NET_ADMIN))
return -EACCES;
break;
case SIOCSIFADDR: /* Set interface address (and family) */
case SIOCSIFBRDADDR: /* Set the broadcast address */
case SIOCSIFDSTADDR: /* Set the destination address */
case SIOCSIFNETMASK: /* Set the netmask for the interface */
if (!capable(CAP_NET_ADMIN))
return -EACCES;
if (sin->sin_family != AF_INET)
return -EINVAL;
break;
default:
return -EINVAL;
}
dev_probe_lock();
rtnl_lock();
if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) { 取设备结构
ret = -ENODEV;
goto done;
}
if (colon)
*colon = ':'; 恢复用户地址标签
if ((in_dev=__in_dev_get(dev)) != NULL) { 取IP设备块
for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next)
if (strcmp(ifr.ifr_name, ifa->ifa_label) == 0)
break; 取用户地址标签对应的设备地址结构
}
if (ifa == NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS) {
除了设置地址和设置标志
ret = -EADDRNOTAVAIL;
goto done;
}
switch(cmd) {
case SIOCGIFADDR: /* Get interface address */
sin->sin_addr.s_addr = ifa->ifa_local; 取设备IP地址
goto rarok;
case SIOCGIFBRDADDR: /* Get the broadcast address */
sin->sin_addr.s_addr = ifa->ifa_broadcast; 取设备IP广播地址
goto rarok;
case SIOCGIFDSTADDR: /* Get the destination address */
sin->sin_addr.s_addr = ifa->ifa_address; 取点对点设备的对端IP地址
goto rarok;
case SIOCGIFNETMASK: /* Get the netmask for the interface */
sin->sin_addr.s_addr = ifa->ifa_mask; 取设备的IP地址掩码
goto rarok;
case SIOCSIFFLAGS: 设置设备标志
if (colon) {
if (ifa == NULL) {
ret = -EADDRNOTAVAIL;
break;
}
if (!(ifr.ifr_flags&IFF_UP)) 如果标志为关闭设备
inet_del_ifa(in_dev, ifap, 1); 破环性删除该地址结构
break;
}
ret = dev_change_flags(dev, ifr.ifr_flags);
break;
case SIOCSIFADDR: /* Set interface address (and family) */
if (inet_abc_len(sin->sin_addr.s_addr) < 0) { 取网络地址位长
ret = -EINVAL;
break;
}
if (!ifa) { 如果设备尚无地址结构
if ((ifa = inet_alloc_ifa()) == NULL) { 分配地址结构
ret = -ENOBUFS;
break;
}
if (colon) 如果地址标签为设备别名标签
memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
} else { 如果修改设备地址
ret = 0;
if (ifa->ifa_local == sin->sin_addr.s_addr)
break;
inet_del_ifa(in_dev, ifap, 0); 从链接中删除该地址结构
ifa->ifa_broadcast = 0;
ifa->ifa_anycast = 0;
}
ifa->ifa_address =
ifa->ifa_local = sin->sin_addr.s_addr; 将设备地址和对端地址设置为新地址
if (!(dev->flags&IFF_POINTOPOINT)) { 如果非点对点设备
ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address); 取地址的网络地址长度
ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen); 求网络掩码
if ((dev->flags&IFF_BROADCAST) && ifa->ifa_prefixlen < 31)
ifa->ifa_broadcast = ifa->ifa_address|~ifa->ifa_mask; 设置标准广播地址
} else { 如果是点对点设备
ifa->ifa_prefixlen = 32; 网络地址长度为32
ifa->ifa_mask = inet_make_mask(32);
}
ret = inet_set_ifa(dev, ifa); 添加设备地址
break;
case SIOCSIFBRDADDR: /* Set the broadcast address */
if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_broadcast = sin->sin_addr.s_addr;
inet_insert_ifa(ifa);
}
break;
case SIOCSIFDSTADDR: /* Set the destination address */
if (ifa->ifa_address != sin->sin_addr.s_addr) {
if (inet_abc_len(sin->sin_addr.s_addr) < 0) {
ret = -EINVAL;
break;
}
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_address = sin->sin_addr.s_addr;
inet_insert_ifa(ifa);
}
break;
case SIOCSIFNETMASK: /* Set the netmask for the interface */
/*
* The mask we set must be legal.
*/
if (bad_mask(sin->sin_addr.s_addr, 0)) {
ret = -EINVAL;
break;
}
if (ifa->ifa_mask != sin->sin_addr.s_addr) {
inet_del_ifa(in_dev, ifap, 0);
ifa->ifa_mask = sin->sin_addr.s_addr;
ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
inet_insert_ifa(ifa);
}
break;
}
done:
rtnl_unlock();
dev_probe_unlock();
return ret;
rarok:
rtnl_unlock();
dev_probe_unlock();
if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
return 0;
}
static int
inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
{
struct in_device *in_dev = __in_dev_get(dev);
ASSERT_RTNL();
if (in_dev == NULL) { 如果IP设备块不存在
in_dev = inetdev_init(dev); 分配IP设备块
if (in_dev == NULL) {
inet_free_ifa(ifa);
return -ENOBUFS;
}
}
if (ifa->ifa_dev != in_dev) {
BUG_TRAP(ifa->ifa_dev==NULL);
in_dev_hold(in_dev);
ifa->ifa_dev=in_dev; 将地址结构绑定到IP设备块上
}
if (LOOPBACK(ifa->ifa_local)) 如果设备地址是回送地址
ifa->ifa_scope = RT_SCOPE_HOST; 地址的寻址范围为主机内部
return inet_insert_ifa(ifa);
}
static int
inet_insert_ifa(struct in_ifaddr *ifa)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1, **ifap, **last_primary;
ASSERT_RTNL();
if (ifa->ifa_local == 0) {
inet_free_ifa(ifa);
return 0;
}
ifa->ifa_flags &= ~IFA_F_SECONDARY; xx地址结构的从属标志
last_primary = &in_dev->ifa_list; 取IP设备块地址链表指针地址
for (ifap=&in_dev->ifa_list; (ifa1=*ifap)!=NULL; ifap=&ifa1->ifa_next) {
扫描IP设备块上的地址链
if (!(ifa1->ifa_flags&IFA_F_SECONDARY) && ifa->ifa_scope <= ifa1->ifa_scope)
last_primary = &ifa1->ifa_next;
if (ifa1->ifa_mask == ifa->ifa_mask && inet_ifa_match(ifa1->ifa_address, ifa)) {
; 如果与链中某个地址具有相同的网络地址
if (ifa1->ifa_local == ifa->ifa_local) { 如果两者地址相同
inet_free_ifa(ifa);
return -EEXIST;
}
if (ifa1->ifa_scope != ifa->ifa_scope) { 如果两者寻址范围不同
inet_free_ifa(ifa);
return -EINVAL;
}
ifa->ifa_flags |= IFA_F_SECONDARY; 标记为从属地址
}
}
if (!(ifa->ifa_flags&IFA_F_SECONDARY)) {
net_srandom(ifa->ifa_local);
ifap = last_primary;
}
ifa->ifa_next = *ifap;
write_lock_bh(&in_dev->lock);
*ifap = ifa;
write_unlock_bh(&in_dev->lock);
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
listeners of netlink will know about new ifaddr */
rtmsg_ifa(RTM_NEWADDR, ifa);
notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 发布设备启动消息
return 0;
}
static void
inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy)
{
struct in_ifaddr *ifa1 = *ifap; 取要删除地址结构的地址
ASSERT_RTNL();
/* 1. Deleting primary ifaddr forces deletion all secondaries */
if (!(ifa1->ifa_flags&IFA_F_SECONDARY)) { 如果删除的是设备主地址结构
struct in_ifaddr *ifa;
struct in_ifaddr **ifap1 = &ifa1->ifa_next; 取下一地址指针的地址
while ((ifa=*ifap1) != NULL) {
if (!(ifa->ifa_flags&IFA_F_SECONDARY) || 如果为主地址
ifa1->ifa_mask != ifa->ifa_mask ||
!inet_ifa_match(ifa1->ifa_address, ifa)) {
ifap1 = &ifa->ifa_next;
continue;
}
write_lock_bh(&in_dev->lock);
*ifap1 = ifa->ifa_next;
write_unlock_bh(&in_dev->lock);
rtmsg_ifa(RTM_DELADDR, ifa);
notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); 发布设备停机消息
inet_free_ifa(ifa);
}
}
/* 2. Unlink it */
write_lock_bh(&in_dev->lock);
*ifap = ifa1->ifa_next; 从设备地址链中删除该地址标签
write_unlock_bh(&in_dev->lock);
/* 3. Announce address deletion */
/* Send message first, then call notifier.
At first sight, FIB update triggered by notifier
will refer to already deleted ifaddr, that could confuse
netlink listeners. It is not true: look, gated sees
that route deleted and if it still thinks that ifaddr
is valid, it will try to restore deleted routes... Grr.
So that, this order is correct.
*/
rtmsg_ifa(RTM_DELADDR, ifa1);
notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
if (destroy) {
inet_free_ifa(ifa1);
if (in_dev->ifa_list == NULL)
inetdev_destroy(in_dev);
}
}
static void inetdev_destroy(struct in_device *in_dev)
{
struct in_ifaddr *ifa;
ASSERT_RTNL();
in_dev->dead = 1;
ip_mc_destroy_dev(in_dev);
while ((ifa = in_dev->ifa_list) != NULL) {
inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
inet_free_ifa(ifa);
}
#ifdef CONFIG_SYSCTL
devinet_sysctl_unregister(&in_dev->cnf);
#endif
write_lock_bh(&inetdev_lock);
in_dev->dev->ip_ptr = NULL;
/* in_dev_put following below will kill the in_device */
write_unlock_bh(&inetdev_lock);
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
in_dev_put(in_dev);
}
struct in_device *inetdev_init(struct net_device *dev)
{
struct in_device *in_dev;
ASSERT_RTNL();
in_dev = kmalloc(sizeof(*in_dev), GFP_KERNEL);
if (!in_dev)
return NULL;
memset(in_dev, 0, sizeof(*in_dev));
in_dev->lock = RW_LOCK_UNLOCKED;
memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
in_dev->cnf.sysctl = NULL;
in_dev->dev = dev;
if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) {
kfree(in_dev);
return NULL;
}
inet_dev_count++;
/* Reference in_dev->dev */
dev_hold(dev);
#ifdef CONFIG_SYSCTL
neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4");
#endif
write_lock_bh(&inetdev_lock);
dev->ip_ptr = in_dev;
/* Account for reference dev->ip_ptr */
in_dev_hold(in_dev);
write_unlock_bh(&inetdev_lock);
#ifdef CONFIG_SYSCTL
devinet_sysctl_register(in_dev, &in_dev->cnf);
#endif
if (dev->flags&IFF_UP)
ip_mc_up(in_dev);
return in_dev;
}
static __inline__ void inet_free_ifa(struct in_ifaddr *ifa)
{
if (ifa->ifa_dev)
__in_dev_put(ifa->ifa_dev);
kfree(ifa);
inet_ifa_count--;
}
static struct in_ifaddr * inet_alloc_ifa(void)
{
struct in_ifaddr *ifa;
ifa = kmalloc(sizeof(*ifa), GFP_KERNEL);
if (ifa) {
memset(ifa, 0, sizeof(*ifa));
inet_ifa_count++;
}
return ifa;
}
extern __inline__ struct in_device *
in_dev_get(const struct net_device *dev)
{
struct in_device *in_dev;
read_lock(&inetdev_lock);
in_dev = dev->ip_ptr;
if (in_dev)
atomic_inc(&in_dev->refcnt);
read_unlock(&inetdev_lock);
return in_dev;
}
extern __inline__ struct in_device *
__in_dev_get(const struct net_device *dev)
{
return (struct in_device*)dev->ip_ptr;
}
extern __inline__ void
in_dev_put(struct in_device *idev)
{
if (atomic_dec_and_test(&idev->refcnt))
in_dev_finish_destroy(idev);
}
void in_dev_finish_destroy(struct in_device *idev)
{
struct net_device *dev = idev->dev;
BUG_TRAP(idev->ifa_list==NULL);
BUG_TRAP(idev->mc_list==NULL);
#ifdef NET_REFCNT_DEBUG
printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n", idev, dev ? dev->name : "NIL");
#endif
dev_put(dev);
if (!idev->dead) {
printk("Freeing alive in_device %p\n", idev);
return;
}
inet_dev_count--;
kfree(idev);
}