Chinaunix首页 | 论坛 | 博客
  • 博客访问: 525394
  • 博文数量: 51
  • 博客积分: 345
  • 博客等级: 民兵
  • 技术积分: 534
  • 用 户 组: 普通用户
  • 注册时间: 2011-03-21 12:02
个人简介

文章分类

全部博文(51)

文章存档

2023年(2)

2022年(1)

2021年(7)

2020年(10)

2019年(2)

2016年(20)

2015年(5)

2014年(1)

2011年(3)

我的朋友

分类: LINUX

2020-09-29 22:10:36

Tunneling?



A tunneling protocol is a communications protocol that allows for the movement of data from one network to another.


Use Cases

·        allow a foreign protocol to run over a network that does not support that particular protocol;

·        provide services that are impractical or unsafe to be offered using only the underlying network services;


Virtual Interface: Tunnels

https://developers.redhat.com/blog/2019/05/17/an-introduction-to-linux-virtual-interfaces-tunnels/

将报文增加一层tunnel信息,就是增加了一个interface,新增的操作都在tunnel接口的驱动上完成;内核中一个文件就可以搞定。

drivers/net/vxlan.c           //L2驱动,可以封装二层报文

net/ipv4/ipip.c                  //IP层驱动,封装IP报文;

net/ipv4/ip_gre.c




Kernel Module

init:

·        register net subsys

register_pernet_subsys(&vxlan_net_ops);

·        register notifier

·        register rtnetlink

rtnl_link_register(&vxlan_link_ops);


Management for network device

static const struct net_device_ops vxlan_netdev_ether_ops = {

                    .ndo_init                        = vxlan_init,

                    .ndo_uninit                                       = vxlan_uninit,

                    .ndo_open                    = vxlan_open,

                    .ndo_stop                      = vxlan_stop,

                    .ndo_start_xmit                                = vxlan_xmit,

                    .ndo_get_stats64         = ip_tunnel_get_stats64,

                    .ndo_set_rx_mode      = vxlan_set_multicast_list,

                    .ndo_change_mtu                           = vxlan_change_mtu,

                    .ndo_validate_addr     = eth_validate_addr,

                    .ndo_set_mac_address                  = eth_mac_addr,

                    .ndo_fdb_add                                   = vxlan_fdb_add,

                    .ndo_fdb_del                                    = vxlan_fdb_delete,

                    .ndo_fdb_dump                               = vxlan_fdb_dump,

                    .ndo_fdb_get                                    = vxlan_fdb_get,

                    .ndo_fill_metadata_dst                   = vxlan_fill_metadata_dst,

                    .ndo_change_proto_down  = dev_change_proto_down_generic,

};

Configure

iproute

ip link add DEVICE type vxlan id VNI [ dev PHYS_DEV ] [ { group | remote } IPADDR ] [ local { IPADDR | any } ] [ ttl TTL ] [ tos TOS ] [ df DF ] [ flowlabel FLOWLABEL ] [ dstport PORT ] [ srcport MIN MAX ] [ [no]learning ] [ [no]proxy ] [ [no]rsc ] [ [no]l2miss ] [ [no]l3miss ] [ [no]udpcsum ] [ [no]udp6zerocsumtx ] [ [no]udp6zerocsumrx ] [ ageing SECONDS ] [ maxaddress NUMBER ] [ [no]external ] [ gbp ] [ gpe ]

do_iplink

-> iplink_modify

               -> IFLA_INFO_KIND          //attach type info

               -> lu->parse_opt               //attach other option: vxlan_parse_opt

Kernel:

__rtnl_newlink

-> ops = rtnl_link_ops_get(kind);                // IFLA_INFO_KIND

-> dev = rtnl_create_link();

-> ops->newlink                // vxlan_newlink

               -> vxlan_nl2conf

               -> __vxlan_dev_create

                              -> vxlan_dev_configure

                                             -> vxlan_config_apply

                                                            -> vxlan_ether_setup

                                                                           -> dev->netdev_ops = &vxlan_netdev_ether_ops;

Vxlan

Virtual Extensible LAN protocol

https://www.kernel.org/doc/Documentation/networking/vxlan.txt

The VXLAN protocol is a tunnelling protocol designed to solve the problem of limited VLAN IDs (4096) in IEEE 802.1q.




Testing Environment 

利用network namespace,来构建一个实验环境,来练习tunnel的配置

 sudo ip netns add ns1

 sudo ip netns add ns2

 sudo nsenter --net=/var/run/netns/ns1 ip link add name veth1 type veth peer name veth2

 sudo nsenter --net=/var/run/netns/ns1 ip link set dev veth2 netns ns2

 sudo nsenter --net=/var/run/netns/ns1 ip addr add 192.168.1.100/24 dev veth1

 sudo nsenter --net=/var/run/netns/ns1 ip link set dev veth1 up

 sudo nsenter --net=/var/run/netns/ns1 ip a

 sudo nsenter --net=/var/run/netns/ns2 ip addr add 192.168.1.200/24 dev veth2

 sudo nsenter --net=/var/run/netns/ns2 ip link set dev veth2 up

 sudo nsenter --net=/var/run/netns/ns2 ip a

 

 sudo nsenter --net=/var/run/netns/ns1 ping -c 2 192.168.1.200

 

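 # Cleanup: run the following two commands only after all experiments below are finished, since the later steps still use ns1/ns2

 sudo ip netns del ns1

 sudo ip netns del ns2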

 ip netns exec ns1 /bin/bash  --rcfile <(echo "PS1=\"namespace ns1> \"")

 ip netns exec ns2 /bin/bash  --rcfile <(echo "PS1=\"namespace ns2> \"")



 sudo ip netns exec ns1 ip link add name veth3 type veth peer name veth4

 sudo ip netns exec ns1 ip link set dev veth4 netns ns2

 sudo ip netns exec ns2 ip addr add 172.168.30.200/24 dev veth4

 sudo ip netns exec ns2 ip link set dev veth4 up



Typical VXLAN

Test Case

sudo ip netns exec ns1 ping 172.168.30.200



Create vxlan interface

NS1:

sudo ip netns exec ns1 ip link add vxlan01 type vxlan id 42 dstport 4789 remote 192.168.1.200 local 192.168.1.100 dev veth1

sudo ip netns exec ns1 ip addr add 172.200.18.100/24 dev vxlan01

sudo ip netns exec ns1 ip link set vxlan01 up

NS2:

sudo ip netns exec ns2 ip link add vxlan02 type vxlan id 42 dstport 4789 remote 192.168.1.100 local 192.168.1.200 dev veth2

sudo ip netns exec ns2 ip addr add 172.200.18.200/24 dev vxlan02

sudo ip netns exec ns2 ip link set vxlan02 up   


Configure Route

sudo ip netns exec ns1 ip route add 172.168.30.0/24 dev veth1

or

sudo ip netns exec ns1 ip route add 172.168.30.0/24 dev vxlan01


Code

vxlan_xmit

-> vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);

               -> fetch info: remote_ip, local_ip, port, vni

               -> vxlan_build_skb();        //Add VXLAN protocol header

               -> udp_tunnel_xmit_skb();

                              -> iptunnel_xmit

                                             -> ip_local_out()

LWT Tunnel: VXLAN

·        LWT (lightweight tunnels) are a simple way to do tunneling by attaching encapsulation instructions to routes.

·        Metadata dst to carry metadata

·        Use of this new infrastructure in VXLAN and GRE to enable flow based RX/TX tunnels using iproute2


Test Case

sudo ip netns exec ns1 ping 172.168.30.200


Create vxlan interface

NS1

sudo ip netns exec ns1 ip link add vxlan01 type vxlan dstport 4789 external

sudo ip netns exec ns1 ip addr add 172.200.18.100/24  dev vxlan01

sudo ip netns exec ns1 ip link set dev vxlan01 up

sudo ip netns exec ns1 ip route add 172.168.30.0/24 encap ip id 42 dst 192.168.1.200 dev vxlan01


NS2

sudo ip netns exec ns2 ip link add vxlan02 type vxlan id 42 dstport 4789 remote 192.168.1.100 local 192.168.1.200 dev veth2

sudo ip netns exec ns2 ip addr add 172.200.18.200/24 dev vxlan02

sudo ip netns exec ns2 ip link set vxlan02 up   


lwt_parse_encap(rta, sizeof(buf), &argc, &argv, RTA_ENCAP, RTA_ENCAP_TYPE);

->parse_encap_ip

               -> LWTUNNEL_IP_ID/ LWTUNNEL_IP_DST/

->parse_encap_bpf



Kernel :

lwtunnel_fill_encap

lwtunnel_xmit

https://blog.csdn.net/sinat_20184565/article/details/84952713

 

inet_rtm_newroute

 

->rtm_to_fib_config        //

-> cfg->fc_encap_type    //            lwtunnel_valid_encap_type() ;

->fib_table_insert

->fib_create_info

-> fib_nh_init

                                             -> fib_nh_common_init

                                                            -> lwtunnel_build_state

                                                                           -> ops->build_state// tun_info保存在lwtstate->data;

                                                            -> nhc->nhc_lwtstate = lwtstate_get(lwtstate);    //保存lwt信息;

 

__mkroute_output

-> rt_dst_alloc()

               -> rt = dst_alloc(&ipv4_dst_ops,…);

               -> rt->dst.output = ip_output;

-> rt_set_nexthop()

               -> rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);      // lwt info;

-> lwtunnel_set_redirect()

               -> dst->output = lwtunnel_output;

 

__mkroute_input

->rt_set_nexthop();

               ->rt->dst.lwtstate = lwtstate_get

->lwtunnel_set_redirect(&rth->dst);

->skb_dst_set(skb, &rth->dst);

cilium

pod的报文经过cilium_host后,被直接重定向到cilium_vxlan;

tunnel信息由bpf map保存在skb->_skb_refdst中;

Create vxlan interface

ip link add name cilium_vxlan address $(rnd_mac_addr) mtu $MTU type vxlan external

ip link set cilium_vxlan up






Code

1.Set tun_info

bpf_skb_set_tunnel_key

-> fill tunnel info: info->key.tun_id, info->key.u.ipv4.dst,

-> skb_dst_set(skb, (struct dst_entry *) md);

               -> skb->_skb_refdst = (unsigned long)dst;

 

2.redirect

skb_do_redirect

-> __bpf_redirect(skb, dev, ri->flags);      //dev is target device ;

               -> __bpf_redirect_common(skb, dev, flags);

-> __bpf_tx_skb(dev, skb);

               -> skb->dev = dev;

               -> dev_queue_xmit(skb);

                                                            -> ops->ndo_start_xmit(skb, dev);

3.vxlan_xmit

->skb_tunnel_info(skb);

->vxlan_xmit_one(skb, dev, vni, NULL, false);


阅读(1667) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~