全部博文(51)
分类: LINUX
2020-09-29 22:10:36
A tunneling protocol is a communications protocol that allows for the movement of data from one network to another.
Use Cases
· allow a foreign protocol to run over a network that does not support that particular protocol;
· provide services that are impractical or unsafe to be offered using only the underlying network services;
https://developers.redhat.com/blog/2019/05/17/an-introduction-to-linux-virtual-interfaces-tunnels/
将报文增加一层tunnel信息,就是增加了一个interface,新增的操作都在tunnel接口的驱动上完成;内核中一个文件就可以搞定。
drivers/net/vxlan.c //L2驱动,可以封装二层报文
net/ipv4/ipip.c //IP层驱动,封装IP报文;
net/ipv4/ip_gre.c
init:
· register net subsys
register_pernet_subsys(&vxlan_net_ops);
· register notifier
· register rtnetlink
rtnl_link_register(&vxlan_link_ops);
Management for network device
static const struct net_device_ops vxlan_netdev_ether_ops = { .ndo_init = vxlan_init, .ndo_uninit = vxlan_uninit, .ndo_open = vxlan_open, .ndo_stop = vxlan_stop, .ndo_start_xmit = vxlan_xmit, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_set_rx_mode = vxlan_set_multicast_list, .ndo_change_mtu = vxlan_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_fdb_add = vxlan_fdb_add, .ndo_fdb_del = vxlan_fdb_delete, .ndo_fdb_dump = vxlan_fdb_dump, .ndo_fdb_get = vxlan_fdb_get, .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, .ndo_change_proto_down = dev_change_proto_down_generic, }; |
iproute
ip link add DEVICE type vxlan id VNI [ dev PHYS_DEV ] [ { group | remote } IPADDR ] [ local { IPADDR | any } ] [ ttl TTL ] [ tos TOS ] [ df DF ] [ flowlabel FLOWLABEL ] [ dstport PORT ] [ srcport MIN MAX ] [ [no]learning ] [ [no]proxy ] [ [no]rsc ] [ [no]l2miss ] [ [no]l3miss ] [ [no]udpcsum ] [ [no]udp6zerocsumtx ] [ [no]udp6zerocsumrx ] [ ageing SECONDS ] [ maxaddress NUMBER ] [ [no]external ] [ gbp ] [ gpe ] |
do_iplink
-> iplink_modify
-> IFLA_INFO_KIND //attach type info
-> lu->parse_opt //attach other option: vxlan_parse_opt
Kernel:
__rtnl_newlink
-> ops = rtnl_link_ops_get(kind); // IFLA_INFO_KIND
-> dev = rtnl_create_link();
-> ops->newlink // vxlan_newlink
-> vxlan_nl2conf
-> __vxlan_dev_create
-> vxlan_dev_configure
-> vxlan_config_apply
-> vxlan_ether_setup
-> dev->netdev_ops = &vxlan_netdev_ether_ops;
Virtual Extensible LAN protocol
https://www.kernel.org/doc/Documentation/networking/vxlan.txt
The VXLAN protocol is a tunnelling protocol designed to solve the problem of limited VLAN IDs (4096) in IEEE 802.1q.
sudo ip netns add ns1
sudo ip netns add ns2
sudo nsenter --net=/var/run/netns/ns1 ip link add name veth1 type veth peer name veth2
sudo nsenter --net=/var/run/netns/ns1 ip link set dev veth2 netns ns2
sudo nsenter --net=/var/run/netns/ns1 ip addr add 192.168.1.100/24 dev veth1
sudo nsenter --net=/var/run/netns/ns1 ip link set dev veth1 up
sudo nsenter --net=/var/run/netns/ns1 ip a
sudo nsenter --net=/var/run/netns/ns2 ip addr add 192.168.1.200/24 dev veth2
sudo nsenter --net=/var/run/netns/ns2 ip link set dev veth2 up
sudo nsenter --net=/var/run/netns/ns2 ip a
sudo nsenter --net=/var/run/netns/ns1 ping -c 2 192.168.1.200
|
sudo ip netns del ns1
sudo ip netns del ns2
ip netns exec ns1 /bin/bash --rcfile <(echo "PS1=\"namespace ns1> \"")
ip netns exec ns2 /bin/bash --rcfile <(echo "PS1=\"namespace ns2> \"")
|
sudo ip netns exec ns1 ip link add name veth3 type veth peer name veth4
sudo ip netns exec ns1 ip link set dev veth4 netns ns2
sudo ip netns exec ns2 ip addr add 172.168.30.200/24 dev veth4
sudo ip netns exec ns2 ip link set dev veth4 up
sudo ip netns exec ns1 ping 172.168.30.200 |
|
NS1:
sudo ip netns exec ns1 ip link add vxlan01 type vxlan id 42 dstport 4789 remote 192.168.1.200 local 192.168.1.100 dev veth1
sudo ip netns exec ns1 ip addr add 172.200.18.100/24 dev vxlan01
sudo ip netns exec ns1 ip link set vxlan01 up
NS2:
sudo ip netns exec ns2 ip link add vxlan02 type vxlan id 42 dstport 4789 remote 192.168.1.100 local 192.168.1.200 dev veth2
sudo ip netns exec ns2 ip addr add 172.200.18.200/24 dev vxlan02
sudo ip netns exec ns2 ip link set vxlan02 up
sudo ip netns exec ns1 ip route add 172.168.30.0/24 dev veth1 |
or |
sudo ip netns exec ns1 ip route add 172.168.30.0/24 dev vxlan01 |
vxlan_xmit
-> vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
-> fetch info: remote_ip, local_ip, port, vni
-> vxlan_build_skb(); //Add VXLAN protocol header
-> udp_tunnel_xmit_skb();
-> iptunnel_xmit
-> ip_local_out()
· LWT (lightweight tunnel) is a simple way to do tunneling by attaching encapsulation instructions to routes.
· Metadata dst to carry metadata
· Use of this new infrastructure in VXLAN and GRE to enable flow based RX/TX tunnels using iproute2
sudo ip netns exec ns1 ping 172.168.30.200 |
NS1
sudo ip netns exec ns1 ip link add vxlan01 type vxlan dstport 4789 external
sudo ip netns exec ns1 ip addr add 172.200.18.100/24 dev vxlan01
sudo ip netns exec ns1 ip link set dev vxlan01 up
sudo ip netns exec ns1 ip route add 172.168.30.0/24 encap ip id 42 dst 192.168.1.200 dev vxlan01
NS2
sudo ip netns exec ns2 ip link add vxlan02 type vxlan id 42 dstport 4789 remote 192.168.1.100 local 192.168.1.200 dev veth2
sudo ip netns exec ns2 ip addr add 172.200.18.200/24 dev vxlan02
sudo ip netns exec ns2 ip link set vxlan02 up
lwt_parse_encap(rta, sizeof(buf), &argc, &argv, RTA_ENCAP, RTA_ENCAP_TYPE);
->parse_encap_ip
-> LWTUNNEL_IP_ID/ LWTUNNEL_IP_DST/
->parse_encap_bpf
Kernel :
lwtunnel_fill_encap
lwtunnel_xmit
https://blog.csdn.net/sinat_20184565/article/details/84952713
inet_rtm_newroute
->rtm_to_fib_config //
-> cfg->fc_encap_type // lwtunnel_valid_encap_type() ;
->fib_table_insert
->fib_create_info
-> fib_nh_init
-> fib_nh_common_init
-> lwtunnel_build_state
-> ops->build_state// tun_info保存在lwtstate->data;
-> nhc->nhc_lwtstate = lwtstate_get(lwtstate); //保存lwt信息;
__mkroute_output
-> rt_dst_alloc()
-> rt = dst_alloc(&ipv4_dst_ops,…);
-> rt->dst.output = ip_output;
-> rt_set_nexthop()
-> rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); // lwt info;
-> lwtunnel_set_redirect()
-> dst->output = lwtunnel_output;
__mkroute_input
->rt_set_nexthop();
->rt->dst.lwtstate = lwtstate_get
->lwtunnel_set_redirect(&rth->dst);
->skb_dst_set(skb, &rth->dst);
pod的报文经过cilium_host后,被直接重定向到cilium_vxlan;
tunnel信息由bpf map保存在skb->_skb_refdst中;
ip link add name cilium_vxlan address $(rnd_mac_addr) mtu $MTU type vxlan external
ip link set cilium_vxlan up
1.Set tun_info
bpf_skb_set_tunnel_key
-> fill tunnel info: info->key.tun_id, info->key.u.ipv4.dst,
-> skb_dst_set(skb, (struct dst_entry *) md);
-> skb->_skb_refdst = (unsigned long)dst;
2.redirect
skb_do_redirect
-> __bpf_redirect(skb, dev, ri->flags); //dev is target device ;
-> __bpf_redirect_common(skb, dev, flags);
-> __bpf_tx_skb(dev, skb);
-> skb->dev = dev;
-> dev_queue_xmit(skb);
-> ops->ndo_start_xmit(skb, dev);
3.vxlan_xmit
->skb_tunnel_info(skb);
->vxlan_xmit_one(skb, dev, vni, NULL, false);