/*
* vrf.c: device driver to encapsulate a VRF space
*
* Copyright (c) 2015 Cumulus Networks. All rights reserved.
* Copyright (c) 2015 Shrijeet Mukherjee <shm@cumulusnetworks.com>
* Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
*
* Based on dummy, team and ipvlan drivers
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ip.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
#include <linux/rtnetlink.h>
#include <net/rtnetlink.h>
#include <linux/u64_stats_sync.h>
#include <linux/hashtable.h>
#include <linux/inetdevice.h>
#include <net/arp.h>
#include <net/ip.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/route.h>
#include <net/addrconf.h>
#include <net/l3mdev.h>
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
struct net_vrf {
struct rtable __rcu *rth;
struct rt6_info __rcu *rt6;
u32 tb_id;
};
struct pcpu_dstats {
u64 tx_pkts;
u64 tx_bytes;
u64 tx_drps;
u64 rx_pkts;
u64 rx_bytes;
struct u64_stats_sync syncp;
};
static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb)
{
vrf_dev->stats.tx_errors++;
kfree_skb(skb);
}
static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
int i;
for_each_possible_cpu(i) {
const struct pcpu_dstats *dstats;
u64 tbytes, tpkts, tdrops, rbytes, rpkts;
unsigned int start;
dstats = per_cpu_ptr(dev->dstats, i);
do {
start = u64_stats_fetch_begin_irq(&dstats->syncp);
tbytes = dstats->tx_bytes;
tpkts = dstats->tx_pkts;
tdrops = dstats->tx_drps;
rbytes = dstats->rx_bytes;
rpkts = dstats->rx_pkts;
} while (u64_stats_fetch_retry_irq(&dstats->syncp, start));
stats->tx_bytes += tbytes;
stats->tx_packets += tpkts;
stats->tx_dropped += tdrops;
stats->rx_bytes += rbytes;
stats->rx_packets += rpkts;
}
return stats;
}
#if IS_ENABLED(CONFIG_IPV6)
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
struct net_device *dev)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
struct flowi6 fl6 = {
/* needed to match OIF rule */
.flowi6_oif = dev->ifindex,
.flowi6_iif = LOOPBACK_IFINDEX,
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
};
int ret = NET_XMIT_DROP;
struct dst_entry *dst;
struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst;
dst = ip6_route_output(net, NULL, &fl6);
if (dst == dst_null)
goto err;
skb_dst_drop(skb);
skb_dst_set(skb, dst);
ret = ip6_local_out(net, skb->sk, skb);
if (unlikely(net_xmit_eval(ret)))
dev->stats.tx_errors++;
else
ret = NET_XMIT_SUCCESS;
return ret;
err:
vrf_tx_error(dev, skb);
return NET_XMIT_DROP;
}
#else
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
struct net_device *dev)
{
vrf_tx_error(dev, skb);
return NET_XMIT_DROP;
}
#endif
static int vrf_send_v4_prep(struct sk_buff *skb, struct flowi4 *fl4,
struct net_device *vrf_dev)
{
struct rtable *rt;
int err = 1;
rt = ip_route_output_flow(dev_net(vrf_dev), fl4, NULL);
if (IS_ERR(rt))
goto out;
/* TO-DO: what about broadcast ? */
if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
ip_rt_put(rt);
goto out;
}
skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
err = 0;
out:
return err;
}
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
struct net_device *vrf_dev)
{
struct iphdr *ip4h = ip_hdr(skb);
int ret = NET_XMIT_DROP;
struct flowi4 fl4 = {
/* needed to match OIF rule */
.flowi4_oif = vrf_dev->ifindex,
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_tos