diff options
| -rw-r--r-- | include/linux/netfilter_bridge.h | 28 | ||||
| -rw-r--r-- | include/linux/skbuff.h | 8 | ||||
| -rw-r--r-- | include/net/netfilter/nf_tables.h | 155 | ||||
| -rw-r--r-- | include/net/netfilter/nf_tables_core.h | 3 | ||||
| -rw-r--r-- | include/uapi/linux/netfilter/nf_tables.h | 39 | ||||
| -rw-r--r-- | net/bridge/br_netfilter.c | 144 | ||||
| -rw-r--r-- | net/ipv4/netfilter/nf_reject_ipv4.c | 4 | ||||
| -rw-r--r-- | net/ipv6/netfilter/nf_reject_ipv6.c | 4 | ||||
| -rw-r--r-- | net/netfilter/Makefile | 2 | ||||
| -rw-r--r-- | net/netfilter/ipset/ip_set_hash_netiface.c | 32 | ||||
| -rw-r--r-- | net/netfilter/nf_log_common.c | 5 | ||||
| -rw-r--r-- | net/netfilter/nf_queue.c | 18 | ||||
| -rw-r--r-- | net/netfilter/nf_tables_api.c | 186 | ||||
| -rw-r--r-- | net/netfilter/nf_tables_core.c | 7 | ||||
| -rw-r--r-- | net/netfilter/nfnetlink_log.c | 17 | ||||
| -rw-r--r-- | net/netfilter/nfnetlink_queue_core.c | 28 | ||||
| -rw-r--r-- | net/netfilter/nft_dynset.c | 218 | ||||
| -rw-r--r-- | net/netfilter/nft_hash.c | 117 | ||||
| -rw-r--r-- | net/netfilter/nft_lookup.c | 2 | ||||
| -rw-r--r-- | net/netfilter/nft_meta.c | 5 | ||||
| -rw-r--r-- | net/netfilter/xt_cgroup.c | 2 | ||||
| -rw-r--r-- | net/netfilter/xt_physdev.c | 34 | ||||
| -rw-r--r-- | net/netfilter/xt_socket.c | 95 |
23 files changed, 973 insertions, 180 deletions
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 5fc0a0fe244b..ab8f76dba668 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -2,7 +2,7 @@ #define __LINUX_BRIDGE_NETFILTER_H #include <uapi/linux/netfilter_bridge.h> - +#include <linux/skbuff.h> enum nf_br_hook_priorities { NF_BR_PRI_FIRST = INT_MIN, @@ -17,15 +17,12 @@ enum nf_br_hook_priorities { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -#define BRNF_PKT_TYPE 0x01 #define BRNF_BRIDGED_DNAT 0x02 #define BRNF_NF_BRIDGE_PREROUTING 0x08 -#define BRNF_8021Q 0x10 -#define BRNF_PPPoE 0x20 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { - if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE)) + if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE) return PPPOE_SES_HLEN; return 0; } @@ -40,6 +37,27 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb) skb_dst_drop(skb); } +static inline int nf_bridge_get_physinif(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0; +} + +static inline int nf_bridge_get_physoutif(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0; +} + +static inline struct net_device * +nf_bridge_get_physindev(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physindev : NULL; +} + +static inline struct net_device * +nf_bridge_get_physoutdev(const struct sk_buff *skb) +{ + return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL; +} #else #define br_drop_fake_rtable(skb) do { } while (0) #endif /* CONFIG_BRIDGE_NETFILTER */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 36f3f43c0117..0991259643d6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -166,10 +166,16 @@ struct nf_conntrack { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { atomic_t use; + enum { + BRNF_PROTO_UNCHANGED, + BRNF_PROTO_8021Q, + BRNF_PROTO_PPPOE + } orig_proto; + bool pkt_otherhost; unsigned int mask; struct net_device *physindev; struct net_device *physoutdev; - unsigned long data[32 / sizeof(unsigned long)]; + char neigh_header[8]; }; #endif diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 804981980393..d6a2f0ed5130 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -195,6 +195,7 @@ struct nft_set_estimate { }; struct nft_set_ext; +struct nft_expr; /** * struct nft_set_ops - nf_tables set operations @@ -217,6 +218,15 @@ struct nft_set_ops { bool (*lookup)(const struct nft_set *set, const struct nft_data *key, const struct nft_set_ext **ext); + bool (*update)(struct nft_set *set, + const struct nft_data *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_data []), + const struct nft_expr *expr, + struct nft_data data[], + const struct nft_set_ext **ext); + int (*insert)(const struct nft_set *set, const struct nft_set_elem *elem); void (*activate)(const struct nft_set *set, @@ -257,6 +267,9 @@ void nft_unregister_set(struct nft_set_ops *ops); * @dtype: data type (verdict or numeric type defined by userspace) * @size: maximum set size * @nelems: number of elements + * @ndeact: number of deactivated elements queued for removal + * @timeout: default timeout value in msecs + * @gc_int: garbage collection interval in msecs * @policy: set parameterization (see enum nft_set_policies) * @ops: set ops * @pnet: network namespace @@ -272,7 +285,10 @@ struct nft_set { u32 ktype; u32 dtype; u32 size; - u32 nelems; + atomic_t nelems; + u32 ndeact; + u64 timeout; + u32 gc_int; u16 policy; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; @@ -289,16 +305,27 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline struct nft_set *nft_set_container_of(const void *priv) +{ + return (void *)priv - offsetof(struct nft_set, data); +} + struct nft_set *nf_tables_set_lookup(const struct nft_table *table, const struct nlattr *nla); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, const struct nlattr *nla); +static inline unsigned long nft_set_gc_interval(const struct nft_set *set) +{ + return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ; +} + /** * struct nft_set_binding - nf_tables set binding * * @list: set bindings list node * @chain: chain containing the rule bound to the set + * @flags: set action flags * * A set binding contains all information necessary for validation * of new elements added to a bound set. @@ -306,6 +333,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net, struct nft_set_binding { struct list_head list; const struct nft_chain *chain; + u32 flags; }; int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, @@ -319,12 +347,18 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_KEY: element key * @NFT_SET_EXT_DATA: mapping data * @NFT_SET_EXT_FLAGS: element flags + * @NFT_SET_EXT_TIMEOUT: element timeout + * @NFT_SET_EXT_EXPIRATION: element expiration time + * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { NFT_SET_EXT_KEY, NFT_SET_EXT_DATA, NFT_SET_EXT_FLAGS, + NFT_SET_EXT_TIMEOUT, + NFT_SET_EXT_EXPIRATION, + NFT_SET_EXT_USERDATA, NFT_SET_EXT_NUM }; @@ -421,15 +455,97 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_FLAGS); } +static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); +} + +static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); +} + +static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_USERDATA); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && + time_is_before_eq_jiffies(*nft_set_ext_expiration(ext)); +} + static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, void *elem) { return elem + set->ops->elemsize; } +void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const struct nft_data *key, + const struct nft_data *data, + u64 timeout, gfp_t gfp); void nft_set_elem_destroy(const struct nft_set *set, void *elem); /** + * struct nft_set_gc_batch_head - nf_tables set garbage collection batch + * + * @rcu: rcu head + * @set: set the elements belong to + * @cnt: count of elements + */ +struct nft_set_gc_batch_head { + struct rcu_head rcu; + const struct nft_set *set; + unsigned int cnt; +}; + +#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \ + sizeof(struct nft_set_gc_batch_head)) / \ + sizeof(void *)) + +/** + * struct nft_set_gc_batch - nf_tables set garbage collection batch + * + * @head: GC batch head + * @elems: garbage collection elements + */ +struct nft_set_gc_batch { + struct nft_set_gc_batch_head head; + void *elems[NFT_SET_GC_BATCH_SIZE]; +}; + +struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, + gfp_t gfp); +void nft_set_gc_batch_release(struct rcu_head *rcu); + +static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb) +{ + if (gcb != NULL) + call_rcu(&gcb->head.rcu, nft_set_gc_batch_release); +} + +static inline struct nft_set_gc_batch * +nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb, + gfp_t gfp) +{ + if (gcb != NULL) { + if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems)) + return gcb; + nft_set_gc_batch_complete(gcb); + } + return nft_set_gc_batch_alloc(set, gfp); +} + +static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb, + void *elem) +{ + gcb->elems[gcb->head.cnt++] = elem; +} + +/** * struct nft_expr_type - nf_tables expression type * * @select_ops: function to select nft_expr_ops @@ -750,6 +866,8 @@ static inline u8 nft_genmask_cur(const struct net *net) return 1 << ACCESS_ONCE(net->nft.gencursor); } +#define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) + /* * Set element transaction helpers */ @@ -766,6 +884,41 @@ static inline void nft_set_elem_change_active(const struct nft_set *set, ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); } +/* + * We use a free bit in the genmask field to indicate the element + * is busy, meaning it is currently being processed either by + * the netlink API or GC. + * + * Even though the genmask is only a single byte wide, this works + * because the extension structure if fully constant once initialized, + * so there are no non-atomic write accesses unless it is already + * marked busy. + */ +#define NFT_SET_ELEM_BUSY_MASK (1 << 2) + +#if defined(__LITTLE_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_BUSY_BIT 2 +#elif defined(__BIG_ENDIAN_BITFIELD) +#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2) +#else +#error +#endif + +static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0); + return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word); +} + +static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) +{ + unsigned long *word = (unsigned long *)ext; + + clear_bit(NFT_SET_ELEM_BUSY_BIT, word); +} + /** * struct nft_trans - nf_tables object update in transaction * diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index a75fc8e27cd6..c6f400cfaac8 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -31,6 +31,9 @@ void nft_cmp_module_exit(void); int nft_lookup_module_init(void); void nft_lookup_module_exit(void); +int nft_dynset_module_init(void); +void nft_dynset_module_exit(void); + int nft_bitwise_module_init(void); void nft_bitwise_module_exit(void); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b9783931503b..05ee1e0804a3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -208,12 +208,14 @@ enum nft_rule_compat_attributes { * @NFT_SET_CONSTANT: set contents may not change while bound * @NFT_SET_INTERVAL: set contains intervals * @NFT_SET_MAP: set is used as a dictionary + * @NFT_SET_TIMEOUT: set uses timeouts */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, NFT_SET_CONSTANT = 0x2, NFT_SET_INTERVAL = 0x4, NFT_SET_MAP = 0x8, + NFT_SET_TIMEOUT = 0x10, }; /** @@ -252,6 +254,8 @@ enum nft_set_desc_attributes { * @NFTA_SET_POLICY: selection policy (NLA_U32) * @NFTA_SET_DESC: set description (NLA_NESTED) * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) + * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -265,6 +269,8 @@ enum nft_set_attributes { NFTA_SET_POLICY, NFTA_SET_DESC, NFTA_SET_ID, + NFTA_SET_TIMEOUT, + NFTA_SET_GC_INTERVAL, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -284,12 +290,18 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data) * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes) * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32) + * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64) + * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) + * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) */ enum nft_set_elem_attributes { NFTA_SET_ELEM_UNSPEC, NFTA_SET_ELEM_KEY, NFTA_SET_ELEM_DATA, NFTA_SET_ELEM_FLAGS, + NFTA_SET_ELEM_TIMEOUT, + NFTA_SET_ELEM_EXPIRATION, + NFTA_SET_ELEM_USERDATA, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) @@ -505,6 +517,33 @@ enum nft_lookup_attributes { }; #define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) +enum nft_dynset_ops { + NFT_DYNSET_OP_ADD, + NFT_DYNSET_OP_UPDATE, +}; + +/** + * enum nft_dynset_attributes - dynset expression attributes + * + * @NFTA_DYNSET_SET_NAME: name of set the to add data to (NLA_STRING) + * @NFTA_DYNSET_SET_ID: uniquely identifier of the set in the transaction (NLA_U32) + * @NFTA_DYNSET_OP: operation (NLA_U32) + * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32) + * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) + * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) + */ +enum nft_dynset_attributes { + NFTA_DYNSET_UNSPEC, + NFTA_DYNSET_SET_NAME, + NFTA_DYNSET_SET_ID, + NFTA_DYNSET_OP, + NFTA_DYNSET_SREG_KEY, + NFTA_DYNSET_SREG_DATA, + NFTA_DYNSET_TIMEOUT, + __NFTA_DYNSET_MAX, +}; +#define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) + /** * enum nft_payload_bases - nf_tables payload expression offset bases * diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index acd31c9f2116..ab55e2472beb 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,6 +111,24 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) +/* largest possible L2 header, see br_nf_dev_queue_xmit() */ +#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +struct brnf_frag_data { + char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; + u8 encap_size; + u8 size; +}; + +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); +#endif + +static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb->nf_bridge; +} + static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { struct net_bridge_port *port; @@ -189,14 +207,6 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) skb->network_header += len; } -static inline void nf_bridge_save_header(struct sk_buff *skb) -{ - int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - - skb_copy_from_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); -} - /* When handing a packet over to the IP layer * check whether we have a skb that is in the * expected format @@ -252,10 +262,16 @@ drop: static void nf_bridge_update_protocol(struct sk_buff *skb) { - if (skb->nf_bridge->mask & BRNF_8021Q) + switch (skb->nf_bridge->orig_proto) { + case BRNF_PROTO_8021Q: skb->protocol = htons(ETH_P_8021Q); - else if (skb->nf_bridge->mask & BRNF_PPPoE) + break; + case BRNF_PROTO_PPPOE: skb->protocol = htons(ETH_P_PPP_SES); + break; + case BRNF_PROTO_UNCHANGED: + break; + } } /* PF_BRIDGE/PRE_ROUTING *********************************************/ @@ -263,12 +279,12 @@ static void nf_bridge_update_protocol(struct sk_buff *skb) * bridge PRE_ROUTING hook. */ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; @@ -296,7 +312,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) */ static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct neighbour *neigh; struct dst_entry *dst; @@ -306,6 +321,7 @@ static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) dst = skb_dst(skb); neigh = dst_neigh_lookup_skb(dst, skb); if (neigh) { + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; if (neigh->hh.hh_len) { @@ -319,7 +335,7 @@ static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, + nf_bridge->neigh_header, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; @@ -392,7 +408,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; int frag_max_size; @@ -400,9 +416,9 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) frag_max_size = IPCB(skb)->frag_max_size; BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; if (dnat_took_place(skb)) { @@ -485,20 +501,21 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct /* Some common code for IPv4/IPv6 */ static struct net_device *setup_pre_routing(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; skb->dev = brnf_get_logical_dev(skb, skb->dev); + if (skb->protocol == htons(ETH_P_8021Q)) - nf_bridge->mask |= BRNF_8021Q; + nf_bridge->orig_proto = BRNF_PROTO_8021Q; else if (skb->protocol == htons(ETH_P_PPP_SES)) - nf_bridge->mask |= BRNF_PPPoE; + nf_bridge->orig_proto = BRNF_PROTO_PPPOE; /* Must drop socket now because of tproxy. */ skb_orphan(skb); @@ -680,14 +697,21 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { + int frag_max_size; + + if (skb->protocol == htons(ETH_P_IP)) { + frag_max_size = IPCB(skb)->frag_max_size; + BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + } + in = nf_bridge->physindev; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge_update_protocol(skb); } else { @@ -722,6 +746,10 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (!nf_bridge_unshare(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + if (!nf_bridge) + return NF_DROP; + parent = bridge_parent(state->out); if (!parent) return NF_DROP; @@ -735,14 +763,19 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, nf_bridge_pull_encap_header(skb); - nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } - if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) - return NF_DROP; + if (pf == NFPROTO_IPV4) { + int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; + + if (br_parse_ip_options(skb)) + return NF_DROP; + + IPCB(skb)->frag_max_size = frag_max; + } nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) @@ -792,30 +825,22 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) -static bool nf_bridge_copy_header(struct sk_buff *skb) +static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) { + struct brnf_frag_data *data; int err; - unsigned int header_size; - nf_bridge_update_protocol(skb); - header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow_head(skb, header_size); - if (err) - return false; - - skb_copy_to_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); - __skb_push(skb, nf_bridge_encap_header_len(skb)); - return true; -} + data = this_cpu_ptr(&brnf_frag_data_storage); + err = skb_cow_head(skb, data->size); -static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) -{ - if (!nf_bridge_copy_header(skb)) { + if (err) { kfree_skb(skb); return 0; } + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); + __skb_push(skb, data->encap_size); + return br_dev_queue_push_xmit(sk, skb); } @@ -833,14 +858,27 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) * boundaries by preserving frag_list rather than refragmenting. */ if (skb->len + mtu_reserved > skb->dev->mtu) { + struct brnf_frag_data *data; + frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); - } else + } else { ret = br_dev_queue_push_xmit(sk, skb); + } return ret; } @@ -856,7 +894,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; @@ -882,11 +920,10 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } nf_bridge_pull_encap_header(skb); - nf_bridge_save_header(skb); if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else @@ -925,13 +962,16 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, */ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); skb_pull(skb, ETH_HLEN); nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; - skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), + nf_bridge->neigh_header, + ETH_HLEN - ETH_ALEN); skb->dev = nf_bridge->physindev; br_handle_frame_finish(NULL, skb); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index c5b794da51a9..3262e41ff76f 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -13,6 +13,7 @@ #include <net/dst.h> #include <net/netfilter/ipv4/nf_reject.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter_bridge.h> #include <net/netfilter/ipv4/nf_reject.h> const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, @@ -146,7 +147,8 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) */ if (oldskb->nf_bridge) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; + + nskb->dev = nf_bridge_get_physindev(oldskb); niph->tot_len = htons(nskb->len); ip_send_check(niph); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 3afdce03d94e..94b4c6dfb400 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -13,6 +13,7 @@ #include <net/ip6_checksum.h> #include <net/netfilter/ipv6/nf_reject.h> #include <linux/netfilter_ipv6.h> +#include <linux/netfilter_bridge.h> #include <net/netfilter/ipv6/nf_reject.h> const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, @@ -195,7 +196,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) */ if (oldskb->nf_bridge) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; + + nskb->dev = nf_bridge_get_physindev(oldskb); nskb->protocol = htons(ETH_P_IPV6); ip6h->payload_len = htons(sizeof(struct tcphdr)); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 89f73a9e9874..a87d8b8ec730 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 758b002130d9..380ef5148ea1 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -19,6 +19,7 @@ #include <net/netlink.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> @@ -211,6 +212,22 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next, #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) #include "ip_set_hash_gen.h" +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +static const char *get_physindev_name(const struct sk_buff *skb) +{ + struct net_device *dev = nf_bridge_get_physindev(skb); + + return dev ? dev->name : NULL; +} + +static const char *get_phyoutdev_name(const struct sk_buff *skb) +{ + struct net_device *dev = nf_bridge_get_physoutdev(skb); + + return dev ? dev->name : NULL; +} +#endif + static int hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -234,16 +251,15 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, e.ip &= ip_set_netmask(e.cidr); #define IFACE(dir) (par->dir ? par->dir->name : NULL) -#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL) #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - const struct nf_bridge_info *nf_bridge = skb->nf_bridge; + e.iface = SRCDIR ? get_physindev_name(skb) : + get_phyoutdev_name(skb); - if (!nf_bridge) + if (!e.iface) return -EINVAL; - e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); e.physdev = 1; #else e.iface = NULL; @@ -476,11 +492,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - const struct nf_bridge_info *nf_bridge = skb->nf_bridge; - - if (!nf_bridge) + e.iface = SRCDIR ? get_physindev_name(skb) : + get_phyoutdev_name(skb); + if (!e.iface) return -EINVAL; - e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + e.physdev = 1; #else e.iface = NULL; |
