From 23a1f8d44c0bca48f04fc2a2f1edafd826ce6133 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 8 Dec 2015 16:04:31 +0200 Subject: mac80211: process and save VHT MU-MIMO group frame The Group ID Management frame is an Action frame of category VHT. It is transmitted by the AP to assign or change the user position of a STA for one or more group IDs. Process and save the group membership data. Notify underlying driver of changes. Signed-off-by: Sara Sharon Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 7 +++++++ include/net/mac80211.h | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 452c0b0d2f32..d9ddb89533a7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -843,6 +843,8 @@ enum ieee80211_vht_opmode_bits { }; #define WLAN_SA_QUERY_TR_ID_LEN 2 +#define WLAN_MEMBERSHIP_LEN 8 +#define WLAN_USER_POSITION_LEN 16 /** * struct ieee80211_tpc_report_ie @@ -989,6 +991,11 @@ struct ieee80211_mgmt { u8 action_code; u8 operating_mode; } __packed vht_opmode_notif; + struct { + u8 action_code; + u8 membership[WLAN_MEMBERSHIP_LEN]; + u8 position[WLAN_USER_POSITION_LEN]; + } __packed vht_group_notif; struct { u8 action_code; u8 dialog_token; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7c30faff245f..8da483b2c067 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -298,6 +298,7 @@ struct ieee80211_vif_chanctx_switch { * note that this is only called when it changes after the channel * context had been assigned. * @BSS_CHANGED_OCB: OCB join status changed + * @BSS_CHANGED_MU_GROUPS: VHT MU-MIMO group id or user position changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -323,6 +324,7 @@ enum ieee80211_bss_change { BSS_CHANGED_BEACON_INFO = 1<<20, BSS_CHANGED_BANDWIDTH = 1<<21, BSS_CHANGED_OCB = 1<<22, + BSS_CHANGED_MU_GROUPS = 1<<23, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -435,6 +437,19 @@ struct ieee80211_event { } u; }; +/** + * struct ieee80211_mu_group_data - STA's VHT MU-MIMO group data + * + * This structure describes the group id data of VHT MU-MIMO + * + * @membership: 64 bits array - a bit is set if station is member of the group + * @position: 2 bits per group id indicating the position in the group + */ +struct ieee80211_mu_group_data { + u8 membership[WLAN_MEMBERSHIP_LEN]; + u8 position[WLAN_USER_POSITION_LEN]; +}; + /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -477,6 +492,7 @@ struct ieee80211_event { * @enable_beacon: whether beaconing should be enabled or not * @chandef: Channel definition for this BSS -- the hardware might be * configured a higher bandwidth than this BSS uses, for example. + * @mu_group: VHT MU-MIMO group membership data * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation. * This field is only valid when the channel is a wide HT/VHT channel. * Note that with TDLS this can be the case (channel is HT, protection must @@ -535,6 +551,7 @@ struct ieee80211_bss_conf { s32 cqm_rssi_thold; u32 cqm_rssi_hyst; struct cfg80211_chan_def chandef; + struct ieee80211_mu_group_data mu_group; __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; int arp_addr_cnt; bool qos; -- cgit v1.2.3 From f9cfa5f354b11e56cd8f019c12e14a42585586cd Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 8 Dec 2015 16:04:33 +0200 Subject: mac80211: add flag for duplication check Add an option for driver to check for packet duplication by itself. This is needed for example by the iwlwifi driver which parallelizes the RX path and does the duplication check per queue. Signed-off-by: Sara Sharon Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8da483b2c067..ecab934dc8d9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1063,7 +1063,7 @@ enum mac80211_rx_flags { RX_FLAG_HT_GF = BIT(13), RX_FLAG_AMPDU_DETAILS = BIT(14), RX_FLAG_PN_VALIDATED = BIT(15), - /* bit 16 free */ + RX_FLAG_DUP_VALIDATED = BIT(16), RX_FLAG_AMPDU_LAST_KNOWN = BIT(17), RX_FLAG_AMPDU_IS_LAST = BIT(18), RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(19), -- cgit v1.2.3 From fad471860c097844432c7cf5d3ae6a0a059c2bdc Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 8 Dec 2015 16:04:34 +0200 Subject: mac80211: pass RX aggregation window size to driver Currently mac80211 does not inform the driver of the window size when starting an RX aggregation session. To enable managing the reorder buffer in the driver or hardware the window size is needed. Signed-off-by: Sara Sharon Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ecab934dc8d9..a990338a766e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3047,9 +3047,11 @@ enum ieee80211_reconfig_type { * ieee80211_ampdu_mlme_action. Starting sequence number (@ssn) * is the first frame we expect to perform the action on. Notice * that TX/RX_STOP can pass NULL for this parameter. - * The @buf_size parameter is only valid when the action is set to - * %IEEE80211_AMPDU_TX_OPERATIONAL and indicates the peer's reorder - * buffer size (number of subframes) for this session -- the driver + * The @buf_size parameter is valid only when the action is set to + * %IEEE80211_AMPDU_RX_START or %IEEE80211_AMPDU_TX_OPERATIONAL and + * indicates the reorder buffer size (number of subframes) for this + * session. + * When the action is set to %IEEE80211_AMPDU_TX_OPERATIONAL the driver * may neither send aggregates containing more subframes than this * nor send aggregates in a way that lost frames would exceed the * buffer size. If just limiting the aggregate size, this would be -- cgit v1.2.3 From 4352a4d7f6bfd0aed0276a13fa4993db35714db4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 8 Dec 2015 16:04:35 +0200 Subject: mac80211: document status.freq restrictions It's not always necessary to set the status.freq field, for example when this would be an expensive calculation. It must be set for all management frames (as they might be reported to userspace), but for data frames it's not really required. Document this. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a990338a766e..bdee1cc19c7e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1108,6 +1108,8 @@ enum mac80211_rx_vht_flags { * it but can store it and pass it back to the driver for synchronisation * @band: the active band when this frame was received * @freq: frequency the radio was tuned to when receiving this frame, in MHz + * This field must be set for management frames, but isn't strictly needed + * for data (other) frames - for those it only affects radiotap reporting. * @signal: signal strength when receiving this frame, either in dBm, in dB or * unspecified depending on the hardware capabilities flags * @IEEE80211_HW_SIGNAL_* -- cgit v1.2.3 From 50ea05efaf3bed7dd34bcc2635a8b3f53bd0ccc1 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Wed, 30 Dec 2015 16:06:04 +0200 Subject: mac80211: pass block ack session timeout to to driver Currently mac80211 does not inform the driver of the session block ack timeout when starting a rx aggregation session. Drivers that manage the reorder buffer need to know this parameter. Seeing that there are now too many arguments for the drv_ampdu_action() function, wrap them inside a structure. Signed-off-by: Sara Sharon Signed-off-by: Johannes Berg --- include/net/mac80211.h | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bdee1cc19c7e..6c9c559394b0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2702,6 +2702,33 @@ enum ieee80211_ampdu_mlme_action { IEEE80211_AMPDU_TX_OPERATIONAL, }; +/** + * struct ieee80211_ampdu_params - AMPDU action parameters + * + * @action: the ampdu action, value from %ieee80211_ampdu_mlme_action. + * @sta: peer of this AMPDU session + * @tid: tid of the BA session + * @ssn: start sequence number of the session. TX/RX_STOP can pass 0. When + * action is set to %IEEE80211_AMPDU_RX_START the driver passes back the + * actual ssn value used to start the session and writes the value here. + * @buf_size: reorder buffer size (number of subframes). Valid only when the + * action is set to %IEEE80211_AMPDU_RX_START or + * %IEEE80211_AMPDU_TX_OPERATIONAL + * @amsdu: indicates the peer's ability to receive A-MSDU within A-MPDU. + * valid when the action is set to %IEEE80211_AMPDU_TX_OPERATIONAL + * @timeout: BA session timeout. Valid only when the action is set to + * %IEEE80211_AMPDU_RX_START + */ +struct ieee80211_ampdu_params { + enum ieee80211_ampdu_mlme_action action; + struct ieee80211_sta *sta; + u16 tid; + u16 ssn; + u8 buf_size; + bool amsdu; + u16 timeout; +}; + /** * enum ieee80211_frame_release_type - frame release reason * @IEEE80211_FRAME_RELEASE_PSPOLL: frame released for PS-Poll @@ -3046,15 +3073,9 @@ enum ieee80211_reconfig_type { * @ampdu_action: Perform a certain A-MPDU action * The RA/TID combination determines the destination and TID we want * the ampdu action to be performed for. The action is defined through - * ieee80211_ampdu_mlme_action. Starting sequence number (@ssn) - * is the first frame we expect to perform the action on. Notice - * that TX/RX_STOP can pass NULL for this parameter. - * The @buf_size parameter is valid only when the action is set to - * %IEEE80211_AMPDU_RX_START or %IEEE80211_AMPDU_TX_OPERATIONAL and - * indicates the reorder buffer size (number of subframes) for this - * session. + * ieee80211_ampdu_mlme_action. * When the action is set to %IEEE80211_AMPDU_TX_OPERATIONAL the driver - * may neither send aggregates containing more subframes than this + * may neither send aggregates containing more subframes than @buf_size * nor send aggregates in a way that lost frames would exceed the * buffer size. If just limiting the aggregate size, this would be * possible with a buf_size of 8: @@ -3065,9 +3086,6 @@ enum ieee80211_reconfig_type { * buffer size of 8. Correct ways to retransmit #1 would be: * - TX: 1 or 18 or 81 * Even "189" would be wrong since 1 could be lost again. - * The @amsdu parameter is valid when the action is set to - * %IEEE80211_AMPDU_TX_OPERATIONAL and indicates the peer's ability - * to receive A-MSDU within A-MPDU. * * Returns a negative error code on failure. * The callback can sleep. @@ -3409,9 +3427,7 @@ struct ieee80211_ops { int (*tx_last_beacon)(struct ieee80211_hw *hw); int (*ampdu_action)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum ieee80211_ampdu_mlme_action action, - struct ieee80211_sta *sta, u16 tid, u16 *ssn, - u8 buf_size, bool amsdu); + struct ieee80211_ampdu_params *params); int (*get_survey)(struct ieee80211_hw *hw, int idx, struct survey_info *survey); void (*rfkill_poll)(struct ieee80211_hw *hw); -- cgit v1.2.3 From 6e7333d315a768170a59ac771297ee0551bdddbf Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 1 Feb 2016 18:51:05 -0500 Subject: net: add rx_nohandler stat counter This adds an rx_nohandler stat counter, along with a sysfs statistics node, and copies the counter out via netlink as well. CC: "David S. Miller" CC: Eric Dumazet CC: Jiri Pirko CC: Daniel Borkmann CC: Tom Herbert CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ include/uapi/linux/if_link.h | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 289c2314d766..78a20cec2a0a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1397,6 +1397,8 @@ enum netdev_priv_flags { * do not use this in drivers * @tx_dropped: Dropped packets by core network, * do not use this in drivers + * @rx_nohandler: nohandler dropped packets by core network on + * inactive devices, do not use this in drivers * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, @@ -1611,6 +1613,7 @@ struct net_device { atomic_long_t rx_dropped; atomic_long_t tx_dropped; + atomic_long_t rx_nohandler; #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def * wireless_handlers; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a30b78090594..d3e90b91e07e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -35,6 +35,8 @@ struct rtnl_link_stats { /* for cslip etc */ __u32 rx_compressed; __u32 tx_compressed; + + __u32 rx_nohandler; /* dropped, no handler found */ }; /* The main device statistics structure */ @@ -68,6 +70,8 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; + + __u64 rx_nohandler; /* dropped, no handler found */ }; /* The struct should be in sync with struct ifmap */ -- cgit v1.2.3 From bb63daf9efb4f2bcb657d7179a53bd808f978dc9 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 1 Feb 2016 18:51:06 -0500 Subject: team: track sum of rx_nohandler for all slaves CC: Jiri Pirko CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/if_team.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_team.h b/include/linux/if_team.h index b84e49c3a738..174f43f43aff 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -24,6 +24,7 @@ struct team_pcpu_stats { struct u64_stats_sync syncp; u32 rx_dropped; u32 tx_dropped; + u32 rx_nohandler; }; struct team; -- cgit v1.2.3 From 61d2bcae99f66a640b3dd9632180209143fb5512 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Feb 2016 21:03:07 -0800 Subject: tcp: fastopen: accept data/FIN present in SYNACK message RFC 7413 (TCP Fast Open) 4.2.2 states that the SYNACK message MAY include data and/or FIN This patch adds support for the client side : If we receive a SYNACK with payload or FIN, queue the skb instead of ignoring it. Since we already support the same for SYN, we refactor the existing code and reuse it. Note we need to clone the skb, so this operation might fail under memory pressure. Sara Dickinson pointed out FreeBSD server Fast Open implementation was planned to generate such SYNACK in the future. The server side might be implemented on linux later. Reported-by: Sara Dickinson Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index f6f8f032c73e..27f4c733116d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1437,6 +1437,7 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, -- cgit v1.2.3 From ba905f5e2f63d86ed4cfbd3d9096fb28d156f1ee Mon Sep 17 00:00:00 2001 From: Kim Jones Date: Tue, 2 Feb 2016 03:51:16 +0000 Subject: ethtool: Declare netdev_rss_key as __read_mostly. netdev_rss_key is written to once and thereafter is read by drivers when they are initialising. The fact that it is mostly read and not written to makes it a candidate for a __read_mostly declaration. Signed-off-by: Kim Jones Signed-off-by: Alan Carey Acked-by: Rami Rosen Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 78a20cec2a0a..219f53c30cb3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3744,7 +3744,7 @@ void netdev_lower_state_changed(struct net_device *lower_dev, /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 -extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; +extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); int dev_get_nest_level(struct net_device *dev, -- cgit v1.2.3 From 824bd0ce6c7c43a9e1e210abf124958e54d88342 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Feb 2016 22:39:53 -0800 Subject: bpf: introduce BPF_MAP_TYPE_PERCPU_HASH map Introduce BPF_MAP_TYPE_PERCPU_HASH map type which is used to do accurate counters without need to use BPF_XADD instruction which turned out to be too costly for high-performance network monitoring. In the typical use case the 'key' is the flow tuple or other long living object that sees a lot of events per second. bpf_map_lookup_elem() returns per-cpu area. Example: struct { u32 packets; u32 bytes; } * ptr = bpf_map_lookup_elem(&map, &key); /* ptr points to this_cpu area of the value, so the following * increments will not collide with other cpus */ ptr->packets ++; ptr->bytes += skb->len; bpf_update_elem() atomically creates a new element where all per-cpu values are zero initialized and this_cpu value is populated with given 'value'. Note that non-per-cpu hash map always allocates new element and then deletes old after rcu grace period to maintain atomicity of update. Per-cpu hash map updates element values in-place. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index aa6f8571de13..43ae40c8763e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -81,6 +81,7 @@ enum bpf_map_type { BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, + BPF_MAP_TYPE_PERCPU_HASH, }; enum bpf_prog_type { -- cgit v1.2.3 From a10423b87a7eae75da79ce80a8d9475047a674ee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Feb 2016 22:39:54 -0800 Subject: bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map Primary use case is a histogram array of latency where bpf program computes the latency of block requests or other events and stores histogram of latency into array of 64 elements. All cpus are constantly running, so normal increment is not accurate, bpf_xadd causes cache ping-pong and this per-cpu approach allows fastest collision-free counters. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 83d1926c61e4..141fb0d45731 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -151,6 +151,7 @@ struct bpf_array { union { char value[0] __aligned(8); void *ptrs[0] __aligned(8); + void __percpu *pptrs[0] __aligned(8); }; }; #define MAX_TAIL_CALL_CNT 32 diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ae40c8763e..2ee0fde1bf96 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -82,6 +82,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_PERCPU_ARRAY, }; enum bpf_prog_type { -- cgit v1.2.3 From 15a07b33814d14ca817887dbea8530728dc0fbe4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Feb 2016 22:39:55 -0800 Subject: bpf: add lookup/update support for per-cpu hash and array maps The functions bpf_map_lookup_elem(map, key, value) and bpf_map_update_elem(map, key, value, flags) need to get/set values from all-cpus for per-cpu hash and array maps, so that user space can aggregate/update them as necessary. Example of single counter aggregation in user space: unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); long values[nr_cpus]; long value = 0; bpf_lookup_elem(fd, key, values); for (i = 0; i < nr_cpus; i++) value += values[i]; The user space must provide round_up(value_size, 8) * nr_cpus array to get/set values, since kernel will use 'long' copy of per-cpu values to try to copy good counters atomically. It's a best-effort, since bpf programs and user space are racing to access the same memory. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 141fb0d45731..90ee6ab24bc5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -183,6 +183,29 @@ int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname); +int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); +int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, + u64 flags); +int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, + u64 flags); + +/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and + * forced to use 'long' read/writes to try to atomically copy long counters. + * Best-effort only. No barriers here, since it _will_ race with concurrent + * updates from BPF programs. Called from bpf syscall and mostly used with + * size 8 or 16 bytes, so ask compiler to inline it. + */ +static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) +{ + const long *lsrc = src; + long *ldst = dst; + + size /= sizeof(long); + while (size--) + *ldst++ = *lsrc++; +} + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else -- cgit v1.2.3 From 7267bcda332e2782e21a559f3b1b859a35b4062d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 16 Jan 2016 00:48:52 +0100 Subject: bcma: identify bus cores (devices) found on BCM47189 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing defines and print proper names. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3feb1b2d75d8..991ebb4c2015 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -151,6 +151,8 @@ struct bcma_host_ops { #define BCMA_CORE_PCIE2 0x83C /* PCI Express Gen2 */ #define BCMA_CORE_USB30_DEV 0x83D #define BCMA_CORE_ARM_CR4 0x83E +#define BCMA_CORE_GCI 0x840 +#define BCMA_CORE_CMEM 0x846 /* CNDS DDR2/3 memory controller */ #define BCMA_CORE_ARM_CA7 0x847 #define BCMA_CORE_SYS_MEM 0x849 #define BCMA_CORE_DEFAULT 0xFFF -- cgit v1.2.3 From 67edf354faaf93156646e741483b2313bc756c0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 19 Jan 2016 08:45:25 +0100 Subject: bcma: use _PMU_ in all names of PMU registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PMU (Power Management Unit) seems to be a separated piece of hardware, just accessed using ChipCommon core registers. In recent Broadcom chipsets PMU is not bounded to CC but available as separated core. To make code cleaner & easier to review (for a correct R/W access) use clearer names. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_chipcommon.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index db51a6ffb7d6..96d8d56f240f 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -351,12 +351,12 @@ #define BCMA_CC_PMU_RES_REQTS 0x0640 /* PMU res req timer sel */ #define BCMA_CC_PMU_RES_REQT 0x0644 /* PMU res req timer */ #define BCMA_CC_PMU_RES_REQM 0x0648 /* PMU res req mask */ -#define BCMA_CC_CHIPCTL_ADDR 0x0650 -#define BCMA_CC_CHIPCTL_DATA 0x0654 -#define BCMA_CC_REGCTL_ADDR 0x0658 -#define BCMA_CC_REGCTL_DATA 0x065C -#define BCMA_CC_PLLCTL_ADDR 0x0660 -#define BCMA_CC_PLLCTL_DATA 0x0664 +#define BCMA_CC_PMU_CHIPCTL_ADDR 0x0650 +#define BCMA_CC_PMU_CHIPCTL_DATA 0x0654 +#define BCMA_CC_PMU_REGCTL_ADDR 0x0658 +#define BCMA_CC_PMU_REGCTL_DATA 0x065C +#define BCMA_CC_PMU_PLLCTL_ADDR 0x0660 +#define BCMA_CC_PMU_PLLCTL_DATA 0x0664 #define BCMA_CC_PMU_STRAPOPT 0x0668 /* (corerev >= 28) */ #define BCMA_CC_PMU_XTAL_FREQ 0x066C /* (pmurev >= 10) */ #define BCMA_CC_PMU_XTAL_FREQ_ILPCTL_MASK 0x00001FFF -- cgit v1.2.3 From b3c47afbf54d86daa0473895e8ca9e8b663f5c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Tue, 19 Jan 2016 08:45:26 +0100 Subject: bcma: support PMU present as separated bus core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On recent Broadcom chipsets PMU is present as separated core and it can't be accessed using ChipCommon anymore as it fails with e.g.: [ 0.000577] Unhandled fault: external abort on non-linefetch (0x1008) at 0xf1000604 Solve it by using a new (PMU) core pointer set to ChipCommon or PMU depending on the hardware capabilities. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma_driver_chipcommon.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 96d8d56f240f..700d0c6f7480 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -217,6 +217,11 @@ #define BCMA_CC_CLKDIV_JTAG_SHIFT 8 #define BCMA_CC_CLKDIV_UART 0x000000FF #define BCMA_CC_CAP_EXT 0x00AC /* Capabilities */ +#define BCMA_CC_CAP_EXT_SECI_PRESENT 0x00000001 +#define BCMA_CC_CAP_EXT_GSIO_PRESENT 0x00000002 +#define BCMA_CC_CAP_EXT_GCI_PRESENT 0x00000004 +#define BCMA_CC_CAP_EXT_SECI_PUART_PRESENT 0x00000008 /* UART present */ +#define BCMA_CC_CAP_EXT_AOB_PRESENT 0x00000040 #define BCMA_CC_PLLONDELAY 0x00B0 /* Rev >= 4 only */ #define BCMA_CC_FREFSELDELAY 0x00B4 /* Rev >= 4 only */ #define BCMA_CC_SLOWCLKCTL 0x00B8 /* 6 <= Rev <= 9 only */ @@ -566,6 +571,7 @@ * Check availability with ((struct bcma_chipcommon)->capabilities & BCMA_CC_CAP_PMU) */ struct bcma_chipcommon_pmu { + struct bcma_device *core; /* Can be separated core or just ChipCommon one */ u8 rev; /* PMU revision */ u32 crystalfreq; /* The active crystal frequency (in kHz) */ }; @@ -660,6 +666,19 @@ struct bcma_drv_cc_b { #define bcma_cc_maskset32(cc, offset, mask, set) \ bcma_cc_write32(cc, offset, (bcma_cc_read32(cc, offset) & (mask)) | (set)) +/* PMU registers access */ +#define bcma_pmu_read32(cc, offset) \ + bcma_read32((cc)->pmu.core, offset) +#define bcma_pmu_write32(cc, offset, val) \ + bcma_write32((cc)->pmu.core, offset, val) + +#define bcma_pmu_mask32(cc, offset, mask) \ + bcma_pmu_write32(cc, offset, bcma_pmu_read32(cc, offset) & (mask)) +#define bcma_pmu_set32(cc, offset, set) \ + bcma_pmu_write32(cc, offset, bcma_pmu_read32(cc, offset) | (set)) +#define bcma_pmu_maskset32(cc, offset, mask, set) \ + bcma_pmu_write32(cc, offset, (bcma_pmu_read32(cc, offset) & (mask)) | (set)) + extern u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks); extern u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc); -- cgit v1.2.3 From 61dba73cdbba8ec5c01b31beaf9e2debc2d2f273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 24 Jan 2016 16:37:33 +0100 Subject: bcma: add support for BCM47094 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's another SoC with 32 GPIOs and simplified watchdog handling. It was tested on D-Link DIR-885L. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 991ebb4c2015..0367c63f5960 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -201,6 +201,7 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4707 1 #define BCMA_PKG_ID_BCM4708 2 #define BCMA_PKG_ID_BCM4709 0 +#define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 /* Board types (on PCI usually equals to the subsystem dev id) */ -- cgit v1.2.3 From e3e17b773bfe45462b7f3fae20c550025975cb13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 6 Feb 2016 11:16:28 -0800 Subject: tcp: fastopen: call tcp_fin() if FIN present in SYNACK When we acknowledge a FIN, it is not enough to ack the sequence number and queue the skb into receive queue. We also have to call tcp_fin() to properly update socket state and send proper poll() notifications. It seems we also had the problem if we received a SYN packet with the FIN flag set, but it does not seem an urgent issue, as no known implementation can do that. Fixes: 61d2bcae99f6 ("tcp: fastopen: accept data/FIN present in SYNACK message") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 27f4c733116d..479d535609fd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -568,6 +568,7 @@ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); +void tcp_fin(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); -- cgit v1.2.3 From 0e715d6fbd2a4a1dcd215d6d51091346e6a3d3fa Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 2 Feb 2016 18:09:11 +0100 Subject: vxlan: cleanup types include/net/vxlan.h is a kernel header, no need to prefix fixed size types with double underscore. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/vxlan.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0fb86442544b..5c64250619c5 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -30,15 +30,15 @@ * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy */ struct vxlanhdr_gbp { - __u8 vx_flags; + u8 vx_flags; #ifdef __LITTLE_ENDIAN_BITFIELD - __u8 reserved_flags1:3, + u8 reserved_flags1:3, policy_applied:1, reserved_flags2:2, dont_learn:1, reserved_flags3:1; #elif defined(__BIG_ENDIAN_BITFIELD) - __u8 reserved_flags1:1, + u8 reserved_flags1:1, dont_learn:1, reserved_flags2:2, policy_applied:1, @@ -138,10 +138,10 @@ struct vxlan_config { int remote_ifindex; int mtu; __be16 dst_port; - __u16 port_min; - __u16 port_max; - __u8 tos; - __u8 ttl; + u16 port_min; + u16 port_max; + u8 tos; + u8 ttl; u32 flags; unsigned long age_interval; unsigned int addrmax; -- cgit v1.2.3 From 427bc465bf9fcdab749f6997ff7a4eecaef4ca40 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 2 Feb 2016 18:09:12 +0100 Subject: vxlan: remove duplicated macros VNI_HASH_BITS and VNI_HASH_SIZE are defined twice. Remove the extra definitions. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/vxlan.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 5c64250619c5..234bf1ef2737 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -9,9 +9,6 @@ #include #include -#define VNI_HASH_BITS 10 -#define VNI_HASH_SIZE (1< Date: Tue, 2 Feb 2016 18:09:13 +0100 Subject: vxlan: restructure vxlan.h definitions RCO and GBP are VXLAN extensions, not specified in RFC 7348. Because of that, they need to be explicitly enabled when creating vxlan interface. By default, those extensions are not used and plain VXLAN header is sent and received. Reflect this in vxlan.h: first, the plain VXLAN header is defined. Following it, RCO is documented and defined, and likewise for GBP. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/net/vxlan.h | 104 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 63 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 234bf1ef2737..25bd919c9ef0 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -9,14 +9,71 @@ #include #include +/* VXLAN protocol (RFC 7348) header: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|I|R|R|R| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * I = VXLAN Network Identifier (VNI) present. + */ +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +}; + +/* VXLAN header flags. */ +#define VXLAN_HF_VNI BIT(27) + +#define VXLAN_N_VID (1u << 24) +#define VXLAN_VID_MASK (VXLAN_N_VID - 1) +#define VXLAN_VNI_MASK (VXLAN_VID_MASK << 8) +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) + +#define VNI_HASH_BITS 10 +#define VNI_HASH_SIZE (1<mark mapping @@ -59,44 +119,6 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) -/* VXLAN protocol header: - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |G|R|R|R|I|R|R|C| Reserved | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | VXLAN Network Identifier (VNI) | Reserved | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * G = 1 Group Policy (VXLAN-GBP) - * I = 1 VXLAN Network Identifier (VNI) present - * C = 1 Remote checksum offload (RCO) - */ -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -}; - -/* VXLAN header flags. */ -#define VXLAN_HF_RCO BIT(21) -#define VXLAN_HF_VNI BIT(27) -#define VXLAN_HF_GBP BIT(31) - -/* Remote checksum offload header option */ -#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ -#define VXLAN_RCO_UDP 0x80 /* Indicate UDP RCO (TCP when not set *) */ -#define VXLAN_RCO_SHIFT 1 /* Left shift of start */ -#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) -#define VXLAN_MAX_REMCSUM_START (VXLAN_RCO_MASK << VXLAN_RCO_SHIFT) - -#define VXLAN_N_VID (1u << 24) -#define VXLAN_VID_MASK (VXLAN_N_VID - 1) -#define VXLAN_VNI_MASK (VXLAN_VID_MASK << 8) -#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - -#define VNI_HASH_BITS 10 -#define VNI_HASH_SIZE (1< Date: Tue, 2 Feb 2016 07:43:45 -0800 Subject: net: Add support for fill_slave_info to VRF device Allows userspace to have direct access to VRF table association versus looking up master device and its table. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d3e90b91e07e..d452cea59020 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -405,6 +405,14 @@ enum { #define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1) +enum { + IFLA_VRF_PORT_UNSPEC, + IFLA_VRF_PORT_TABLE, + __IFLA_VRF_PORT_MAX +}; + +#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1) + /* IPVLAN section */ enum { IFLA_IPVLAN_UNSPEC, -- cgit v1.2.3 From ddf1af6fa00e772fdb67a7d22cb83fac2b8968a8 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 2 Feb 2016 10:33:06 -0800 Subject: tcp: new delivery accounting This patch changes the accounting of how many packets are newly acked or sacked when the sender receives an ACK. The current approach basically computes newly_acked_sacked = (prior_packets - prior_sacked) - (tp->packets_out - tp->sacked_out) where prior_packets and prior_sacked out are snapshot at the beginning of the ACK processing. The new approach tracks the delivery information via a new TCP state variable "delivered" which monotically increases as new packets are delivered in order or out-of-order. The reason for this change is that the current approach is brittle that produces negative or inaccurate estimate. 1) For non-SACK connections, an ACK that advances the SND.UNA could reset the DUPACK counters (tp->sacked_out) in tcp_process_loss() or tcp_fastretrans_alert(). This inflates the inflight suddenly and causes under-estimate or even negative estimate. Here is a real example: before after (processing ACK) packets_out 75 73 sacked_out 23 0 ca state Loss Open The old approach computes (75-23) - (73 - 0) = -21 delivered while the new approach computes 1 delivered since it considers the 2nd-24th packets are delivered OOO. 2) MSS change would re-count packets_out and sacked_out so the estimate is in-accurate and can even become negative. E.g., the inflight is doubled when MSS is halved. 3) Spurious retransmission signaled by DSACK is not accounted The new approach is simpler and more robust. For SACK connections, tp->delivered increments as packets are being acked or sacked in SACK and ACK processing. For non-sack connections, it's done in tcp_remove_reno_sacks() and tcp_add_reno_sack(). When an ACK advances the SND.UNA, tp->delivered is incremented by the number of packets ACKed (less the current number of DUPACKs received plus one packet hole). Upon receiving a DUPACK, tp->delivered is incremented assuming one out-of-order packet is delivered. Upon receiving a DSACK, tp->delivered is incremtened assuming one retransmission is delivered in tcp_sacktag_write_queue(). Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b386361ba3e8..d909feeeaea2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -256,6 +256,7 @@ struct tcp_sock { u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ + u32 delivered; /* Total data packets delivered incl. rexmits */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ -- cgit v1.2.3 From 46fcc6ef9d39eb7b1becaa5ef5cba64d230f7c3f Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Tue, 2 Feb 2016 10:41:55 -0800 Subject: sunvnet: Add support for perf LDC event tracing Add perf event macros for support of tracing and instrumentation of LDC state machine Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- include/trace/events/sunvnet.h | 139 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 include/trace/events/sunvnet.h (limited to 'include') diff --git a/include/trace/events/sunvnet.h b/include/trace/events/sunvnet.h new file mode 100644 index 000000000000..eb080b267e55 --- /dev/null +++ b/include/trace/events/sunvnet.h @@ -0,0 +1,139 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sunvnet + +#if !defined(_TRACE_SUNVNET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SUNVNET_H + +#include + +TRACE_EVENT(vnet_rx_one, + + TP_PROTO(int lsid, int rsid, int index, int needs_ack), + + TP_ARGS(lsid, rsid, index, needs_ack), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, index) + __field(int, needs_ack) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->index = index; + __entry->needs_ack = needs_ack; + ), + + TP_printk("(%x:%x) walk_rx_one index %d; needs_ack %d", + __entry->lsid, __entry->rsid, + __entry->index, __entry->needs_ack) +); + +DECLARE_EVENT_CLASS(vnet_tx_stopped_ack_template, + + TP_PROTO(int lsid, int rsid, int ack_end, int npkts), + + TP_ARGS(lsid, rsid, ack_end, npkts), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, ack_end) + __field(int, npkts) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->ack_end = ack_end; + __entry->npkts = npkts; + ), + + TP_printk("(%x:%x) stopped ack for %d; npkts %d", + __entry->lsid, __entry->rsid, + __entry->ack_end, __entry->npkts) +); +DEFINE_EVENT(vnet_tx_stopped_ack_template, vnet_tx_send_stopped_ack, + TP_PROTO(int lsid, int rsid, int ack_end, int npkts), + TP_ARGS(lsid, rsid, ack_end, npkts)); +DEFINE_EVENT(vnet_tx_stopped_ack_template, vnet_tx_defer_stopped_ack, + TP_PROTO(int lsid, int rsid, int ack_end, int npkts), + TP_ARGS(lsid, rsid, ack_end, npkts)); +DEFINE_EVENT(vnet_tx_stopped_ack_template, vnet_tx_pending_stopped_ack, + TP_PROTO(int lsid, int rsid, int ack_end, int npkts), + TP_ARGS(lsid, rsid, ack_end, npkts)); + +TRACE_EVENT(vnet_rx_stopped_ack, + + TP_PROTO(int lsid, int rsid, int end), + + TP_ARGS(lsid, rsid, end), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, end) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->end = end; + ), + + TP_printk("(%x:%x) stopped ack for index %d", + __entry->lsid, __entry->rsid, __entry->end) +); + +TRACE_EVENT(vnet_tx_trigger, + + TP_PROTO(int lsid, int rsid, int start, int err), + + TP_ARGS(lsid, rsid, start, err), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, start) + __field(int, err) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->start = start; + __entry->err = err; + ), + + TP_printk("(%x:%x) Tx trigger for %d sent with err %d %s", + __entry->lsid, __entry->rsid, __entry->start, + __entry->err, __entry->err > 0 ? "(ok)" : " ") +); + +TRACE_EVENT(vnet_skip_tx_trigger, + + TP_PROTO(int lsid, int rsid, int last), + + TP_ARGS(lsid, rsid, last), + + TP_STRUCT__entry( + __field(int, lsid) + __field(int, rsid) + __field(int, last) + ), + + TP_fast_assign( + __entry->lsid = lsid; + __entry->rsid = rsid; + __entry->last = last; + ), + + TP_printk("(%x:%x) Skip Tx trigger. Last trigger sent was %d", + __entry->lsid, __entry->rsid, __entry->last) +); +#endif /* _TRACE_SOCK_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 103a8ad1fa3b261c78dfc842cb315defe9d40be0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 3 Feb 2016 04:04:36 +0100 Subject: ethtool: add speed/duplex validation functions Add functions which check if the speed/duplex are defined. Signed-off-by: Nikolay Aleksandrov Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 57fa39005e79..b2e180181629 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1319,11 +1319,45 @@ enum ethtool_sfeatures_retval_bits { #define SPEED_UNKNOWN -1 +static inline int ethtool_validate_speed(__u32 speed) +{ + switch (speed) { + case SPEED_10: + case SPEED_100: + case SPEED_1000: + case SPEED_2500: + case SPEED_5000: + case SPEED_10000: + case SPEED_20000: + case SPEED_25000: + case SPEED_40000: + case SPEED_50000: + case SPEED_56000: + case SPEED_100000: + case SPEED_UNKNOWN: + return 1; + } + + return 0; +} + /* Duplex, half or full. */ #define DUPLEX_HALF 0x00 #define DUPLEX_FULL 0x01 #define DUPLEX_UNKNOWN 0xff +static inline int ethtool_validate_duplex(__u8 duplex) +{ + switch (duplex) { + case DUPLEX_HALF: + case DUPLEX_FULL: + case DUPLEX_UNKNOWN: + return 1; + } + + return 0; +} + /* Which connector port. */ #define PORT_TP 0x00 #define PORT_AUI 0x01 -- cgit v1.2.3 From 6fa251663069e05daadd1666cbf3b658bf840ea4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:49 +0200 Subject: ipv4: Namespaceify tcp syn retries sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2b7907a35568..b7b5bd64df35 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -98,6 +98,8 @@ struct netns_ipv4 { int sysctl_tcp_keepalive_probes; int sysctl_tcp_keepalive_intvl; + int sysctl_tcp_syn_retries; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/tcp.h b/include/net/tcp.h index 479d535609fd..825485c7cc1a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_syn_retries; extern int sysctl_tcp_synack_retries; extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; -- cgit v1.2.3 From 7c083ecb3ba4583a625d5ff9655d1a819e374493 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:50 +0200 Subject: ipv4: Namespaceify tcp synack retries sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b7b5bd64df35..9e83084ab8c1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -99,6 +99,7 @@ struct netns_ipv4 { int sysctl_tcp_keepalive_intvl; int sysctl_tcp_syn_retries; + int sysctl_tcp_synack_retries; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 825485c7cc1a..05659e860039 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_synack_retries; extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; -- cgit v1.2.3 From 12ed8244ed8b31b023ea6d2851fd8b15f2999e9b Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:51 +0200 Subject: ipv4: Namespaceify tcp syncookies sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 9e83084ab8c1..ac000fccdf0f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -101,6 +101,8 @@ struct netns_ipv4 { int sysctl_tcp_syn_retries; int sysctl_tcp_synack_retries; + int sysctl_tcp_syncookies; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/include/net/tcp.h b/include/net/tcp.h index 05659e860039..1fb23b70d237 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -243,7 +243,6 @@ extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; -extern int sysctl_tcp_syncookies; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; -- cgit v1.2.3 From 1043e25ff96a1efc7bd34d11f5f32203a28a3bd7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:52 +0200 Subject: ipv4: Namespaceify tcp reordering sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- include/net/tcp.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ac000fccdf0f..eb4cd0a3c296 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -100,8 +100,8 @@ struct netns_ipv4 { int sysctl_tcp_syn_retries; int sysctl_tcp_synack_retries; - int sysctl_tcp_syncookies; + int sysctl_tcp_reordering; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 1fb23b70d237..7e9a147cabae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -961,9 +961,11 @@ static inline void tcp_enable_fack(struct tcp_sock *tp) */ static inline void tcp_enable_early_retrans(struct tcp_sock *tp) { + struct net *net = sock_net((struct sock *)tp); + tp->do_early_retrans = sysctl_tcp_early_retrans && sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && - sysctl_tcp_reordering == 3; + net->ipv4.sysctl_tcp_reordering == 3; } static inline void tcp_disable_early_retrans(struct tcp_sock *tp) -- cgit v1.2.3 From ae5c3f406cffe15ffd2aa544961b7cd027468d46 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:53 +0200 Subject: ipv4: Namespaceify tcp_retries1 sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index eb4cd0a3c296..dee6ba647461 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -102,6 +102,7 @@ struct netns_ipv4 { int sysctl_tcp_synack_retries; int sysctl_tcp_syncookies; int sysctl_tcp_reordering; + int sysctl_tcp_retries1; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 7e9a147cabae..da96b9af3e5f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_retries1; extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_fastopen; -- cgit v1.2.3 From c6214a97c86c660de4f7ddb8eed925192e646161 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:54 +0200 Subject: ipv4: Namespaceify tcp_retries2 sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index dee6ba647461..d92c8e5d0fbc 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -103,6 +103,7 @@ struct netns_ipv4 { int sysctl_tcp_syncookies; int sysctl_tcp_reordering; int sysctl_tcp_retries1; + int sysctl_tcp_retries2; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index da96b9af3e5f..a786cfa6301b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_retries2; extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; -- cgit v1.2.3 From c402d9beffb6141ab2e4d2ad8be71128803a28ca Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:55 +0200 Subject: ipv4: Namespaceify tcp_orphan_retries sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d92c8e5d0fbc..080230321985 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -104,6 +104,7 @@ struct netns_ipv4 { int sysctl_tcp_reordering; int sysctl_tcp_retries1; int sysctl_tcp_retries2; + int sysctl_tcp_orphan_retries; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index a786cfa6301b..71f840b89c76 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_orphan_retries; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; -- cgit v1.2.3 From 1e579caa18b96f9eb18f4f5416658cd15f37c062 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:56 +0200 Subject: ipv4: Namespaceify tcp_fin_timeout sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 080230321985..de5ff4385e84 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -105,6 +105,7 @@ struct netns_ipv4 { int sysctl_tcp_retries1; int sysctl_tcp_retries2; int sysctl_tcp_orphan_retries; + int sysctl_tcp_fin_timeout; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 71f840b89c76..3f160c2e6960 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -239,7 +239,6 @@ extern struct inet_timewait_death_row tcp_death_row; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; -extern int sysctl_tcp_fin_timeout; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; @@ -1249,7 +1248,7 @@ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; const int rto = inet_csk(sk)->icsk_rto; if (fin_timeout < (rto << 2) - (rto >> 1)) -- cgit v1.2.3 From 4979f2d9f7262b9b180bc83de8d70f7a7721c085 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:57 +0200 Subject: ipv4: Namespaceify tcp_notsent_lowat sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index de5ff4385e84..4d6ec3f6fafe 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -106,6 +106,7 @@ struct netns_ipv4 { int sysctl_tcp_retries2; int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; + unsigned int sysctl_tcp_notsent_lowat; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 3f160c2e6960..9b2cb0c8d876 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -267,7 +267,6 @@ extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; -extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; @@ -1682,7 +1681,8 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { - return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat; + struct net *net = sock_net((struct sock *)tp); + return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; } static inline bool tcp_stream_memory_free(const struct sock *sk) -- cgit v1.2.3 From 157ede6784ba2837c7dc43f195418c75927f8488 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Wed, 3 Feb 2016 09:57:04 +0100 Subject: bridge: mdb: add support for offloaded mdb entries Add new bitmask member 'flags' to br_mdb_entry structure. Adding MDB_FLAGS_OFFLOAD bit which indicates MDB entries is offloaded to hardware. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 18db14477bdd..ec3547234998 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -183,6 +183,8 @@ struct br_mdb_entry { #define MDB_TEMPORARY 0 #define MDB_PERMANENT 1 __u8 state; +#define MDB_FLAGS_OFFLOAD (1 << 0) + __u8 flags; __u16 vid; struct { union { -- cgit v1.2.3 From 5ee14e6d336f1daacf5ba73e831029c5ab7ae329 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 3 Feb 2016 13:17:01 +0100 Subject: bonding: 3ad: apply ad_actor settings changes immediately Currently the bonding allows to set ad_actor_system and prio while the bond device is down, but these are actually applied only if there aren't any slaves yet (applied to bond device when first slave shows up, and to slaves at 3ad bind time). After this patch changes are applied immediately and the new values can be used/seen after the bond's upped so it's not necessary anymore to release all and enslave again to see the changes. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- include/net/bond_3ad.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bond_3ad.h b/include/net/bond_3a