40 files changed, 1493 insertions, 290 deletions
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 59f4a7304419..f0ba9c2ec639 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -26,6 +26,7 @@
 #define PHY_ID_BCM7366			0x600d8490
 #define PHY_ID_BCM7425			0x600d86b0
 #define PHY_ID_BCM7429			0x600d8730
+#define PHY_ID_BCM7435			0x600d8750
 #define PHY_ID_BCM7439			0x600d8480
 #define PHY_ID_BCM7439_2		0xae025080
 #define PHY_ID_BCM7445			0x600d8510
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 06b77f9dd3f2..e5f4164cbd99 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -231,6 +231,14 @@ struct cgroup {
 	int id;
 
 	/*
+	 * The depth this cgroup is at.  The root is at depth zero and each
+	 * step down the hierarchy increments the level.  This along with
+	 * ancestor_ids[] can determine whether a given cgroup is a
+	 * descendant of another without traversing the hierarchy.
+	 */
+	int level;
+
+	/*
 	 * Each non-empty css_set associated with this cgroup contributes
 	 * one to populated_cnt.  All children with non-zero popuplated_cnt
 	 * of their own contribute one.  The count is zero iff there's no
@@ -285,6 +293,9 @@ struct cgroup {
 
 	/* used to schedule release agent */
 	struct work_struct release_agent_work;
+
+	/* ids of the ancestors at each level including self */
+	int ancestor_ids[];
 };
 
 /*
@@ -304,6 +315,9 @@ struct cgroup_root {
 	/* The root cgroup.  Root is destroyed on its release. */
 	struct cgroup cgrp;
 
+	/* for cgrp->ancestor_ids[0] */
+	int cgrp_ancestor_id_storage;
+
 	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
 	atomic_t nr_cgrps;
 
@@ -521,4 +535,116 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
 
 #endif	/* CONFIG_CGROUPS */
 
+#ifdef CONFIG_SOCK_CGROUP_DATA
+
+/*
+ * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
+ * per-socket cgroup information except for memcg association.
+ *
+ * On legacy hierarchies, net_prio and net_cls controllers directly set
+ * attributes on each sock which can then be tested by the network layer.
+ * On the default hierarchy, each sock is associated with the cgroup it was
+ * created in and the networking layer can match the cgroup directly.
+ *
+ * To avoid carrying all three cgroup related fields separately in sock,
+ * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
+ * On boot, sock_cgroup_data records the cgroup that the sock was created
+ * in so that cgroup2 matches can be made; however, once either net_prio or
+ * net_cls starts being used, the area is overriden to carry prioidx and/or
+ * classid.  The two modes are distinguished by whether the lowest bit is
+ * set.  Clear bit indicates cgroup pointer while set bit prioidx and
+ * classid.
+ *
+ * While userland may start using net_prio or net_cls at any time, once
+ * either is used, cgroup2 matching no longer works.  There is no reason to
+ * mix the two and this is in line with how legacy and v2 compatibility is
+ * handled.  On mode switch, cgroup references which are already being
+ * pointed to by socks may be leaked.  While this can be remedied by adding
+ * synchronization around sock_cgroup_data, given that the number of leaked
+ * cgroups is bound and highly unlikely to be high, this seems to be the
+ * better trade-off.
+ */
+struct sock_cgroup_data {
+	union {
+#ifdef __LITTLE_ENDIAN
+		struct {
+			u8	is_data;
+			u8	padding;
+			u16	prioidx;
+			u32	classid;
+		} __packed;
+#else
+		struct {
+			u32	classid;
+			u16	prioidx;
+			u8	padding;
+			u8	is_data;
+		} __packed;
+#endif
+		u64		val;
+	};
+};
+
+/*
+ * There's a theoretical window where the following accessors race with
+ * updaters and return part of the previous pointer as the prioidx or
+ * classid.  Such races are short-lived and the result isn't critical.
+ */
+static inline u16 sock_cgroup_prioidx(struct sock_cgroup_data *skcd)
+{
+	/* fallback to 1 which is always the ID of the root cgroup */
+	return (skcd->is_data & 1) ? skcd->prioidx : 1;
+}
+
+static inline u32 sock_cgroup_classid(struct sock_cgroup_data *skcd)
+{
+	/* fallback to 0 which is the unconfigured default classid */
+	return (skcd->is_data & 1) ? skcd->classid : 0;
+}
+
+/*
+ * If invoked concurrently, the updaters may clobber each other.  The
+ * caller is responsible for synchronization.
+ */
+static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
+					   u16 prioidx)
+{
+	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
+
+	if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
+		return;
+
+	if (!(skcd_buf.is_data & 1)) {
+		skcd_buf.val = 0;
+		skcd_buf.is_data = 1;
+	}
+
+	skcd_buf.prioidx = prioidx;
+	WRITE_ONCE(skcd->val, skcd_buf.val);	/* see sock_cgroup_ptr() */
+}
+
+static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
+					   u32 classid)
+{
+	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
+
+	if (sock_cgroup_classid(&skcd_buf) == classid)
+		return;
+
+	if (!(skcd_buf.is_data & 1)) {
+		skcd_buf.val = 0;
+		skcd_buf.is_data = 1;
+	}
+
+	skcd_buf.classid = classid;
+	WRITE_ONCE(skcd->val, skcd_buf.val);	/* see sock_cgroup_ptr() */
+}
+
+#else	/* CONFIG_SOCK_CGROUP_DATA */
+
+struct sock_cgroup_data {
+};
+
+#endif	/* CONFIG_SOCK_CGROUP_DATA */
+
 #endif	/* _LINUX_CGROUP_DEFS_H */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index cb91b44f5f78..322a28482745 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -81,7 +81,8 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
 						       struct cgroup_subsys *ss);
 
-bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
+struct cgroup *cgroup_get_from_path(const char *path);
+
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
@@ -364,6 +365,11 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
 		percpu_ref_put_many(&css->refcnt, n);
 }
 
+static inline void cgroup_put(struct cgroup *cgrp)
+{
+	css_put(&cgrp->self);
+}
+
 /**
  * task_css_set_check - obtain a task's css_set with extra access conditions
  * @task: the task to obtain css_set for
@@ -471,6 +477,23 @@ static inline struct cgroup *task_cgroup(struct task_struct *task,
 	return task_css(task, subsys_id)->cgroup;
 }
 
+/**
+ * cgroup_is_descendant - test ancestry
+ * @cgrp: the cgroup to be tested
+ * @ancestor: possible ancestor of @cgrp
+ *
+ * Test whether @cgrp is a descendant of @ancestor.  It also returns %true
+ * if @cgrp == @ancestor.  This function is safe to call as long as @cgrp
+ * and @ancestor are accessible.
+ */
+static inline bool cgroup_is_descendant(struct cgroup *cgrp,
+					struct cgroup *ancestor)
+{
+	if (cgrp->root != ancestor->root || cgrp->level < ancestor->level)
+		return false;
+	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
+}
+
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
@@ -554,4 +577,45 @@ static inline int cgroup_init(void) { return 0; }
 
 #endif /* !CONFIG_CGROUPS */
 
+/*
+ * sock->sk_cgrp_data handling.  For more info, see sock_cgroup_data
+ * definition in cgroup-defs.h.
+ */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+
+#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+extern spinlock_t cgroup_sk_update_lock;
+#endif
+
+void cgroup_sk_alloc_disable(void);
+void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
+void cgroup_sk_free(struct sock_cgroup_data *skcd);
+
+static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
+{
+#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+	unsigned long v;
+
+	/*
+	 * @skcd->val is 64bit but the following is safe on 32bit too as we
+	 * just need the lower ulong to be written and read atomically.
+	 */
+	v = READ_ONCE(skcd->val);
+
+	if (v & 1)
+		return &cgrp_dfl_root.cgrp;
+
+	return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
+#else
+	return (struct cgroup *)(unsigned long)skcd->val;
+#endif
+}
+
+#else	/* CONFIG_CGROUP_DATA */
+
+static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {}
+static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
+
+#endif	/* CONFIG_CGROUP_DATA */
+
 #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index eb049c622208..37ff4a6faa9a 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -29,6 +29,9 @@
 #include <asm/bitsperlong.h>
 
 #ifdef __KERNEL__
+struct device;
+int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
+unsigned char *arch_get_platform_get_mac_address(void);
 u32 eth_get_headlen(void *data, unsigned int max_len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5972ffe5719a..43aa1f8855c7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -350,25 +350,43 @@ struct sk_filter {
 
 #define BPF_PROG_RUN(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
+#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
+
+static inline u8 *bpf_skb_cb(struct sk_buff *skb)
+{
+	/* eBPF programs may read/write skb->cb[] area to transfer meta
+	 * data between tail calls. Since this also needs to work with
+	 * tc, that scratch memory is mapped to qdisc_skb_cb's data area.
+	 *
+	 * In some socket filter cases, the cb unfortunately needs to be
+	 * saved/restored so that protocol specific skb->cb[] data won't
+	 * be lost. In any case, due to unpriviledged eBPF programs
+	 * attached to sockets, we need to clear the bpf_skb_cb() area
+	 * to not leak previous contents to user space.
+	 */
+	BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN);
+	BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) !=
+		     FIELD_SIZEOF(struct qdisc_skb_cb, data));
+
+	return qdisc_skb_cb(skb)->data;
+}
+
 static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
 				       struct sk_buff *skb)
 {
-	u8 *cb_data = qdisc_skb_cb(skb)->data;
-	u8 saved_cb[QDISC_CB_PRIV_LEN];
+	u8 *cb_data = bpf_skb_cb(skb);
+	u8 cb_saved[BPF_SKB_CB_LEN];
 	u32 res;
 
-	BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) !=
-		     QDISC_CB_PRIV_LEN);
-
 	if (unlikely(prog->cb_access)) {
-		memcpy(saved_cb, cb_data, sizeof(saved_cb));
-		memset(cb_data, 0, sizeof(saved_cb));
+		memcpy(cb_saved, cb_data, sizeof(cb_saved));
+		memset(cb_data, 0, sizeof(cb_saved));
 	}
 
 	res = BPF_PROG_RUN(prog, skb);
 
 	if (unlikely(prog->cb_access))
-		memcpy(cb_data, saved_cb, sizeof(saved_cb));
+		memcpy(cb_data, cb_saved, sizeof(cb_saved));
 
 	return res;
 }
@@ -376,10 +394,11 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
 static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 					struct sk_buff *skb)
 {
-	u8 *cb_data = qdisc_skb_cb(skb)->data;
+	u8 *cb_data = bpf_skb_cb(skb);
 
 	if (unlikely(prog->cb_access))
-		memset(cb_data, 0, QDISC_CB_PRIV_LEN);
+		memset(cb_data, 0, BPF_SKB_CB_LEN);
+
 	return BPF_PROG_RUN(prog, skb);
 }
 
@@ -447,6 +466,8 @@ void bpf_prog_destroy(struct bpf_prog *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
+int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
+int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
 int sk_detach_filter(struct sock *sk);
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index 519b6e2d769e..661e5c2a8e2a 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -16,6 +16,10 @@
 	struct hlist_head name[1 << (bits)] =					\
 			{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
 
+#define DEFINE_READ_MOSTLY_HASHTABLE(name, bits)				\
+	struct hlist_head name[1 << (bits)] __read_mostly =			\
+			{ [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
+
 #define DECLARE_HASHTABLE(name, bits)                                   	\
 	struct hlist_head name[1 << (bits)]
 
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index 1acb1445e05f..e31bcd4c7859 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -101,7 +101,7 @@ netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto,
 			 size_t size);
 /* May be used by hardware driver to gain control over HDLC device */
-void detach_hdlc_protocol(struct net_device *dev);
+int detach_hdlc_protocol(struct net_device *dev);
 
 static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb,
 					 struct net_device *dev)
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index b49cf923becc..ba7a9b0c7c57 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -91,7 +91,6 @@ enum {
     PPPOX_CONNECTED	= 1,  /* connection established ==TCP_ESTABLISHED */
     PPPOX_BOUND		= 2,  /* bound to ppp device */
     PPPOX_RELAY		= 4,  /* forwarding is enabled */
-    PPPOX_ZOMBIE	= 8,  /* dead, but still bound to ppp device */
     PPPOX_DEAD		= 16  /* dead, useless, please clean me up!*/
 };
 
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index a6aa970758a2..b84e49c3a738 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -164,6 +164,7 @@ struct team_mode {
 	size_t priv_size;
 	size_t port_priv_size;
 	const struct team_mode_ops *ops;
+	enum netdev_lag_tx_type lag_tx_type;
 };
 
 #define TEAM_PORT_HASHBITS 4
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 67ce5bd3b56a..a5f6ce6b578c 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -73,7 +73,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
 
-static inline bool is_vlan_dev(struct net_device *dev)
+static inline bool is_vlan_dev(const struct net_device *dev)
 {
         return dev->priv_flags & IFF_802_1Q_VLAN;
 }
@@ -621,7 +621,7 @@ static inline netdev_features_t vlan_features_check(const struct sk_buff *skb,
 						     NETIF_F_SG |
 						     NETIF_F_HIGHDMA |
 						     NETIF_F_FRAGLIST |
-						     NETIF_F_GEN_CSUM |
+						     NETIF_F_HW_CSUM |
 						     NETIF_F_HW_VLAN_CTAG_TX |
 						     NETIF_F_HW_VLAN_STAG_TX);
 
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0e707f0c1a3e..7c27fa1030e8 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -3,6 +3,7 @@
 
 #include <uapi/linux/inet_diag.h>
 
+struct net;
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
@@ -23,6 +24,10 @@ struct inet_diag_handler {
 	void		(*idiag_get_info)(struct sock *sk,
 					  struct inet_diag_msg *r,
 					  void *info);
+
+	int		(*destroy)(struct sk_buff *in_skb,
+				   const struct inet_diag_req_v2 *req);
+
 	__u16		idiag_type;
 	__u16		idiag_info_size;
 };
@@ -41,6 +46,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 			    struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 			    const struct inet_diag_req_v2 *req);
 
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 5d4e9c4b821d..af51df35d749 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -274,6 +274,8 @@ void pr_cont_kernfs_path(struct kernfs_node *kn);
 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
 					   const char *name, const void *ns);
+struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
+					   const char *path, const void *ns);
 void kernfs_get(struct kernfs_node *kn);
 void kernfs_put(struct kernfs_node *kn);
 
@@ -350,6 +352,10 @@ static inline struct kernfs_node *
 kernfs_find_and_get_ns(struct kernfs_node *parent, const char *name,
 		       const void *ns)
 { return NULL; }
+static inline struct kernfs_node *
+kernfs_walk_and_get_ns(struct kernfs_node *parent, const char *path,
+		       const void *ns)
+{ return NULL; }
 
 static inline void kernfs_get(struct kernfs_node *kn) { }
 static inline void kernfs_put(struct kernfs_node *kn) { }
@@ -431,6 +437,12 @@ kernfs_find_and_get(struct kernfs_node *kn, const char *name)
 }
 
 static inline struct kernfs_node *
+kernfs_walk_and_get(struct kernfs_node *kn, const char *path)
+{
+	return kernfs_walk_and_get_ns(kn, path, NULL);
+}
+
+static inline struct kernfs_node *
 kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
 		  void *priv)
 {
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index b42963bc81dd..5bfd99d1a40a 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -11,6 +11,55 @@
 
 #include <uapi/linux/mdio.h>
 
+struct mii_bus;
+
+struct mdio_device {
+	struct device dev;
+
+	const struct dev_pm_ops *pm_ops;
+	struct mii_bus *bus;
+
+	int (*bus_match)(struct device *dev, struct device_driver *drv);
+	void (*device_free)(struct mdio_device *mdiodev);
+	void (*device_remove)(struct mdio_device *mdiodev);
+
+	/* Bus address of the MDIO device (0-31) */
+	int addr;
+	int flags;
+};
+#define to_mdio_device(d) container_of(d, struct mdio_device, dev)
+
+/* struct mdio_driver_common: Common to all MDIO drivers */
+struct mdio_driver_common {
+	struct device_driver driver;
+	int flags;
+};
+#define MDIO_DEVICE_FLAG_PHY		1
+#define to_mdio_common_driver(d) \
+	container_of(d, struct mdio_driver_common, driver)
+
+/* struct mdio_driver: Generic MDIO driver */
+struct mdio_driver {
+	struct mdio_driver_common mdiodrv;
+
+	/*
+	 * Called during discovery.  Used to set
+	 * up device-specific structures, if any
+	 */
+	int (*probe)(struct mdio_device *mdiodev);
+
+	/* Clears up any memory if needed */
+	void (*remove)(struct mdio_device *mdiodev);
+};
+#define to_mdio_driver(d)						\
+	container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv)
+
+void mdio_device_free(struct mdio_device *mdiodev);
+struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr);
+int mdio_device_register(struct mdio_device *mdiodev);
+void mdio_device_remove(struct mdio_device *mdiodev);
+int mdio_driver_register(struct mdio_driver *drv);
+void mdio_driver_unregister(struct mdio_driver *drv);
 
 static inline bool mdio_phy_id_is_c45(int phy_id)
 {
@@ -173,4 +222,33 @@ static inline u16 ethtool_adv_to_mmd_eee_adv_t(u32 adv)
 	return reg;
 }
 
+int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
+int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum);
+int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
+int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val);
+
+int mdiobus_register_device(struct mdio_device *mdiodev);
+int mdiobus_unregister_device(struct mdio_device *mdiodev);
+bool mdiobus_is_registered_device(struct mii_bus *bus, int addr);
+struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr);
+
+/**
+ * module_mdio_driver() - Helper macro for registering mdio drivers
+ *
+ * Helper macro for MDIO drivers which do not do anything special in module
+ * init/exit. Each module may only use this macro once, and calling it
+ * replaces module_init() and module_exit().
+ */
+#define mdio_module_driver(_mdio_driver)				\
+static int __init mdio_module_init(void)				\
+{									\
+	return mdio_driver_register(&_mdio_driver);			\
+}									\
+module_init(mdio_module_init);						\
+static void __exit mdio_module_exit(void)				\
+{									\
+	mdio_driver_unregister(&_mdio_driver);				\
+}									\
+module_exit(mdio_module_exit)
+
 #endif /* __LINUX_MDIO_H__ */
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index 5a06d969338e..2e8af001c5da 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -75,6 +75,11 @@ static inline int mlx4_is_bonded(struct mlx4_dev *dev)
 	return !!(dev->flags & MLX4_FLAG_BONDED);
 }
 
+static inline int mlx4_is_mf_bonded(struct mlx4_dev *dev)
+{
+	return (mlx4_is_bonded(dev) && mlx4_is_mfunc(dev));
+}
+
 struct mlx4_port_map {
 	u8	port1;
 	u8	port2;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0b473cbfa7ef..7be845e30689 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -251,6 +251,7 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_PAGE_REQUEST	   = 0xb,
 
 	MLX5_EVENT_TYPE_PAGE_FAULT	   = 0xc,
+	MLX5_EVENT_TYPE_NIC_VPORT_CHANGE   = 0xd,
 };
 
 enum {
@@ -442,9 +443,12 @@ struct mlx5_init_seg {
 	__be32			rsvd1[120];
 	__be32			initializing;
 	struct health_buffer	health;
-	__be32			rsvd2[884];
+	__be32			rsvd2[880];
+	__be32			internal_timer_h;
+	__be32			internal_timer_l;
+	__be32			rsrv3[2];
 	__be32			health_counter;
-	__be32			rsvd3[1019];
+	__be32			rsvd4[1019];
 	__be64			ieee1588_clk;
 	__be32			ieee1588_clk_type;
 	__be32			clr_intx;
@@ -520,6 +524,12 @@ struct mlx5_eqe_page_fault {
 	__be32 flags_qpn;
 } __packed;
 
+struct mlx5_eqe_vport_change {
+	u8		rsvd0[2];
+	__be16		vport_num;
+	__be32		rsvd1[6];
+} __packed;
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -532,6 +542,7 @@ union ev_data {
 	struct mlx5_eqe_stall_vl	stall_vl;
 	struct mlx5_eqe_page_req	req_pages;
 	struct mlx5_eqe_page_fault	page_fault;
+	struct mlx5_eqe_vport_change	vport_change;
 } __packed;
 
 struct mlx5_eqe {
@@ -593,7 +604,8 @@ struct mlx5_cqe64 {
 	__be32		imm_inval_pkey;
 	u8		rsvd40[4];
 	__be32		byte_cnt;
-	__be64		timestamp;
+	__be32		timestamp_h;
+	__be32		timestamp_l;
 	__be32		sop_drop_qpn;
 	__be16		wqe_counter;
 	u8		signature;
@@ -615,6 +627,16 @@ static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe)
 	return !!(cqe->l4_hdr_type_etc & 0x1);
 }
 
+static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe)
+{
+	u32 hi, lo;
+
+	hi = be32_to_cpu(cqe->timestamp_h);
+	lo = be32_to_cpu(cqe->timestamp_l);
+
+	return (u64)lo | ((u64)hi << 32);
+}
+
 enum {
 	CQE_L4_HDR_TYPE_NONE			= 0x0,
 	CQE_L4_HDR_TYPE_TCP_NO_ACK		= 0x1,
@@ -1067,6 +1089,12 @@ enum {
 };
 
 enum {
+	MLX5_ESW_VPORT_ADMIN_STATE_DOWN  = 0x0,
+	MLX5_ESW_VPORT_ADMIN_STATE_UP    = 0x1,
+	MLX5_ESW_VPORT_ADMIN_STATE_AUTO  = 0x2,
+};
+
+enum {
 	MLX5_L3_PROT_TYPE_IPV4		= 0,
 	MLX5_L3_PROT_TYPE_IPV6		= 1,
 };
@@ -1102,6 +1130,12 @@ enum {
 	MLX5_FLOW_CONTEXT_DEST_TYPE_TIR		= 2,
 };
 
+enum mlx5_list_type {
+	MLX5_NVPRT_LIST_TYPE_UC   = 0x0,
+	MLX5_NVPRT_LIST_TYPE_MC   = 0x1,
+	MLX5_NVPRT_LIST_TYPE_VLAN = 0x2,
+};
+
 enum {
 	MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0,
 	MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM    = 0x1,
@@ -1124,6 +1158,8 @@ enum mlx5_cap_type {
 	MLX5_CAP_IPOIB_OFFLOADS,
 	MLX5_CAP_EOIB_OFFLOADS,
 	MLX5_CAP_FLOW_TABLE,
+	MLX5_CAP_ESWITCH_FLOW_TABLE,
+	MLX5_CAP_ESWITCH,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1161,6 +1197,28 @@ enum mlx5_cap_type {
 #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
 	MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap)
 
+#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
+	MLX5_GET(flow_table_eswitch_cap, \
+		 mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+
+#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \
+	MLX5_GET(flow_table_eswitch_cap, \
+		 mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+
+#define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \
+	MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap)
+
+#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \
+	MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap)
+
+#define MLX5_CAP_ESW(mdev, cap) \
+	MLX5_GET(e_switch_cap, \
+		 mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap)
+
+#define MLX5_CAP_ESW_MAX(mdev, cap) \
+	MLX5_GET(e_switch_cap, \
+		 mdev->hca_caps_