From a364298359e74a414857bbbf3b725564feb22d09 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Wed, 21 Feb 2018 05:17:24 +0100
Subject: nohz: Convert tick_nohz_tick_stopped() to bool

It makes this function more self-explanatory about what it does and how
to use it.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1519186649-3242-3-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tick.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7cc35921218e..86576d9d2311 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -113,7 +113,7 @@ enum tick_dep_bits {
 
 #ifdef CONFIG_NO_HZ_COMMON
 extern bool tick_nohz_enabled;
-extern int tick_nohz_tick_stopped(void);
+extern bool tick_nohz_tick_stopped(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
-- 
cgit v1.2.3


From 22ab8bc02a5f6e8ffc418759894f7a6b0b632331 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Wed, 21 Feb 2018 05:17:25 +0100
Subject: nohz: Allow to check if remote CPU tick is stopped

This check is racy but provides a good heuristic to determine whether
a CPU may need a remote tick or not.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1519186649-3242-4-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tick.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 86576d9d2311..7f8c9a127f5a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -114,6 +114,7 @@ enum tick_dep_bits {
 #ifdef CONFIG_NO_HZ_COMMON
 extern bool tick_nohz_enabled;
 extern bool tick_nohz_tick_stopped(void);
+extern bool tick_nohz_tick_stopped_cpu(int cpu);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
@@ -125,6 +126,7 @@ extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 #else /* !CONFIG_NO_HZ_COMMON */
 #define tick_nohz_enabled (0)
 static inline int tick_nohz_tick_stopped(void) { return 0; }
+static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 
-- 
cgit v1.2.3


From 1bda3f8087fce9063da0b8aef87f17a3fe541aca Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Wed, 21 Feb 2018 05:17:26 +0100
Subject: sched/isolation: Isolate workqueues when "nohz_full=" is set

As we prepare for offloading the residual 1hz scheduler ticks to
workqueue, let's affine those to housekeepers so that they don't
interrupt the CPUs that don't want to be disturbed.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1519186649-3242-5-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/isolation.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index d849431c8060..4a6582c27dea 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -12,6 +12,7 @@ enum hk_flags {
 	HK_FLAG_SCHED		= (1 << 3),
 	HK_FLAG_TICK		= (1 << 4),
 	HK_FLAG_DOMAIN		= (1 << 5),
+	HK_FLAG_WQ		= (1 << 6),
 };
 
 #ifdef CONFIG_CPU_ISOLATION
-- 
cgit v1.2.3


From dcdedb24159be3487e3dbbe1faa79ae7d00c92ac Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Wed, 21 Feb 2018 05:17:28 +0100
Subject: sched/nohz: Remove the 1 Hz tick code

Now that the 1Hz tick is offloaded to workqueues, we can safely remove
the residual code that used to handle it locally.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1519186649-3242-7-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/nohz.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 3d3a97d9399d..094217273ff9 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -37,8 +37,4 @@ extern void wake_up_nohz_cpu(int cpu);
 static inline void wake_up_nohz_cpu(int cpu) { }
 #endif
 
-#ifdef CONFIG_NO_HZ_FULL
-extern u64 scheduler_tick_max_deferment(void);
-#endif
-
 #endif /* _LINUX_SCHED_NOHZ_H */
-- 
cgit v1.2.3


From 325ea10c0809406ce23f038602abbc454f3f761d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 3 Mar 2018 12:20:47 +0100
Subject: sched/headers: Simplify and clean up header usage in the scheduler

Do the following cleanups and simplifications:

 - sched/sched.h already includes <asm/paravirt.h>, so no need to
   include it in sched/core.c again.

 - order the <linux/sched/*.h> headers alphabetically

 - add all <linux/sched/*.h> headers to kernel/sched/sched.h

 - remove all unnecessary includes from the .c files that
   are already included in kernel/sched/sched.h.

Finally, make all scheduler .c files use a single common header:

  #include "sched.h"

... which now contains a union of the relied upon headers.

This makes the various .c files easier to read and easier to handle.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/deadline.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index a5bc8728ead7..0cb034331cbb 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -1,8 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_SCHED_DEADLINE_H
-#define _LINUX_SCHED_DEADLINE_H
-
-#include <linux/sched.h>
 
 /*
  * SCHED_DEADLINE tasks has negative priorities, reflecting
@@ -28,5 +24,3 @@ static inline bool dl_time_before(u64 a, u64 b)
 {
 	return (s64)(a - b) < 0;
 }
-
-#endif /* _LINUX_SCHED_DEADLINE_H */
-- 
cgit v1.2.3


From 6b2bb7265f0b62605e8caee3613449ed0db270b9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Mar 2018 11:40:33 +0100
Subject: sched/wait: Introduce wait_var_event()

As a replacement for the wait_on_atomic_t() API provide the
wait_var_event() API.

The wait_var_event() API is based on the very same hashed-waitqueue
idea, but doesn't care about the type (atomic_t) or the specific
condition (atomic_read() == 0). IOW. it's much more widely
applicable/flexible.

It shares all the benefits/disadvantages of a hashed-waitqueue
approach with the existing wait_on_atomic_t/wait_on_bit() APIs.

The API is modeled after the existing wait_event() API, but instead of
taking a wait_queue_head, it takes an address. This addresses is
hashed to obtain a wait_queue_head from the bit_wait_table.

Similar to the wait_event() API, it takes a condition expression as
second argument and will wait until this expression becomes true.

The following are (mostly) identical replacements:

	wait_on_atomic_t(&my_atomic, atomic_t_wait, TASK_UNINTERRUPTIBLE);
	wake_up_atomic_t(&my_atomic);

	wait_var_event(&my_atomic, !atomic_read(&my_atomic));
	wake_up_var(&my_atomic);

The only difference is that wake_up_var() is an unconditional wakeup
and doesn't check the previously hard-coded (atomic_read() == 0)
condition here. This is of little concequence, since most callers are
already conditional on atomic_dec_and_test() and the ones that are
not, are trivial to make so.

Tested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait_bit.h | 70 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 61b39eaf7cad..3fcdb75d69cf 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -262,4 +262,74 @@ int wait_on_atomic_t(atomic_t *val, wait_atomic_t_action_f action, unsigned mode
 	return out_of_line_wait_on_atomic_t(val, action, mode);
 }
 
+extern void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int flags);
+extern void wake_up_var(void *var);
+extern wait_queue_head_t *__var_waitqueue(void *p);
+
+#define ___wait_var_event(var, condition, state, exclusive, ret, cmd)	\
+({									\
+	__label__ __out;						\
+	struct wait_queue_head *__wq_head = __var_waitqueue(var);	\
+	struct wait_bit_queue_entry __wbq_entry;			\
+	long __ret = ret; /* explicit shadow */				\
+									\
+	init_wait_var_entry(&__wbq_entry, var,				\
+			    exclusive ? WQ_FLAG_EXCLUSIVE : 0);		\
+	for (;;) {							\
+		long __int = prepare_to_wait_event(__wq_head,		\
+						   &__wbq_entry.wq_entry, \
+						   state);		\
+		if (condition)						\
+			break;						\
+									\
+		if (___wait_is_interruptible(state) && __int) {		\
+			__ret = __int;					\
+			goto __out;					\
+		}							\
+									\
+		cmd;							\
+	}								\
+	finish_wait(__wq_head, &__wbq_entry.wq_entry);			\
+__out:	__ret;								\
+})
+
+#define __wait_var_event(var, condition)				\
+	___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			  schedule())
+
+#define wait_var_event(var, condition)					\
+do {									\
+	might_sleep();							\
+	if (condition)							\
+		break;							\
+	__wait_var_event(var, condition);				\
+} while (0)
+
+#define __wait_var_event_killable(var, condition)			\
+	___wait_var_event(var, condition, TASK_KILLABLE, 0, 0,		\
+			  schedule())
+
+#define wait_var_event_killable(var, condition)				\
+({									\
+	int __ret = 0;							\
+	might_sleep();							\
+	if (!(condition))						\
+		__ret = __wait_var_event_killable(var, condition);	\
+	__ret;								\
+})
+
+#define __wait_var_event_timeout(var, condition, timeout)		\
+	___wait_var_event(var, ___wait_cond_timeout(condition),		\
+			  TASK_UNINTERRUPTIBLE, 0, timeout,		\
+			  __ret = schedule_timeout(__ret))
+
+#define wait_var_event_timeout(var, condition, timeout)			\
+({									\
+	long __ret = timeout;						\
+	might_sleep();							\
+	if (!___wait_cond_timeout(condition))				\
+		__ret = __wait_var_event_timeout(var, condition, timeout); \
+	__ret;								\
+})
+
 #endif /* _LINUX_WAIT_BIT_H */
-- 
cgit v1.2.3


From f44c77630d26ca2c2a60b20c47dd9ce07c4361b3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 7 Mar 2018 15:26:44 -0800
Subject: fs, dax: prepare for dax-specific address_space_operations

In preparation for the dax implementation to start associating dax pages
to inodes via page->mapping, we need to provide a 'struct
address_space_operations' instance for dax. Define some generic VFS aops
helpers for dax. These noop implementations are there in the dax case to
prevent the VFS from falling back to operations with page-cache
assumptions, dax_writeback_mapping_range() may not be referenced in the
FS_DAX=n case.

Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Matthew Wilcox <mawilcox@microsoft.com>
Suggested-by: Jan Kara <jack@suse.cz>
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 12 +++++++++---
 include/linux/fs.h  |  4 ++++
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0185ecdae135..ae27a7efe7ab 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,6 +38,7 @@ static inline void put_dax(struct dax_device *dax_dev)
 }
 #endif
 
+struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int __bdev_dax_supported(struct super_block *sb, int blocksize);
@@ -57,6 +58,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 }
 
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
@@ -76,6 +79,12 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 {
 	return NULL;
 }
+
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 int dax_read_lock(void);
@@ -121,7 +130,4 @@ static inline bool dax_mapping(struct address_space *mapping)
 	return mapping->host && IS_DAX(mapping->host);
 }
 
-struct writeback_control;
-int dax_writeback_mapping_range(struct address_space *mapping,
-		struct block_device *bdev, struct writeback_control *wbc);
 #endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 79c413985305..44f7f7080faa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3129,6 +3129,10 @@ extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *,
 			 struct inode *, struct dentry *, unsigned int);
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
+extern int noop_set_page_dirty(struct page *page);
+extern void noop_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int length);
+extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
-- 
cgit v1.2.3


From 976431b02c2ef92ae3f8b6a7d699fc554025e118 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 29 Mar 2018 17:22:13 -0700
Subject: dax, dm: allow device-mapper to operate without dax support

Change device-mapper's DAX dependency to require the presence of at
least one DAX_DRIVER. This allows device-mapper to be built without
bringing the DAX core along which is especially wasteful when there are
no DAX drivers, like BLK_DEV_PMEM, configured.

Cc: Alasdair Kergon <agk@redhat.com>
Reported-by: Bart Van Assche <Bart.VanAssche@wdc.com>
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dax.h | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dax.h b/include/linux/dax.h
index ae27a7efe7ab..f9eb22ad341e 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -26,16 +26,39 @@ extern struct attribute_group dax_attribute_group;
 
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops);
 void put_dax(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void dax_write_cache(struct dax_device *dax_dev, bool wc);
+bool dax_write_cache_enabled(struct dax_device *dax_dev);
 #else
 static inline struct dax_device *dax_get_by_host(const char *host)
 {
 	return NULL;
 }
-
+static inline struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops)
+{
+	/*
+	 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
+	 * NULL is an error or expected.
+	 */
+	return NULL;
+}
 static inline void put_dax(struct dax_device *dax_dev)
 {
 }
+static inline void kill_dax(struct dax_device *dax_dev)
+{
+}
+static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
+{
+}
+static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
+{
+	return false;
+}
 #endif
 
 struct writeback_control;
@@ -89,18 +112,13 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *alloc_dax(void *private, const char *host,
-		const struct dax_operations *ops);
 bool dax_alive(struct dax_device *dax_dev);
-void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		void **kaddr, pfn_t *pfn);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
-void dax_write_cache(struct dax_device *dax_dev, bool wc);
-bool dax_write_cache_enabled(struct dax_device *dax_dev);
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops);
-- 
cgit v1.2.3