From f96e9232e04856c781d4f71923a46dd3f7b429fa Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:00 -0800 Subject: rcu: Adjust force_quiescent_state() locking, step 1 This causes rnp->lock to be held on entry to force_quiescent_state()'s switch statement. This is a first step towards prohibiting starting grace periods while force_quiescent_state() is executing, which will reduce the number and complexity of races that force_quiescent_state() is involved in. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501455-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 53ae9598f798..eae331da6bee 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1204,7 +1204,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) } if (relaxed && (long)(rsp->jiffies_force_qs - jiffies) >= 0) - goto unlock_ret; /* no emergency and done recently. */ + goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; spin_lock(&rnp->lock); lastcomp = rsp->gpnum - 1; @@ -1213,31 +1213,32 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; spin_unlock(&rnp->lock); - goto unlock_ret; /* no GP in progress, time updated. */ + goto unlock_fqs_ret; /* no GP in progress, time updated. */ } - spin_unlock(&rnp->lock); switch (signaled) { case RCU_GP_IDLE: case RCU_GP_INIT: + spin_unlock(&rnp->lock); break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: + spin_unlock(&rnp->lock); if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ if (rcu_process_dyntick(rsp, lastcomp, dyntick_save_progress_counter)) - goto unlock_ret; + goto unlock_fqs_ret; + spin_lock(&rnp->lock); /* fall into next case. */ case RCU_SAVE_COMPLETED: /* Update state, record completion counter. */ forcenow = 0; - spin_lock(&rnp->lock); if (lastcomp + 1 == rsp->gpnum && lastcomp == rsp->completed && rsp->signaled == signaled) { @@ -1245,23 +1246,31 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) rsp->completed_fqs = lastcomp; forcenow = signaled == RCU_SAVE_COMPLETED; } - spin_unlock(&rnp->lock); - if (!forcenow) + if (!forcenow) { + spin_unlock(&rnp->lock); break; + } /* fall into next case. */ case RCU_FORCE_QS: /* Check dyntick-idle state, send IPI to laggarts. */ + spin_unlock(&rnp->lock); if (rcu_process_dyntick(rsp, rsp->completed_fqs, rcu_implicit_dynticks_qs)) - goto unlock_ret; + goto unlock_fqs_ret; /* Leave state in case more forcing is required. */ break; + + default: + + spin_unlock(&rnp->lock); + WARN_ON_ONCE(1); + break; } -unlock_ret: +unlock_fqs_ret: spin_unlock_irqrestore(&rsp->fqslock, flags); } -- cgit v1.2.3 From 559569acf94f538b56bd6eead80b439d6a78cdff Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:01 -0800 Subject: rcu: Adjust force_quiescent_state() locking, step 2 This patch releases rnp->lock after the end of force_quiescent_state()'s switch statement. This is a second step towards prohibiting starting grace periods while force_quiescent_state() is executing, which will reduce the number and complexity of races that force_quiescent_state() is involved in. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501994-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index eae331da6bee..d42ad30c4d70 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1219,7 +1219,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_GP_IDLE: case RCU_GP_INIT: - spin_unlock(&rnp->lock); break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: @@ -1246,10 +1245,8 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) rsp->completed_fqs = lastcomp; forcenow = signaled == RCU_SAVE_COMPLETED; } - if (!forcenow) { - spin_unlock(&rnp->lock); + if (!forcenow) break; - } /* fall into next case. */ case RCU_FORCE_QS: @@ -1262,14 +1259,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Leave state in case more forcing is required. */ - break; - - default: - - spin_unlock(&rnp->lock); - WARN_ON_ONCE(1); + spin_lock(&rnp->lock); break; } + spin_unlock(&rnp->lock); unlock_fqs_ret: spin_unlock_irqrestore(&rsp->fqslock, flags); } -- cgit v1.2.3 From 07079d5357a4d53c2b13126c4a38fb40e6e04966 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:02 -0800 Subject: rcu: Prohibit starting new grace periods while forcing quiescent states Reduce the number and variety of race conditions by prohibiting the start of a new grace period while force_quiescent_state() is active. A new fqs_active flag in the rcu_state structure is used to trace whether or not force_quiescent_state() is active, and this new flag is tested by rcu_start_gp(). If the CPU that closed out the last grace period needs another grace period, this new grace period may be delayed up to one scheduling-clock tick, but it will eventually get started. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <126264655052-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 31 +++++++++++++++++-------------- kernel/rcutree.h | 2 ++ 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d42ad30c4d70..41688ff60e07 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -659,7 +659,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_data *rdp = rsp->rda[smp_processor_id()]; struct rcu_node *rnp = rcu_get_root(rsp); - if (!cpu_needs_another_gp(rsp, rdp)) { + if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { if (rnp->completed == rsp->completed) { spin_unlock_irqrestore(&rnp->lock, flags); return; @@ -1195,6 +1195,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) struct rcu_node *rnp = rcu_get_root(rsp); u8 signaled; u8 forcenow; + u8 gpdone; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1206,15 +1207,16 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) (long)(rsp->jiffies_force_qs - jiffies) >= 0) goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; - spin_lock(&rnp->lock); + spin_lock(&rnp->lock); /* irqs already disabled */ lastcomp = rsp->gpnum - 1; signaled = rsp->signaled; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; - spin_unlock(&rnp->lock); + spin_unlock(&rnp->lock); /* irqs remain disabled */ goto unlock_fqs_ret; /* no GP in progress, time updated. */ } + rsp->fqs_active = 1; switch (signaled) { case RCU_GP_IDLE: case RCU_GP_INIT: @@ -1223,15 +1225,16 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_SAVE_DYNTICK: - spin_unlock(&rnp->lock); + spin_unlock(&rnp->lock); /* irqs remain disabled */ if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - if (rcu_process_dyntick(rsp, lastcomp, - dyntick_save_progress_counter)) - goto unlock_fqs_ret; - spin_lock(&rnp->lock); + gpdone = rcu_process_dyntick(rsp, lastcomp, + dyntick_save_progress_counter); + spin_lock(&rnp->lock); /* irqs already disabled */ + if (gpdone) + break; /* fall into next case. */ case RCU_SAVE_COMPLETED: @@ -1252,17 +1255,17 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_FORCE_QS: /* Check dyntick-idle state, send IPI to laggarts. */ - spin_unlock(&rnp->lock); - if (rcu_process_dyntick(rsp, rsp->completed_fqs, - rcu_implicit_dynticks_qs)) - goto unlock_fqs_ret; + spin_unlock(&rnp->lock); /* irqs remain disabled */ + gpdone = rcu_process_dyntick(rsp, rsp->completed_fqs, + rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ - spin_lock(&rnp->lock); + spin_lock(&rnp->lock); /* irqs already disabled */ break; } - spin_unlock(&rnp->lock); + rsp->fqs_active = 0; + spin_unlock(&rnp->lock); /* irqs remain disabled */ unlock_fqs_ret: spin_unlock_irqrestore(&rsp->fqslock, flags); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index d2a0046f63b2..dc386a7c634f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -277,6 +277,8 @@ struct rcu_state { u8 signaled ____cacheline_internodealigned_in_smp; /* Force QS state. */ + u8 fqs_active; /* force_quiescent_state() */ + /* is running. */ long gpnum; /* Current gp number. */ long completed; /* # of last completed gp. */ -- cgit v1.2.3 From f3a8b5c6aa543bd87764418d63632eb65b80e2f6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:03 -0800 Subject: rcu: Eliminate local variable signaled from force_quiescent_state() Because the root rcu_node lock is held across entry to the switch statement in force_quiescent_state(), it is no longer necessary to snapshot rsp->signaled to a local variable. Eliminate both the snapshotting and the local variable. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1262646550602-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 41688ff60e07..1d8cfb1711fd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1193,7 +1193,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) unsigned long flags; long lastcomp; struct rcu_node *rnp = rcu_get_root(rsp); - u8 signaled; u8 forcenow; u8 gpdone; @@ -1209,7 +1208,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) rsp->n_force_qs++; spin_lock(&rnp->lock); /* irqs already disabled */ lastcomp = rsp->gpnum - 1; - signaled = rsp->signaled; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; @@ -1217,7 +1215,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) goto unlock_fqs_ret; /* no GP in progress, time updated. */ } rsp->fqs_active = 1; - switch (signaled) { + switch (rsp->signaled) { case RCU_GP_IDLE: case RCU_GP_INIT: @@ -1242,11 +1240,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Update state, record completion counter. */ forcenow = 0; if (lastcomp + 1 == rsp->gpnum && - lastcomp == rsp->completed && - rsp->signaled == signaled) { + lastcomp == rsp->completed) { + forcenow = rsp->signaled == RCU_SAVE_COMPLETED; rsp->signaled = RCU_FORCE_QS; rsp->completed_fqs = lastcomp; - forcenow = signaled == RCU_SAVE_COMPLETED; } if (!forcenow) break; -- cgit v1.2.3 From 39c0bbfc07c6e28db7346d0e11106f2d045d3035 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:04 -0800 Subject: rcu: Eliminate local variable lastcomp from force_quiescent_state() Because rsp->fqs_active is set to 1 across force_quiescent_state()'s switch statement, rcu_start_gp() will refrain from starting a new grace period during this time. Therefore, rsp->gpnum is constant, and can be propagated to all uses of lastcomp, eliminating this local variable. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465502985-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 10 +++------- kernel/rcutree.h | 2 -- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 1d8cfb1711fd..62b64332effb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1191,7 +1191,6 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, static void force_quiescent_state(struct rcu_state *rsp, int relaxed) { unsigned long flags; - long lastcomp; struct rcu_node *rnp = rcu_get_root(rsp); u8 forcenow; u8 gpdone; @@ -1207,7 +1206,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) goto unlock_fqs_ret; /* no emergency and done recently. */ rsp->n_force_qs++; spin_lock(&rnp->lock); /* irqs already disabled */ - lastcomp = rsp->gpnum - 1; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; @@ -1228,7 +1226,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - gpdone = rcu_process_dyntick(rsp, lastcomp, + gpdone = rcu_process_dyntick(rsp, rsp->gpnum - 1, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ if (gpdone) @@ -1239,11 +1237,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Update state, record completion counter. */ forcenow = 0; - if (lastcomp + 1 == rsp->gpnum && - lastcomp == rsp->completed) { + if (rsp->gpnum - 1 == rsp->completed) { forcenow = rsp->signaled == RCU_SAVE_COMPLETED; rsp->signaled = RCU_FORCE_QS; - rsp->completed_fqs = lastcomp; } if (!forcenow) break; @@ -1253,7 +1249,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Check dyntick-idle state, send IPI to laggarts. */ spin_unlock(&rnp->lock); /* irqs remain disabled */ - gpdone = rcu_process_dyntick(rsp, rsp->completed_fqs, + gpdone = rcu_process_dyntick(rsp, rsp->gpnum - 1, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index dc386a7c634f..534856121b06 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -296,8 +296,6 @@ struct rcu_state { long orphan_qlen; /* Number of orphaned cbs. */ spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ - long completed_fqs; /* Value of completed @ snap. */ - /* Protected by fqslock. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ unsigned long n_force_qs; /* Number of calls to */ -- cgit v1.2.3 From eb1ba45f1e7f6e626fefc063b340c7cbec9bd8c7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:05 -0800 Subject: rcu: Eliminate second argument of rcu_process_dyntick() At this point, the second argument to all calls to rcu_process_dyntick() is a function of the same field of the structure passed in as the first argument, namely, rsp->gpnum-1. So propagate rsp->gpnum-1 to all uses of the second argument within rcu_process_dyntick() and then eliminate the second argument. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465503786-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 62b64332effb..c7d00700fc4e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1147,7 +1147,7 @@ void rcu_check_callbacks(int cpu, int user) * Returns 1 if the current grace period ends while scanning (possibly * because we made it end). */ -static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, +static int rcu_process_dyntick(struct rcu_state *rsp, int (*f)(struct rcu_data *)) { unsigned long bit; @@ -1159,7 +1159,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, rcu_for_each_leaf_node(rsp, rnp) { mask = 0; spin_lock_irqsave(&rnp->lock, flags); - if (rnp->completed != lastcomp) { + if (rnp->completed != rsp->gpnum - 1) { spin_unlock_irqrestore(&rnp->lock, flags); return 1; } @@ -1173,7 +1173,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rnp->completed == lastcomp) { + if (mask != 0 && rnp->completed == rsp->gpnum - 1) { /* rcu_report_qs_rnp() releases rnp->lock. */ rcu_report_qs_rnp(mask, rsp, rnp, flags); @@ -1226,7 +1226,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - gpdone = rcu_process_dyntick(rsp, rsp->gpnum - 1, + gpdone = rcu_process_dyntick(rsp, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ if (gpdone) @@ -1249,8 +1249,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Check dyntick-idle state, send IPI to laggarts. */ spin_unlock(&rnp->lock); /* irqs remain disabled */ - gpdone = rcu_process_dyntick(rsp, rsp->gpnum - 1, - rcu_implicit_dynticks_qs); + gpdone = rcu_process_dyntick(rsp, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ -- cgit v1.2.3 From 0f10dc826646134dce3e5751512b87d30f3903e4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:06 -0800 Subject: rcu: Eliminate rcu_process_dyntick() return value Because a new grace period cannot start while we are executing within the force_quiescent_state() function's switch statement, if any test within that switch statement or within any function called from that switch statement shows that the current grace period has ended, we can safely re-do that test any time before we leave the switch statement. This means that we no longer need a return value from rcu_process_dyntick(), as we can simply invoke rcu_gp_in_progress() to check whether the old grace period has finished -- there is no longer any need to worry about whether or not a new grace period has been started. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501857-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index c7d00700fc4e..e4971192fa9c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1144,11 +1144,9 @@ void rcu_check_callbacks(int cpu, int user) /* * Scan the leaf rcu_node structures, processing dyntick state for any that * have not yet encountered a quiescent state, using the function specified. - * Returns 1 if the current grace period ends while scanning (possibly - * because we made it end). */ -static int rcu_process_dyntick(struct rcu_state *rsp, - int (*f)(struct rcu_data *)) +static void rcu_process_dyntick(struct rcu_state *rsp, + int (*f)(struct rcu_data *)) { unsigned long bit; int cpu; @@ -1161,7 +1159,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, spin_lock_irqsave(&rnp->lock, flags); if (rnp->completed != rsp->gpnum - 1) { spin_unlock_irqrestore(&rnp->lock, flags); - return 1; + return; } if (rnp->qsmask == 0) { spin_unlock_irqrestore(&rnp->lock, flags); @@ -1181,7 +1179,6 @@ static int rcu_process_dyntick(struct rcu_state *rsp, } spin_unlock_irqrestore(&rnp->lock, flags); } - return 0; } /* @@ -1193,7 +1190,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); u8 forcenow; - u8 gpdone; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1226,10 +1222,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - gpdone = rcu_process_dyntick(rsp, - dyntick_save_progress_counter); + rcu_process_dyntick(rsp, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ - if (gpdone) + if (!rcu_gp_in_progress(rsp)) break; /* fall into next case. */ @@ -1249,7 +1244,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Check dyntick-idle state, send IPI to laggarts. */ spin_unlock(&rnp->lock); /* irqs remain disabled */ - gpdone = rcu_process_dyntick(rsp, rcu_implicit_dynticks_qs); + rcu_process_dyntick(rsp, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ -- cgit v1.2.3 From ee47eb9f4da6f44af965d6d049e77ee8c8a4b822 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:07 -0800 Subject: rcu: Remove leg of force_quiescent_state() switch statement The comparisons of rsp->gpnum nad rsp->completed in rcu_process_dyntick() and force_quiescent_state() can be replaced by the much more clear rcu_gp_in_progress() predicate function. After doing this, it becomes clear that the RCU_SAVE_COMPLETED leg of the force_quiescent_state() function's switch statement is almost completely a no-op. A small change to the RCU_SAVE_DYNTICK leg renders it a complete no-op, after which it can be removed. Doing so also eliminates the forcenow local variable from force_quiescent_state(). Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501781-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 22 +++++----------------- kernel/rcutree.h | 5 ++--- 2 files changed, 7 insertions(+), 20 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e4971192fa9c..6268f37adfc4 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1144,6 +1144,7 @@ void rcu_check_callbacks(int cpu, int user) /* * Scan the leaf rcu_node structures, processing dyntick state for any that * have not yet encountered a quiescent state, using the function specified. + * The caller must have suppressed start of new grace periods. */ static void rcu_process_dyntick(struct rcu_state *rsp, int (*f)(struct rcu_data *)) @@ -1157,7 +1158,7 @@ static void rcu_process_dyntick(struct rcu_state *rsp, rcu_for_each_leaf_node(rsp, rnp) { mask = 0; spin_lock_irqsave(&rnp->lock, flags); - if (rnp->completed != rsp->gpnum - 1) { + if (!rcu_gp_in_progress(rsp)) { spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -1171,7 +1172,7 @@ static void rcu_process_dyntick(struct rcu_state *rsp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rnp->completed == rsp->gpnum - 1) { + if (mask != 0 && rcu_gp_in_progress(rsp)) { /* rcu_report_qs_rnp() releases rnp->lock. */ rcu_report_qs_rnp(mask, rsp, rnp, flags); @@ -1189,7 +1190,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) { unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); - u8 forcenow; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1224,21 +1224,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Record dyntick-idle state. */ rcu_process_dyntick(rsp, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ - if (!rcu_gp_in_progress(rsp)) - break; - /* fall into next case. */ - - case RCU_SAVE_COMPLETED: - - /* Update state, record completion counter. */ - forcenow = 0; - if (rsp->gpnum - 1 == rsp->completed) { - forcenow = rsp->signaled == RCU_SAVE_COMPLETED; + if (rcu_gp_in_progress(rsp)) rsp->signaled = RCU_FORCE_QS; - } - if (!forcenow) - break; - /* fall into next case. */ + break; case RCU_FORCE_QS: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 534856121b06..edb6fae0fa94 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -237,12 +237,11 @@ struct rcu_data { #define RCU_GP_IDLE 0 /* No grace period in progress. */ #define RCU_GP_INIT 1 /* Grace period being initialized. */ #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ -#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */ -#define RCU_FORCE_QS 4 /* Need to force quiescent state. */ +#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ #ifdef CONFIG_NO_HZ #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK #else /* #ifdef CONFIG_NO_HZ */ -#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED +#define RCU_SIGNAL_INIT RCU_FORCE_QS #endif /* #else #ifdef CONFIG_NO_HZ */ #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ -- cgit v1.2.3 From 45f014c52eef022873b19d6a20eb0ec9668f2b09 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:08 -0800 Subject: rcu: Remove redundant grace-period check The rcu_process_dyntick() function checks twice for the end of the current grace period. However, it holds the current rcu_node structure's ->lock field throughout, and doesn't get to the second call to rcu_gp_in_progress() unless there is at least one CPU corresponding to this rcu_node structure that has not yet checked in for the current grace period, which would prevent the current grace period from ending. So the current grace period cannot have ended, and the second check is redundant, so remove it. Also, given that this function is used even with !CONFIG_NO_HZ, its name is quite misleading. Change from rcu_process_dyntick() to force_qs_rnp(). Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1262646550562-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 6268f37adfc4..d9202857d3ad 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1146,8 +1146,7 @@ void rcu_check_callbacks(int cpu, int user) * have not yet encountered a quiescent state, using the function specified. * The caller must have suppressed start of new grace periods. */ -static void rcu_process_dyntick(struct rcu_state *rsp, - int (*f)(struct rcu_data *)) +static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) { unsigned long bit; int cpu; @@ -1172,7 +1171,7 @@ static void rcu_process_dyntick(struct rcu_state *rsp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rcu_gp_in_progress(rsp)) { + if (mask != 0) { /* rcu_report_qs_rnp() releases rnp->lock. */ rcu_report_qs_rnp(mask, rsp, rnp, flags); @@ -1222,7 +1221,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* So gcc recognizes the dead code. */ /* Record dyntick-idle state. */ - rcu_process_dyntick(rsp, dyntick_save_progress_counter); + force_qs_rnp(rsp, dyntick_save_progress_counter); spin_lock(&rnp->lock); /* irqs already disabled */ if (rcu_gp_in_progress(rsp)) rsp->signaled = RCU_FORCE_QS; @@ -1232,7 +1231,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Check dyntick-idle state, send IPI to laggarts. */ spin_unlock(&rnp->lock); /* irqs remain disabled */ - rcu_process_dyntick(rsp, rcu_implicit_dynticks_qs); + force_qs_rnp(rsp, rcu_implicit_dynticks_qs); /* Leave state in case more forcing is required. */ -- cgit v1.2.3 From 46a1e34eda805501a8b32f26394faa435149f6d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:09 -0800 Subject: rcu: Make force_quiescent_state() start grace period if needed Grace periods cannot be started while force_quiescent_state() is active. This is OK in that the affected CPUs will try again later, but it does induce needless grace-period delays. This patch causes rcu_start_gp() to record a failed attempt to start a grace period. When force_quiescent_state() prepares to return, it then starts the grace period if there was such a failed attempt. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626465501854-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 8 ++++++++ kernel/rcutree.h | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d9202857d3ad..55e8f6ef8195 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -660,6 +660,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_node *rnp = rcu_get_root(rsp); if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { + if (cpu_needs_another_gp(rsp, rdp)) + rsp->fqs_need_gp = 1; if (rnp->completed == rsp->completed) { spin_unlock_irqrestore(&rnp->lock, flags); return; @@ -1239,6 +1241,12 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; } rsp->fqs_active = 0; + if (rsp->fqs_need_gp) { + spin_unlock(&rsp->fqslock); /* irqs remain disabled */ + rsp->fqs_need_gp = 0; + rcu_start_gp(rsp, flags); /* releases rnp->lock */ + return; + } spin_unlock(&rnp->lock); /* irqs remain disabled */ unlock_fqs_ret: spin_unlock_irqrestore(&rsp->fqslock, flags); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index edb6fae0fa94..bd5d78ad1c48 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -278,6 +278,11 @@ struct rcu_state { /* Force QS state. */ u8 fqs_active; /* force_quiescent_state() */ /* is running. */ + u8 fqs_need_gp; /* A CPU was prevented from */ + /* starting a new grace */ + /* period because */ + /* force_quiescent_state() */ + /* was running. */ long gpnum; /* Current gp number. */ long completed; /* # of last completed gp. */ -- cgit v1.2.3 From bf66f18e79e34c421bbd8f6511e2c556b779df2f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:10 -0800 Subject: rcu: Add force_quiescent_state() testing to rcutorture Add force_quiescent_state() testing to rcutorture, with a separate thread that repeatedly invokes force_quiescent_state() in bursts. This can greatly increase the probability of encountering certain types of race conditions. Suggested-by: Josh Triplett Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1262646551116-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutorture.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++-- kernel/rcutree.c | 18 +++++++++++ kernel/rcutree_plugin.h | 19 ++++++++++++ 3 files changed, 115 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 9bb52177af02..adda92bfafac 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -61,6 +61,9 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/ static int stutter = 5; /* Start/stop testing interval (in sec) */ static int irqreader = 1; /* RCU readers from irq (timers). */ +static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ +static int fqs_holdoff = 0; /* Hold time within burst (us). */ +static int fqs_stutter = 3; /* Wait time between bursts (s). */ static char *torture_type = "rcu"; /* What RCU implementation to torture. */ module_param(nreaders, int, 0444); @@ -79,6 +82,12 @@ module_param(stutter, int, 0444); MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test"); module_param(irqreader, int, 0444); MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers"); +module_param(fqs_duration, int, 0444); +MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us)"); +module_param(fqs_holdoff, int, 0444); +MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); +module_param(fqs_stutter, int, 0444); +MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); @@ -99,6 +108,7 @@ static struct task_struct **reader_tasks; static struct task_struct *stats_task; static struct task_struct *shuffler_task; static struct task_struct *stutter_task; +static struct task_struct *fqs_task; #define RCU_TORTURE_PIPE_LEN 10 @@ -263,6 +273,7 @@ struct rcu_torture_ops { void (*deferred_free)(struct rcu_torture *p); void (*sync)(void); void (*cb_barrier)(void); + void (*fqs)(void); int (*stats)(char *page); int irq_capable; char *name; @@ -347,6 +358,7 @@ static struct rcu_torture_ops rcu_ops = { .deferred_free = rcu_torture_deferred_free, .sync = synchronize_rcu, .cb_barrier = rcu_barrier, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu" @@ -388,6 +400,7 @@ static struct rcu_torture_ops rcu_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu, .cb_barrier = NULL, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_sync" @@ -403,6 +416,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_rcu_expedited, .cb_barrier = NULL, + .fqs = rcu_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_expedited" @@ -465,6 +479,7 @@ static struct rcu_torture_ops rcu_bh_ops = { .deferred_free = rcu_bh_torture_deferred_free, .sync = rcu_bh_torture_synchronize, .cb_barrier = rcu_barrier_bh, + .fqs = rcu_bh_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_bh" @@ -480,6 +495,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = rcu_bh_torture_synchronize, .cb_barrier = NULL, + .fqs = rcu_bh_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "rcu_bh_sync" @@ -621,6 +637,7 @@ static struct rcu_torture_ops sched_ops = { .deferred_free = rcu_sched_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = rcu_barrier_sched, + .fqs = rcu_sched_force_quiescent_state, .stats = NULL, .irq_capable = 1, .name = "sched" @@ -636,6 +653,7 @@ static struct rcu_torture_ops sched_sync_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = NULL, + .fqs = rcu_sched_force_quiescent_state, .stats = NULL, .name = "sched_sync" }; @@ -650,11 +668,44 @@ static struct rcu_torture_ops sched_expedited_ops = { .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_sched_expedited, .cb_barrier = NULL, + .fqs = rcu_sched_force_quiescent_state, .stats = rcu_expedited_torture_stats, .irq_capable = 1, .name = "sched_expedited" }; +/* + * RCU torture force-quiescent-state kthread. Repeatedly induces + * bursts of calls to force_quiescent_state(), increasing the probability + * of occurrence of some important types of race conditions. + */ +static int +rcu_torture_fqs(void *arg) +{ + unsigned long fqs_resume_time; + int fqs_burst_remaining; + + VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); + do { + fqs_resume_time = jiffies + fqs_stutter * HZ; + while (jiffies - fqs_resume_time > LONG_MAX) { + schedule_timeout_interruptible(1); + } + fqs_burst_remaining = fqs_duration; + while (fqs_burst_remaining > 0) { + cur_ops->fqs(); + udelay(fqs_holdoff); + fqs_burst_remaining -= fqs_holdoff; + } + rcu_stutter_wait("rcu_torture_fqs"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping"); + rcutorture_shutdown_absorb("rcu_torture_fqs"); + while (!kthread_should_stop()) + schedule_timeout_uninterruptible(1); + return 0; +} + /* * RCU torture writer kthread. Repeatedly substitutes a new structure * for that pointed to by rcu_torture_current, freeing the old structure @@ -1030,10 +1081,11 @@ rcu_torture_print_module_parms(char *tag) printk(KERN_ALERT "%s" TORTURE_FLAG "--- %s: nreaders=%d nfakewriters=%d " "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval=%d stutter=%d irqreader=%d\n", + "shuffle_interval=%d stutter=%d irqreader=%d " + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n", torture_type, tag, nrealreaders, nfakewriters, stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader); + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter); } static struct notifier_block rcutorture_nb = { @@ -1109,6 +1161,12 @@ rcu_torture_cleanup(void) } stats_task = NULL; + if (fqs_task) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task"); + kthread_stop(fqs_task); + } + fqs_task = NULL; + /* Wait for all RCU callbacks to fire. */ if (cur_ops->cb_barrier != NULL) @@ -1154,6 +1212,11 @@ rcu_torture_init(void) mutex_unlock(&fullstop_mutex); return -EINVAL; } + if (cur_ops->fqs == NULL && fqs_duration != 0) { + printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero " + "fqs_duration, fqs disabled.\n"); + fqs_duration = 0; + } if (cur_ops->init) cur_ops->init(); /* no "goto unwind" prior to this point!!! */ @@ -1282,6 +1345,19 @@ rcu_torture_init(void) goto unwind; } } + if (fqs_duration < 0) + fqs_duration = 0; + if (fqs_duration) { + /* Create the stutter thread */ + fqs_task = kthread_run(rcu_torture_fqs, NULL, + "rcu_torture_fqs"); + if (IS_ERR(fqs_task)) { + firsterr = PTR_ERR(fqs_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create fqs"); + fqs_task = NULL; + goto unwind; + } + } register_reboot_notifier(&rcutorture_nb); mutex_unlock(&fullstop_mutex); return 0; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 55e8f6ef8195..0a4c32879398 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -156,6 +156,24 @@ long rcu_batches_completed_bh(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); +/* + * Force a quiescent state for RCU BH. + */ +void rcu_bh_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_bh_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); + +/* + * Force a quiescent state for RCU-sched. + */ +void rcu_sched_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_sched_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); + /* * Does the CPU have callbacks ready to be invoked? */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 37fbccdf41d5..f11ebd44b454 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -61,6 +61,15 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); +/* + * Force a quiescent state for preemptible RCU. + */ +void rcu_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_preempt_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Record a preemptable-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -712,6 +721,16 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); +/* + * Force a quiescent state for RCU, which, because there is no preemptible + * RCU, becomes the same as rcu-sched. + */ +void rcu_force_quiescent_state(void) +{ + rcu_sched_force_quiescent_state(); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Because preemptable RCU does not exist, we never have to check for * CPUs being in quiescent states. -- cgit v1.2.3 From cba8244a0f1c277b6b1e48ed6504fa434119e24d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 16:04:01 -0800 Subject: rcu: Add debug check for too many rcu_read_unlock() TREE_PREEMPT_RCU maintains an rcu_read_lock_nesting counter in the task structure, which happens to be a signed int. So this patch adds a check for this counter being negative at the end of __rcu_read_unlock(). This check is under CONFIG_PROVE_LOCKING, so can be thought of as being part of lockdep. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626498423064-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index f11ebd44b454..e77cdf321e13 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -304,6 +304,9 @@ void __rcu_read_unlock(void) if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) rcu_read_unlock_special(t); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); -- cgit v1.2.3 From b6407e863934965cdc66cbc244d811ceeb6f4d77 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 16:04:02 -0800 Subject: rcu: Give different levels of the rcu_node hierarchy distinct lockdep names Previously, each level of the rcu_node hierarchy had the same rather unimaginative name: "&rcu_node_class[i]". This makes lockdep diagnostics involving these lockdep classes less helpful than would be nice. This patch fixes this by giving each level of the rcu_node hierarchy a distinct name: "rcu_node_level_0", "rcu_node_level_1", and so on. This version of the patch includes improved diagnostics suggested by Josh Triplett and Peter Zijlstra. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626498421830-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0a4c32879398..3b13d64b010b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1811,11 +1811,17 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) */ static void __init rcu_init_one(struct rcu_state *rsp) { + static char *buf[] = { "rcu_node_level_0", + "rcu_node_level_1", + "rcu_node_level_2", + "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */ int cpustride = 1; int i; int j; struct rcu_node *rnp; + BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ + /* Initialize the level-tracking arrays. */ for (i = 1; i < NUM_RCU_LVLS; i++) @@ -1829,7 +1835,8 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { spin_lock_init(&rnp->lock); - lockdep_set_class(&rnp->lock, &rcu_node_class[i]); + lockdep_set_class_and_name(&rnp->lock, + &rcu_node_class[i], buf[i]); rnp->gpnum = 0; rnp->qsmask = 0; rnp->qsmaskinit = 0; -- cgit v1.2.3 From 017c426138122c8e9b9f5057fbd0567c37b35247 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Jan 2010 16:10:58 -0800 Subject: rcu: Fix sparse warnings Rename local variable "i" in rcu_init() to avoid conflict with RCU_INIT_FLAVOR(), restrict the scope of RCU_TREE_NONCORE, and make __synchronize_srcu() static. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12635142581560-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree.c | 6 +++--- kernel/rcutree.h | 6 ++---- kernel/srcu.c | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 3b13d64b010b..099a255ede4c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1888,7 +1888,7 @@ do { \ void __init rcu_init(void) { - int i; + int cpu; rcu_bootup_announce(); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR @@ -1908,8 +1908,8 @@ void __init rcu_init(void) * or the scheduler are operational. */ cpu_notifier(rcu_cpu_notify, 0); - for_each_online_cpu(i) - rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i); + for_each_online_cpu(cpu) + rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); } #include "rcutree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcutree.h index bd5d78ad1c48..d9d032abd665 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -323,8 +323,6 @@ struct rcu_state { #define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ /* GP were moved to root. */ -#ifdef RCU_TREE_NONCORE - /* * RCU implementation internal declarations: */ @@ -339,7 +337,7 @@ extern struct rcu_state rcu_preempt_state; DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#else /* #ifdef RCU_TREE_NONCORE */ +#ifndef RCU_TREE_NONCORE /* Forward declarations for rcutree_plugin.h */ static void rcu_bootup_announce(void); @@ -372,4 +370,4 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu); static void rcu_preempt_send_cbs_to_orphanage(void); static void __init __rcu_init_preempt(void); -#endif /* #else #ifdef RCU_TREE_NONCORE */ +#endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/srcu.c b/kernel/srcu.c index 818d7d9aa03c..31b275b9c112 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -144,7 +144,7 @@ EXPORT_SYMBOL_GPL(srcu_read_unlock); /* * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) +static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) { int idx; -- cgit v1.2.3 From 632ee200130899252508c478ad0e808222573fbc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:45 -0800 Subject: rcu: Introduce lockdep-based checking to RCU read-side primitives Inspection is proving insufficient to catch all RCU misuses, which is understandable given that rcu_dereference() might be protected by any of four different flavors of RCU (RCU, RCU-bh, RCU-sched, and SRCU), and might also/instead be protected by any of a number of locking primitives. It is therefore time to enlist the aid of lockdep. This set of patches is inspired by earlier work by Peter Zijlstra and Thomas Gleixner, and takes the following approach: o Set up separate lockdep classes for RCU, RCU-bh, and RCU-sched. o Set up separate lockdep classes for each instance of SRCU. o Create primitives that check for being in an RCU read-side critical section. These return exact answers if lockdep is fully enabled, but if unsure, report being in an RCU read-side critical section. (We want to avoid false positives!) The primitives are: For RCU: rcu_read_lock_held(void) For RCU-bh: rcu_read_lock_bh_held(void) For RCU-sched: rcu_read_lock_sched_held(void) For SRCU: srcu_read_lock_held(struct srcu_struct *sp) o Add rcu_dereference_check(), which takes a second argument in which one places a boolean expression based on the above primitives and/or lockdep_is_held(). o A new kernel configuration parameter, CONFIG_PROVE_RCU, enables rcu_dereference_check(). This depends on CONFIG_PROVE_LOCKING, and should be quite helpful during the transition period while CONFIG_PROVE_RCU-unaware patches are in flight. The existing rcu_dereference() primitive does no checking, but upcoming patches will change that. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcupdate.c | 10 ++++++++++ kernel/rcutorture.c | 12 ++++++++++-- kernel/srcu.c | 50 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 53 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 9b7fd4723878..033cb55c26df 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -50,6 +50,16 @@ static struct lock_class_key rcu_lock_key; struct lockdep_map rcu_lock_map = STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); EXPORT_SYMBOL_GPL(rcu_lock_map); + +static struct lock_class_key rcu_bh_lock_key; +struct lockdep_map rcu_bh_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_bh", &rcu_bh_lock_key); +EXPORT_SYMBOL_GPL(rcu_bh_lock_map); + +static struct lock_class_key rcu_sched_lock_key; +struct lockdep_map rcu_sched_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key); +EXPORT_SYMBOL_GPL(rcu_sched_lock_map); #endif /* diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index adda92bfafac..5f43f30fcd1d 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -796,7 +796,11 @@ static void rcu_torture_timer(unsigned long unused) idx = cur_ops->readlock(); completed = cur_ops->completed(); - p = rcu_dereference(rcu_torture_current); + p = rcu_dereference_check(rcu_torture_current, + rcu_read_lock_held() || + rcu_read_lock_bh_held() || + rcu_read_lock_sched_held() || + srcu_read_lock_held(&srcu_ctl)); if (p == NULL) { /* Leave because rcu_torture_writer is not yet underway */ cur_ops->readunlock(idx); @@ -853,7 +857,11 @@ rcu_torture_reader(void *arg) } idx = cur_ops->readlock(); completed = cur_ops->completed(); - p = rcu_dereference(rcu_torture_current); + p = rcu_dereference_check(rcu_torture_current, + rcu_read_lock_held() || + rcu_read_lock_bh_held() || + rcu_read_lock_sched_held() || + srcu_read_lock_held(&srcu_ctl)); if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ cur_ops->readunlock(idx); diff --git a/kernel/srcu.c b/kernel/srcu.c index 31b275b9c112..bde4295774c8 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -34,6 +34,30 @@ #include #include +static int init_srcu_struct_fields(struct srcu_struct *sp) +{ + sp->completed = 0; + mutex_init(&sp->mutex); + sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); + return sp->per_cpu_ref ? 0 : -ENOMEM; +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +int __init_srcu_struct(struct srcu_struct *sp, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* Don't re-initialize a lock while it is held. */ + debug_check_no_locks_freed((void *)sp, sizeof(*sp)); + lockdep_init_map(&sp->dep_map, name, key, 0); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + return init_srcu_struct_fields(sp); +} +EXPORT_SYMBOL_GPL(__init_srcu_struct); + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + /** * init_srcu_struct - initialize a sleep-RCU structure * @sp: structure to initialize. @@ -44,13 +68,12 @@ */ int init_srcu_struct(struct srcu_struct *sp) { - sp->completed = 0; - mutex_init(&sp->mutex); - sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); - return (sp->per_cpu_ref ? 0 : -ENOMEM); + return init_srcu_struct_fields(sp); } EXPORT_SYMBOL_GPL(init_srcu_struct); +#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + /* * srcu_readers_active_idx -- returns approximate number of readers * active on the specified rank of per-CPU counters. @@ -100,15 +123,12 @@ void cleanup_srcu_struct(struct srcu_struct *sp) } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); -/** - * srcu_read_lock - register a new reader for an SRCU-protected structure. - * @sp: srcu_struct in which to register the new reader. - * +/* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. Must be called from process context. * Returns an index that must be passed to the matching srcu_read_unlock(). */ -int srcu_read_lock(struct srcu_struct *sp) +int __srcu_read_lock(struct srcu_struct *sp) { int idx; @@ -120,26 +140,22 @@ int srcu_read_lock(struct srcu_struct *sp) preempt_enable(); return idx; } -EXPORT_SYMBOL_GPL(srcu_read_lock); +EXPORT_SYMBOL_GPL(__srcu_read_lock); -/** - * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. - * @sp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock(). - * +/* * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). * Must be called from process context. */ -void srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *sp, int idx) { preempt_disable(); srcu_barrier(); /* ensure compiler won't misorder critical section. */ per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; preempt_enable(); } -EXPORT_SYMBOL_GPL(srcu_read_unlock); +EXPORT_SYMBOL_GPL(__srcu_read_unlock); /* * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). -- cgit v1.2.3 From 0632eb3d7563d6a76d49a3860b6352d800c92854 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:47 -0800 Subject: rcu: Integrate rcu_dereference_check() message into lockdep Make rcu_dereference_check() print the list of held locks in addition to the stack dump to ease debugging. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c62ec14609b9..672c436946ce 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3809,3 +3809,21 @@ void lockdep_sys_exit(void) lockdep_print_held_locks(curr); } } + +void lockdep_rcu_dereference(const char *file, const int line) +{ + struct task_struct *curr = current; + + if (!debug_locks_off()) + return; + printk("\n==============================================\n"); + printk( "[ BUG: Unsafe rcu_dereference_check() usage! ]\n"); + printk( "----------------------------------------------\n"); + printk("%s:%d invoked rcu_dereference_check() without protection!\n", + file, line); + printk("\nother info that might help us debug this:\n\n"); + lockdep_print_held_locks(curr); + printk("\nstack backtrace:\n"); + dump_stack(); +} +EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); -- cgit v1.2.3 From d11c563dd20ff35da5652c3e1c989d9e10e1d6d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:50 -0800 Subject: sched: Use lockdep-based checking on rcu_dereference() Update the rcu_dereference() usages to take advantage of the new lockdep-based checking. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-6-git-send-email-paulmck@linux.vnet.ibm.com> [ -v2: fix allmodconfig missing symbol export build failure on x86 ] Signed-off-by: Ingo Molnar --- kernel/cgroup.c | 14 ++++++++++++++ kernel/exit.c | 14 +++++++++++--- kernel/fork.c | 1 + kernel/notifier.c | 6 +++--- kernel/pid.c | 2 +- kernel/sched.c | 11 ++++++++--- 6 files changed, 38 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index aa3bee566446..b1a0f5a528fe 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -166,6 +166,20 @@ static DEFINE_SPINLOCK(hierarchy_id_lock); */ static int need_forkexit_callback __read_mostly; +#ifdef CONFIG_PROVE_LOCKING +int cgroup_lock_is_held(void) +{ + return lockdep_is_held(&cgroup_mutex); +} +#else /* #ifdef CONFIG_PROVE_LOCKING */ +int cgroup_lock_is_held(void) +{ + return mutex_is_locked(&cgroup_mutex); +} +#endif /* #else #ifdef CONFIG_PROVE_LOCKING */ + +EXPORT_SYMBOL_GPL(cgroup_lock_is_held); + /* convenient tests for these bits */ inline int cgroup_is_removed(const struct cgroup *cgrp) { diff --git a/kernel/exit.c b/kernel/exit.c index 546774a31a66..45ed043b8bf5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -85,7 +85,9 @@ static void __exit_signal(struct task_struct *tsk) BUG_ON(!sig); BUG_ON(!atomic_read(&sig->count)); - sighand = rcu_dereference(tsk->sighand); + sighand = rcu_dereference_check(tsk->sighand, + rcu_read_lock_held() || + lockdep_is_held(&tasklist_lock)); spin_lock(&sighand->siglock); posix_cpu_timers_exit(tsk); @@ -170,8 +172,10 @@ void release_task(struct task_struct * p) repeat: tracehook_prepare_release_task(p); /* don't need to get the RCU readlock here - the process is dead and - * can't be modifying its own credentials */ + * can't be modifying its own credentials. But shut RCU-lockdep up */ + rcu_read_lock(); atomic_dec(&__task_cred(p)->user->processes); + rcu_read_unlock(); proc_flush_task(p); @@ -473,9 +477,11 @@ static void close_files(struct files_struct * files) /* * It is safe to dereference the fd table without RCU or * ->file_lock because this is the last reference to the - * files structure. + * files structure. But use RCU to shut RCU-lockdep up. */ + rcu_read_lock(); fdt = files_fdtable(files); + rcu_read_unlock(); for (;;) { unsigned long set; i = j * __NFDBITS; @@ -521,10 +527,12 @@ void put_files_struct(struct files_struct *files) * at the end of the RCU grace period. Otherwise, * you can free files immediately. */ + rcu_read_lock(); fdt = files_fdtable(files); if (fdt != &files->fdtab) kmem_cache_free(files_cachep, files); free_fdtable(fdt); + rcu_read_unlock(); } } diff --git a/kernel/fork.c b/kernel/fork.c index f88bd984df35..17bbf093356d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -86,6 +86,7 @@ int max_threads; /* tunable limit on nr_threads */ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +EXPORT_SYMBOL_GPL(tasklist_lock); int nr_processes(void) { diff --git a/kernel/notifier.c b/kernel/notifier.c index acd24e7643eb..2488ba7eb568 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -78,10 +78,10 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, int ret = NOTIFY_DONE; struct notifier_block *nb, *next_nb; - nb = rcu_dereference(*nl); + nb = rcu_dereference_raw(*nl); while (nb && nr_to_call) { - next_nb = rcu_dereference(nb->next); + next_nb = rcu_dereference_raw(nb->next); #ifdef CONFIG_DEBUG_NOTIFIERS if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { @@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, * racy then it does not matter what the result of the test * is, we re-check the list after having taken the lock anyway: */ - if (rcu_dereference(nh->head)) { + if (rcu_dereference_raw(nh->head)) { down_read(&nh->rwsem); ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); diff --git a/kernel/pid.c b/kernel/pid.c index 2e17c9c92cbe..b08e697cd83f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -367,7 +367,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference(pid->tasks[type].first); + first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)); if (first) result = hlist_entry(first, struct task_struct, pids[(type)].node); } diff --git a/kernel/sched.c b/kernel/sched.c index 3a8fb30a91b1..70ae68680d4c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -645,6 +645,11 @@ static inline int cpu_of(struct rq *rq) #endif } +#define for_each_domain_rd(p) \ + rcu_dereference_check((p), \ + rcu_read_lock_sched_held() || \ + lockdep_is_held(&sched_domains_mutex)) + /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. @@ -653,7 +658,7 @@ static inline int cpu_of(struct rq *rq) * preempt-disabled sections. */ #define for_each_domain(cpu, __sd)