From 08e23d05fa6dc4fc13da0ccf09defdd4bbc92ff4 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 24 Oct 2023 20:30:15 +0200 Subject: PM / devfreq: Fix buffer overflow in trans_stat_show Fix buffer overflow in trans_stat_show(). Convert simple snprintf to the more secure scnprintf with size of PAGE_SIZE. Add condition checking if we are exceeding PAGE_SIZE and exit early from loop. Also add at the end a warning that we exceeded PAGE_SIZE and that stats is disabled. Return -EFBIG in the case where we don't have enough space to write the full transition table. Also document in the ABI that this function can return -EFBIG error. Link: https://lore.kernel.org/all/20231024183016.14648-2-ansuelsmth@gmail.com/ Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218041 Fixes: e552bbaf5b98 ("PM / devfreq: Add sysfs node for representing frequency transition information.") Signed-off-by: Christian Marangi Signed-off-by: Chanwoo Choi --- Documentation/ABI/testing/sysfs-class-devfreq | 3 ++ drivers/devfreq/devfreq.c | 57 ++++++++++++++++++--------- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 5e6b74f30406..1e7e0bb4c14e 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -52,6 +52,9 @@ Description: echo 0 > /sys/class/devfreq/.../trans_stat + If the transition table is bigger than PAGE_SIZE, reading + this will return an -EFBIG error. + What: /sys/class/devfreq/.../available_frequencies Date: October 2012 Contact: Nishanth Menon diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index b3a68d5833bd..907f50ab70ed 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1688,7 +1688,7 @@ static ssize_t trans_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct devfreq *df = to_devfreq(dev); - ssize_t len; + ssize_t len = 0; int i, j; unsigned int max_state; @@ -1697,7 +1697,7 @@ static ssize_t trans_stat_show(struct device *dev, max_state = df->max_state; if (max_state == 0) - return sprintf(buf, "Not Supported.\n"); + return scnprintf(buf, PAGE_SIZE, "Not Supported.\n"); mutex_lock(&df->lock); if (!df->stop_polling && @@ -1707,31 +1707,52 @@ static ssize_t trans_stat_show(struct device *dev, } mutex_unlock(&df->lock); - len = sprintf(buf, " From : To\n"); - len += sprintf(buf + len, " :"); - for (i = 0; i < max_state; i++) - len += sprintf(buf + len, "%10lu", - df->freq_table[i]); + len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " :"); + for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu", + df->freq_table[i]); + } + if (len >= PAGE_SIZE - 1) + return PAGE_SIZE - 1; - len += sprintf(buf + len, " time(ms)\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " time(ms)\n"); for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; if (df->freq_table[i] == df->previous_freq) - len += sprintf(buf + len, "*"); + len += scnprintf(buf + len, PAGE_SIZE - len, "*"); else - len += sprintf(buf + len, " "); + len += scnprintf(buf + len, PAGE_SIZE - len, " "); + if (len >= PAGE_SIZE - 1) + break; + + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu:", + df->freq_table[i]); + for (j = 0; j < max_state; j++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10u", + df->stats.trans_table[(i * max_state) + j]); + } + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10llu\n", (u64) + jiffies64_to_msecs(df->stats.time_in_state[i])); + } - len += sprintf(buf + len, "%10lu:", df->freq_table[i]); - for (j = 0; j < max_state; j++) - len += sprintf(buf + len, "%10u", - df->stats.trans_table[(i * max_state) + j]); + if (len < PAGE_SIZE - 1) + len += scnprintf(buf + len, PAGE_SIZE - len, "Total transition : %u\n", + df->stats.total_trans); - len += sprintf(buf + len, "%10llu\n", (u64) - jiffies64_to_msecs(df->stats.time_in_state[i])); + if (len >= PAGE_SIZE - 1) { + pr_warn_once("devfreq transition table exceeds PAGE_SIZE. Disabling\n"); + return -EFBIG; } - len += sprintf(buf + len, "Total transition : %u\n", - df->stats.total_trans); return len; } -- cgit v1.2.3 From 4920ee6dcfaf9aec9f4bd14ce6c15a6a758a92ae Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 24 Oct 2023 20:30:16 +0200 Subject: PM / devfreq: Convert to use sysfs_emit_at() API Follow the advice of the Documentation/filesystems/sysfs.rst and show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. Link: https://lore.kernel.org/all/20231024183016.14648-3-ansuelsmth@gmail.com/ Signed-off-by: Christian Marangi Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 907f50ab70ed..017a87465776 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1697,7 +1697,7 @@ static ssize_t trans_stat_show(struct device *dev, max_state = df->max_state; if (max_state == 0) - return scnprintf(buf, PAGE_SIZE, "Not Supported.\n"); + return sysfs_emit(buf, "Not Supported.\n"); mutex_lock(&df->lock); if (!df->stop_polling && @@ -1707,47 +1707,44 @@ static ssize_t trans_stat_show(struct device *dev, } mutex_unlock(&df->lock); - len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); - len += scnprintf(buf + len, PAGE_SIZE - len, " :"); + len += sysfs_emit_at(buf, len, " From : To\n"); + len += sysfs_emit_at(buf, len, " :"); for (i = 0; i < max_state; i++) { if (len >= PAGE_SIZE - 1) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu", - df->freq_table[i]); + len += sysfs_emit_at(buf, len, "%10lu", + df->freq_table[i]); } + if (len >= PAGE_SIZE - 1) return PAGE_SIZE - 1; - - len += scnprintf(buf + len, PAGE_SIZE - len, " time(ms)\n"); + len += sysfs_emit_at(buf, len, " time(ms)\n"); for (i = 0; i < max_state; i++) { if (len >= PAGE_SIZE - 1) break; - if (df->freq_table[i] == df->previous_freq) - len += scnprintf(buf + len, PAGE_SIZE - len, "*"); + if (df->freq_table[2] == df->previous_freq) + len += sysfs_emit_at(buf, len, "*"); else - len += scnprintf(buf + len, PAGE_SIZE - len, " "); + len += sysfs_emit_at(buf, len, " "); if (len >= PAGE_SIZE - 1) break; - - len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu:", - df->freq_table[i]); + len += sysfs_emit_at(buf, len, "%10lu:", df->freq_table[i]); for (j = 0; j < max_state; j++) { if (len >= PAGE_SIZE - 1) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "%10u", - df->stats.trans_table[(i * max_state) + j]); + len += sysfs_emit_at(buf, len, "%10u", + df->stats.trans_table[(i * max_state) + j]); } if (len >= PAGE_SIZE - 1) break; - len += scnprintf(buf + len, PAGE_SIZE - len, "%10llu\n", (u64) - jiffies64_to_msecs(df->stats.time_in_state[i])); + len += sysfs_emit_at(buf, len, "%10llu\n", (u64) + jiffies64_to_msecs(df->stats.time_in_state[i])); } if (len < PAGE_SIZE - 1) - len += scnprintf(buf + len, PAGE_SIZE - len, "Total transition : %u\n", - df->stats.total_trans); - + len += sysfs_emit_at(buf, len, "Total transition : %u\n", + df->stats.total_trans); if (len >= PAGE_SIZE - 1) { pr_warn_once("devfreq transition table exceeds PAGE_SIZE. Disabling\n"); return -EFBIG; -- cgit v1.2.3 From 4c58e9d85c24b5281a2d39a3e6510b5f3b7fc687 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 1 Nov 2023 09:45:00 -0500 Subject: opp: ti: Use device_get_match_data() Use preferred device_get_match_data() instead of of_match_device() to get the driver match data. With this, adjust the includes to explicitly include the correct headers. As this driver only does DT based matching, of_match_device() will never return NULL if we've gotten to probe(). Therefore, the NULL check and error return for it can be dropped. Signed-off-by: Rob Herring Signed-off-by: Viresh Kumar --- drivers/opp/ti-opp-supply.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c index 8f3f13fbbb25..e3b97cd1fbbf 100644 --- a/drivers/opp/ti-opp-supply.c +++ b/drivers/opp/ti-opp-supply.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -373,23 +374,15 @@ static int ti_opp_supply_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct device *cpu_dev = get_cpu_device(0); - const struct of_device_id *match; const struct ti_opp_supply_of_data *of_data; int ret = 0; - match = of_match_device(ti_opp_supply_of_match, dev); - if (!match) { - /* We do not expect this to happen */ - dev_err(dev, "%s: Unable to match device\n", __func__); - return -ENODEV; - } - if (!match->data) { + of_data = device_get_match_data(dev); + if (!of_data) { /* Again, unlikely.. but mistakes do happen */ dev_err(dev, "%s: Bad data in match\n", __func__); return -EINVAL; } - of_data = match->data; - dev_set_drvdata(dev, (void *)of_data); /* If we need optimized voltage */ -- cgit v1.2.3 From 073d3d2ca7d462afc8159ca0175675b9b7b4f162 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Oct 2023 12:40:04 +0530 Subject: OPP: Level zero is valid The level zero can be used by some OPPs to drop performance state vote for the device. It is perfectly fine to allow the same. _set_opp_level() considers it as an invalid value currently and returns early. In order to support this properly, initialize the level field with U32_MAX, which denotes unused level field. Reported-by: Stephan Gerhold Reviewed-by: Ulf Hansson Tested-by: Stephan Gerhold Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 24 ++++++++++++++++++++---- drivers/opp/of.c | 8 +++++++- include/linux/pm_opp.h | 5 ++++- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 84f345c69ea5..f2e2aa07b431 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq_indexed); * @opp: opp for which level value has to be returned for * * Return: level read from device tree corresponding to the opp, else - * return 0. + * return U32_MAX. */ unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) { @@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_level); * @index: index of the required opp * * Return: performance state read from device tree corresponding to the - * required opp, else return 0. + * required opp, else return U32_MAX. */ unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp, unsigned int index) @@ -808,6 +808,14 @@ struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, struct dev_pm_opp *opp; opp = _find_key_ceil(dev, &temp, 0, true, _read_level, NULL); + + /* False match */ + if (temp == OPP_LEVEL_UNSET) { + dev_err(dev, "%s: OPP levels aren't available\n", __func__); + dev_pm_opp_put(opp); + return ERR_PTR(-ENODEV); + } + *level = temp; return opp; } @@ -1049,12 +1057,18 @@ static int _set_opp_bw(const struct opp_table *opp_table, static int _set_performance_state(struct device *dev, struct device *pd_dev, struct dev_pm_opp *opp, int i) { - unsigned int pstate = likely(opp) ? opp->required_opps[i]->level: 0; + unsigned int pstate = 0; int ret; if (!pd_dev) return 0; + if (likely(opp)) { + pstate = opp->required_opps[i]->level; + if (pstate == OPP_LEVEL_UNSET) + return 0; + } + ret = dev_pm_domain_set_performance_state(pd_dev, pstate); if (ret) { dev_err(dev, "Failed to set performance state of %s: %d (%d)\n", @@ -1135,7 +1149,7 @@ static int _set_opp_level(struct device *dev, struct opp_table *opp_table, int ret = 0; if (opp) { - if (!opp->level) + if (opp->level == OPP_LEVEL_UNSET) return 0; level = opp->level; @@ -1867,6 +1881,8 @@ struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table) INIT_LIST_HEAD(&opp->node); + opp->level = OPP_LEVEL_UNSET; + return opp; } diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 81fa27599d58..85fad7ca0007 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1393,8 +1393,14 @@ int of_get_required_opp_performance_state(struct device_node *np, int index) opp = _find_opp_of_np(opp_table, required_np); if (opp) { - pstate = opp->level; + if (opp->level == OPP_LEVEL_UNSET) { + pr_err("%s: OPP levels aren't available for %pOF\n", + __func__, np); + } else { + pstate = opp->level; + } dev_pm_opp_put(opp); + } dev_pm_opp_put_opp_table(opp_table); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index ccd97bcef269..af53101a1383 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -92,9 +92,12 @@ struct dev_pm_opp_config { struct device ***virt_devs; }; +#define OPP_LEVEL_UNSET U32_MAX + /** * struct dev_pm_opp_data - The data to use to initialize an OPP. - * @level: The performance level for the OPP. + * @level: The performance level for the OPP. Set level to OPP_LEVEL_UNSET if + * level field isn't used. * @freq: The clock rate in Hz for the OPP. * @u_volt: The voltage in uV for the OPP. */ -- cgit v1.2.3 From 6d366d0e544676bf608769b9520644e3f654ff99 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 12 Oct 2023 15:45:21 +0530 Subject: OPP: Use _set_opp_level() for single genpd case There are two genpd (as required-opp) cases that we need to handle, devices with a single genpd and ones with multiple genpds. The multiple genpds case is clear, where the OPP core calls dev_pm_domain_attach_by_name() for them and uses the virtual devices returned by this helper to call dev_pm_domain_set_performance_state() later to change the performance state. The single genpd case however requires special handling as we need to use the same `dev` structure (instead of a virtual one provided by genpd core) for setting the performance state via dev_pm_domain_set_performance_state(). As we move towards more generic code to take care of the required OPPs, where we will recursively call dev_pm_opp_set_opp() for all the required OPPs, the above special case becomes a problem. It doesn't make sense for a device's DT entry to have both "opp-level" and single "required-opps" entry pointing to a genpd's OPP, as that would make the OPP core call dev_pm_domain_set_performance_state() for two different values for the same device structure. And so we can reuse the 'opp->level" field in such a case and call _set_opp_level() for the device. Reviewed-by: Ulf Hansson Tested-by: Stephan Gerhold Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 6 ++++-- drivers/opp/of.c | 31 ++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index f2e2aa07b431..aeb216f7e978 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1088,10 +1088,12 @@ static int _opp_set_required_opps_generic(struct device *dev, static int _opp_set_required_opps_genpd(struct device *dev, struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) { - struct device **genpd_virt_devs = - opp_table->genpd_virt_devs ? opp_table->genpd_virt_devs : &dev; + struct device **genpd_virt_devs = opp_table->genpd_virt_devs; int index, target, delta, ret; + if (!genpd_virt_devs) + return 0; + /* Scaling up? Set required OPPs in normal order, else reverse */ if (!scaling_down) { index = 0; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 85fad7ca0007..4cdeeab5ceee 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -296,7 +296,7 @@ void _of_clear_opp(struct opp_table *opp_table, struct dev_pm_opp *opp) of_node_put(opp->np); } -static int _link_required_opps(struct dev_pm_opp *opp, +static int _link_required_opps(struct dev_pm_opp *opp, struct opp_table *opp_table, struct opp_table *required_table, int index) { struct device_node *np; @@ -314,6 +314,31 @@ static int _link_required_opps(struct dev_pm_opp *opp, return -ENODEV; } + /* + * There are two genpd (as required-opp) cases that we need to handle, + * devices with a single genpd and ones with multiple genpds. + * + * The single genpd case requires special handling as we need to use the + * same `dev` structure (instead of a virtual one provided by genpd + * core) for setting the performance state. + * + * It doesn't make sense for a device's DT entry to have both + * "opp-level" and single "required-opps" entry pointing to a genpd's + * OPP, as that would make the OPP core call + * dev_pm_domain_set_performance_state() for two different values for + * the same device structure. Lets treat single genpd configuration as a + * case where the OPP's level is directly available without required-opp + * link in the DT. + * + * Just update the `level` with the right value, which + * dev_pm_opp_set_opp() will take care of in the normal path itself. + */ + if (required_table->is_genpd && opp_table->required_opp_count == 1 && + !opp_table->genpd_virt_devs) { + if (!WARN_ON(opp->level != OPP_LEVEL_UNSET)) + opp->level = opp->required_opps[0]->level; + } + return 0; } @@ -338,7 +363,7 @@ static int _of_opp_alloc_required_opps(struct opp_table *opp_table, if (IS_ERR_OR_NULL(required_table)) continue; - ret = _link_required_opps(opp, required_table, i); + ret = _link_required_opps(opp, opp_table, required_table, i); if (ret) goto free_required_opps; } @@ -359,7 +384,7 @@ static int lazy_link_required_opps(struct opp_table *opp_table, int ret; list_for_each_entry(opp, &opp_table->opp_list, node) { - ret = _link_required_opps(opp, new_table, index); + ret = _link_required_opps(opp, opp_table, new_table, index); if (ret) return ret; } -- cgit v1.2.3 From e37440e7e2c2760475d60c5556b59c8880a7fd63 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Oct 2023 14:17:48 +0530 Subject: OPP: Call dev_pm_opp_set_opp() for required OPPs Configuring the required OPP was never properly implemented, we just took an exception for genpds and configured them directly, while leaving out all other required OPP types. Now that a standard call to dev_pm_opp_set_opp() takes care of configuring the opp->level too, the special handling for genpds can be avoided by simply calling dev_pm_opp_set_opp() for the required OPPs, which shall eventually configure the corresponding level for genpds. This also makes it possible for us to configure other type of required OPPs (no concrete users yet though), via the same path. This is how other frameworks take care of parent nodes, like clock, regulators, etc, where we recursively call the same helper. In order to call dev_pm_opp_set_opp() for the virtual genpd devices, they must share the OPP table of the genpd. Call _add_opp_dev() for them to get that done. This commit also extends the struct dev_pm_opp_config to pass required devices, for non-genpd cases, which can be used to call dev_pm_opp_set_opp() for the non-genpd required devices. Reviewed-by: Ulf Hansson Tested-by: Stephan Gerhold Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 168 ++++++++++++++++++++++++------------------------- drivers/opp/of.c | 17 ++--- drivers/opp/opp.h | 8 +-- include/linux/pm_opp.h | 7 ++- 4 files changed, 100 insertions(+), 100 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index aeb216f7e978..e08375ed50aa 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1054,48 +1054,22 @@ static int _set_opp_bw(const struct opp_table *opp_table, return 0; } -static int _set_performance_state(struct device *dev, struct device *pd_dev, - struct dev_pm_opp *opp, int i) -{ - unsigned int pstate = 0; - int ret; - - if (!pd_dev) - return 0; - - if (likely(opp)) { - pstate = opp->required_opps[i]->level; - if (pstate == OPP_LEVEL_UNSET) - return 0; - } - - ret = dev_pm_domain_set_performance_state(pd_dev, pstate); - if (ret) { - dev_err(dev, "Failed to set performance state of %s: %d (%d)\n", - dev_name(pd_dev), pstate, ret); - } - - return ret; -} - -static int _opp_set_required_opps_generic(struct device *dev, - struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) -{ - dev_err(dev, "setting required-opps isn't supported for non-genpd devices\n"); - return -ENOENT; -} - -static int _opp_set_required_opps_genpd(struct device *dev, - struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down) +/* This is only called for PM domain for now */ +static int _set_required_opps(struct device *dev, struct opp_table *opp_table, + struct dev_pm_opp *opp, bool up) { - struct device **genpd_virt_devs = opp_table->genpd_virt_devs; + struct device **devs = opp_table->required_devs; int index, target, delta, ret; - if (!genpd_virt_devs) + if (!devs) return 0; + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(opp_table)) + return -EBUSY; + /* Scaling up? Set required OPPs in normal order, else reverse */ - if (!scaling_down) { + if (up) { index = 0; target = opp_table->required_opp_count; delta = 1; @@ -1106,9 +1080,11 @@ static int _opp_set_required_opps_genpd(struct device *dev, } while (index != target) { - ret = _set_performance_state(dev, genpd_virt_devs[index], opp, index); - if (ret) - return ret; + if (devs[index]) { + ret = dev_pm_opp_set_opp(devs[index], opp->required_opps[index]); + if (ret) + return ret; + } index += delta; } @@ -1116,34 +1092,6 @@ static int _opp_set_required_opps_genpd(struct device *dev, return 0; } -/* This is only called for PM domain for now */ -static int _set_required_opps(struct device *dev, struct opp_table *opp_table, - struct dev_pm_opp *opp, bool up) -{ - /* required-opps not fully initialized yet */ - if (lazy_linking_pending(opp_table)) - return -EBUSY; - - if (opp_table->set_required_opps) - return opp_table->set_required_opps(dev, opp_table, opp, up); - - return 0; -} - -/* Update set_required_opps handler */ -void _update_set_required_opps(struct opp_table *opp_table) -{ - /* Already set */ - if (opp_table->set_required_opps) - return; - - /* All required OPPs will belong to genpd or none */ - if (opp_table->required_opp_tables[0]->is_genpd) - opp_table->set_required_opps = _opp_set_required_opps_genpd; - else - opp_table->set_required_opps = _opp_set_required_opps_generic; -} - static int _set_opp_level(struct device *dev, struct opp_table *opp_table, struct dev_pm_opp *opp) { @@ -2406,19 +2354,13 @@ static void _opp_detach_genpd(struct opp_table *opp_table) { int index; - if (!opp_table->genpd_virt_devs) - return; - for (index = 0; index < opp_table->required_opp_count; index++) { - if (!opp_table->genpd_virt_devs[index]) + if (!opp_table->required_devs[index]) continue; - dev_pm_domain_detach(opp_table->genpd_virt_devs[index], false); - opp_table->genpd_virt_devs[index] = NULL; + dev_pm_domain_detach(opp_table->required_devs[index], false); + opp_table->required_devs[index] = NULL; } - - kfree(opp_table->genpd_virt_devs); - opp_table->genpd_virt_devs = NULL; } /* @@ -2445,14 +2387,14 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev, int index = 0, ret = -EINVAL; const char * const *name = names; - if (opp_table->genpd_virt_devs) - return 0; + if (!opp_table->required_devs) { + dev_err(dev, "Required OPPs not available, can't attach genpd\n"); + return -EINVAL; + } - opp_table->genpd_virt_devs = kcalloc(opp_table->required_opp_count, - sizeof(*opp_table->genpd_virt_devs), - GFP_KERNEL); - if (!opp_table->genpd_virt_devs) - return -ENOMEM; + /* Checking only the first one is enough ? */ + if (opp_table->required_devs[0]) + return 0; while (*name) { if (index >= opp_table->required_opp_count) { @@ -2468,13 +2410,25 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev, goto err; } - opp_table->genpd_virt_devs[index] = virt_dev; + /* + * Add the virtual genpd device as a user of the OPP table, so + * we can call dev_pm_opp_set_opp() on it directly. + * + * This will be automatically removed when the OPP table is + * removed, don't need to handle that here. + */ + if (!_add_opp_dev(virt_dev, opp_table->required_opp_tables[index])) { + ret = -ENOMEM; + goto err; + } + + opp_table->required_devs[index] = virt_dev; index++; name++; } if (virt_devs) - *virt_devs = opp_table->genpd_virt_devs; + *virt_devs = opp_table->required_devs; return 0; @@ -2484,10 +2438,42 @@ err: } +static int _opp_set_required_devs(struct opp_table *opp_table, + struct device *dev, + struct device **required_devs) +{ + int i; + + if (!opp_table->required_devs) { + dev_err(dev, "Required OPPs not available, can't set required devs\n"); + return -EINVAL; + } + + /* Another device that shares the OPP table has set the required devs ? */ + if (opp_table->required_devs[0]) + return 0; + + for (i = 0; i < opp_table->required_opp_count; i++) + opp_table->required_devs[i] = required_devs[i]; + + return 0; +} + +static void _opp_put_required_devs(struct opp_table *opp_table) +{ + int i; + + for (i = 0; i < opp_table->required_opp_count; i++) + opp_table->required_devs[i] = NULL; +} + static void _opp_clear_config(struct opp_config_data *data) { - if (data->flags & OPP_CONFIG_GENPD) + if (data->flags & OPP_CONFIG_REQUIRED_DEVS) + _opp_put_required_devs(data->opp_table); + else if (data->flags & OPP_CONFIG_GENPD) _opp_detach_genpd(data->opp_table); + if (data->flags & OPP_CONFIG_REGULATOR) _opp_put_regulators(data->opp_table); if (data->flags & OPP_CONFIG_SUPPORTED_HW) @@ -2601,12 +2587,22 @@ int dev_pm_opp_set_config(struct device *dev, struct dev_pm_opp_config *config) /* Attach genpds */ if (config->genpd_names) { + if (config->required_devs) + goto err; + ret = _opp_attach_genpd(opp_table, dev, config->genpd_names, config->virt_devs); if (ret) goto err; data->flags |= OPP_CONFIG_GENPD; + } else if (config->required_devs) { + ret = _opp_set_required_devs(opp_table, dev, + config->required_devs); + if (ret) + goto err; + + data->flags |= OPP_CONFIG_REQUIRED_DEVS; } ret = xa_alloc(&opp_configs, &id, data, XA_LIMIT(1, INT_MAX), diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 4cdeeab5ceee..5a7e294e56b7 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -165,7 +165,7 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, struct opp_table **required_opp_tables; struct device_node *required_np, *np; bool lazy = false; - int count, i; + int count, i, size; /* Traversing the first OPP node is all we need */ np = of_get_next_available_child(opp_np, NULL); @@ -179,12 +179,13 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, if (count <= 0) goto put_np; - required_opp_tables = kcalloc(count, sizeof(*required_opp_tables), - GFP_KERNEL); + size = sizeof(*required_opp_tables) + sizeof(*opp_table->required_devs); + required_opp_tables = kcalloc(count, size, GFP_KERNEL); if (!required_opp_tables) goto put_np; opp_table->required_opp_tables = required_opp_tables; + opp_table->required_devs = (void *)(required_opp_tables + count); opp_table->required_opp_count = count; for (i = 0; i < count; i++) { @@ -208,8 +209,6 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, mutex_lock(&opp_table_lock); list_add(&opp_table->lazy, &lazy_opp_tables); mutex_unlock(&opp_table_lock); - } else { - _update_set_required_opps(opp_table); } goto put_np; @@ -332,9 +331,14 @@ static int _link_required_opps(struct dev_pm_opp *opp, struct opp_table *opp_tab * * Just update the `level` with the right value, which * dev_pm_opp_set_opp() will take care of in the normal path itself. + * + * There is another case though, where a genpd's OPP table has + * required-opps set to a parent genpd. The OPP core expects the user to + * set the respective required `struct device` pointer via + * dev_pm_opp_set_config(). */ if (required_table->is_genpd && opp_table->required_opp_count == 1 && - !opp_table->genpd_virt_devs) { + !opp_table->required_devs[0]) { if (!WARN_ON(opp->level != OPP_LEVEL_UNSET)) opp->level = opp->required_opps[0]->level; } @@ -447,7 +451,6 @@ static void lazy_link_required_opp_table(struct opp_table *new_table) /* All required opp-tables found, remove from lazy list */ if (!lazy) { - _update_set_required_opps(opp_table); list_del_init(&opp_table->lazy); list_for_each_entry(opp, &opp_table->opp_list, node) diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 08366f90f16b..23dcb2fbf8c3 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -35,6 +35,7 @@ extern struct list_head opp_tables; #define OPP_CONFIG_PROP_NAME BIT(3) #define OPP_CONFIG_SUPPORTED_HW BIT(4) #define OPP_CONFIG_GENPD BIT(5) +#define OPP_CONFIG_REQUIRED_DEVS BIT(6) /** * struct opp_config_data - data for set config operations @@ -160,9 +161,9 @@ enum opp_table_access { * @rate_clk_single: Currently configured frequency for single clk. * @current_opp: Currently configured OPP for the table. * @suspend_opp: Pointer to OPP to be used during device suspend. - * @genpd_virt_devs: List of virtual devices for multiple genpd support. * @required_opp_tables: List of device OPP tables that are required by OPPs in * this table. + * @required_devs: List of devices for required OPP tables. * @required_opp_count: Number of required devices. * @supported_hw: Array of version number to support. * @supported_hw_count: Number of elements in supported_hw array. @@ -180,7 +181,6 @@ enum opp_table_access { * @path_count: Number of interconnect paths * @enabled: Set to true if the device's resources are enabled/configured. * @is_genpd: Marks if the OPP table belongs to a genpd. - * @set_required_opps: Helper responsible to set required OPPs. * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. * @@ -211,8 +211,8 @@ struct opp_table { struct dev_pm_opp *current_opp; struct dev_pm_opp *suspend_opp; - struct device **genpd_virt_devs; struct opp_table **required_opp_tables; + struct device **required_devs; unsigned int required_opp_count; unsigned int *supported_hw; @@ -229,8 +229,6 @@ struct opp_table { unsigned int path_count; bool enabled; bool is_genpd; - int (*set_required_opps)(struct device *dev, - struct opp_table *opp_table, struct dev_pm_opp *opp, bool scaling_down); #ifdef CONFIG_DEBUG_FS struct dentry *dentry; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index af53101a1383..81dff7facdc9 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -74,8 +74,10 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. - * @virt_devs: Pointer to return the array of virtual devices. + * attach. Mutually exclusive with required_devs. + * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually + * exclusive with required_devs. + * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs. * * This structure contains platform specific OPP configurations for the device. */ @@ -90,6 +92,7 @@ struct dev_pm_opp_config { const char * const *regulator_names; const char * const *genpd_names; struct device ***virt_devs; + struct device **required_devs; }; #define OPP_LEVEL_UNSET U32_MAX -- cgit v1.2.3 From 925141432fa4d8325b7156e88e53d740b12d0b0e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 16 Nov 2023 15:59:35 +0530 Subject: OPP: Don't set OPP recursively for a parent genpd Like other frameworks (clk, regulator, etc.) genpd core too takes care of propagation to performance state to parent genpds. The OPP core shouldn't attempt the same, or it may result in undefined behavior. Add checks at various places to take care of the same. Reviewed-by: Ulf Hansson Tested-by: Stephan Gerhold Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 16 +++++++++++++++- drivers/opp/of.c | 7 +++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e08375ed50aa..4f1ca84d9ed0 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2392,6 +2392,12 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev, return -EINVAL; } + /* Genpd core takes care of propagation to parent genpd */ + if (opp_table->is_genpd) { + dev_err(dev, "%s: Operation not supported for genpds\n", __func__); + return -EOPNOTSUPP; + } + /* Checking only the first one is enough ? */ if (opp_table->required_devs[0]) return 0; @@ -2453,8 +2459,16 @@ static int _opp_set_required_devs(struct opp_table *opp_table, if (opp_table->required_devs[0]) return 0; - for (i = 0; i < opp_table->required_opp_count; i++) + for (i = 0; i < opp_table->required_opp_count; i++) { + /* Genpd core takes care of propagation to parent genpd */ + if (required_devs[i] && opp_table->is_genpd && + opp_table->required_opp_tables[i]->is_genpd) { + dev_err(dev, "%s: Operation not supported for genpds\n", __func__); + return -EOPNOTSUPP; + } + opp_table->required_devs[i] = required_devs[i]; + } return 0; } diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 5a7e294e56b7..f9f0b22bccbb 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -339,8 +339,11 @@ static int _link_required_opps(struct dev_pm_opp *opp, struct opp_table *opp_tab */ if (required_table->is_genpd && opp_table->required_opp_count == 1 && !opp_table->required_devs[0]) { - if (!WARN_ON(opp->level != OPP_LEVEL_UNSET)) - opp->level = opp->required_opps[0]->level; + /* Genpd core takes care of propagation to parent genpd */ + if (!opp_table->is_genpd) { + if (!WARN_ON(opp->level != OPP_LEVEL_UNSET)) + opp->level = opp->required_opps[0]->level; + } } return 0; -- cgit v1.2.3 From 19cc8b1819a40410c50a3efab6cf27b73298deb5 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 28 Nov 2023 12:31:38 +0530 Subject: OPP: Check for invalid OPP in dev_pm_opp_find_level_ceil() _find_key_ceil() may return an error and that must be checked before passing the same to dev_pm_opp_put(). Fixes: 41907aa4ae37 ("OPP: Level zero is valid") Reported-by: Dan Carpenter Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 4f1ca84d9ed0..c022d548067d 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -808,6 +808,8 @@ struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, struct dev_pm_opp *opp; opp = _find_key_ceil(dev, &temp, 0, true, _read_level, NULL); + if (IS_ERR(opp)) + return opp; /* False match */ if (temp == OPP_LEVEL_UNSET) { -- cgit v1.2.3 From 2719675fa8111a8d7a060133e1dd4797d20c9754 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 20 Nov 2023 10:59:42 -0800 Subject: cpufreq: intel_pstate: Prioritize firmware-provided balance performance EPP The platform firmware can provide a balance performance EPP value by enabling HWP and programming the EPP to the desired value. However, currently this only takes effect for processors listed in intel_epp_balance_perf[], so in order to enable a new processor model to utilize this mechanism, that table needs to be updated. It arguably should not be necessary to modify the kernel to work properly with every new generation of processors, though, and distributions that don't always ship the most recent kernels should be able to run reasonably well on new hardware without code changes. For this reason, move the check to avoid updating the EPP when the balance performance EPP is unmodified from the power-up default of 0x80 after the check that allows the firmware-provided balance performance EPP value to be retrieved. This will cause the code to always look for the firmware- provided value before consulting intel_epp_balance_perf[] and the handling of new hardware will not depend on whether or not that thable has been updated yet. Signed-off-by: Srinivas Pandruvada [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a534a1f7f1ee..dd6d23e389f1 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1691,13 +1691,6 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) { cpudata->epp_default = intel_pstate_get_epp(cpudata, 0); - /* - * If this CPU gen doesn't call for change in balance_perf - * EPP return. - */ - if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) - return; - /* * If the EPP is set by firmware, which means that firmware enabled HWP * - Is equal or less than 0x80 (default balance_perf EPP) @@ -1710,6 +1703,13 @@ static void intel_pstate_update_epp_defaults(struct cpudata *cpudata) return; } + /* + * If this CPU gen doesn't call for change in balance_perf + * EPP return. + */ + if (epp_values[EPP_INDEX_BALANCE_PERFORMANCE] == HWP_EPP_BALANCE_PERFORMANCE) + return; + /* * Use hard coded value per gen to update the balance_perf * and default EPP. -- cgit v1.2.3 From c4a5118a3ae1eadc687d84eef9431f9e13eb015c Mon Sep 17 00:00:00 2001 From: Alexandra Diupina Date: Tue, 5 Dec 2023 18:12:20 +0300 Subject: cpufreq: scmi: process the result of devm_of_clk_add_hw_provider() devm_of_clk_add_hw_provider() may return an errno, so add a return value check Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8410e7f3b31e ("cpufreq: scmi: Fix OPP addition failure with a dummy clock provider") Signed-off-by: Alexandra Diupina Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index c8a7ccc42c16..4ee23f4ebf4a 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -334,8 +334,11 @@ static int scmi_cpufreq_probe(struct scmi_device *sdev) #ifdef CONFIG_COMMON_CLK /* dummy clock provider as needed by OPP if clocks property is used */ - if (of_property_present(dev->of_node, "#clock-cells")) - devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); + if (of_property_present(dev->of_node, "#clock-cells")) { + ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, NULL); + if (ret) + return dev_err_probe(dev, ret, "%s: registering clock provider failed\n", __func__); + } #endif ret = cpufreq_register_driver(&scmi_cpufreq_driver); -- cgit v1.2.3 From eeae55ed9c0a74604a49789e36b7cdf0ee8bd69c Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sat, 2 Dec 2023 02:09:28 +0800 Subject: intel_idle: Add Meteorlake support Add intel_idle support for MeteorLake. C1 and C1E states on Meteorlake are mutually exclusive, like Alderlake and Raptorlake, but they have little latency difference with measureable power difference, so always enable "C1E promotion" bit and expose C1E only. Expose C6 because it has less power compared with C1E, and smaller latency compared with C8/C10. Ignore C8 and expose C10, because C8 does not show latency advantage compared with C10. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index dcda0afecfc5..cfd0b24fd7f1 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -923,6 +923,35 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .enter = NULL } }; +static struct cpuidle_state mtl_l_cstates[] __initdata = { + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 140, + .target_residency = 420, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C10", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 310, + .target_residency = 930, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state gmt_cstates[] __initdata = { { .name = "C1", @@ -1349,6 +1378,10 @@ static const struct idle_cpu idle_cpu_adl_l __initconst = { .state_table = adl_l_cstates, }; +static const struct idle_cpu idle_cpu_mtl_l __initconst = { + .state_table = mtl_l_cstates, +}; + static const struct idle_cpu idle_cpu_gmt __initconst = { .state_table = gmt_cstates, }; @@ -1423,6 +1456,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &idle_cpu_mtl_l), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &idle_cpu_gmt), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr), -- cgit v1.2.3 From a1ca8295ee53a2fc57085fae26df37228c655791 Mon Sep 17 00:00:00 2001 From: Wang chaodong Date: Fri, 20 Oct 2023 16:51:06 +0800 Subject: PM: hibernate: Drop unnecessary local variable initialization It is not necessary to intialize the error variable in create_basic_memory_bitmaps(), because it is only read after being assigned a value. Signed-off-by: Wang chaodong [ rjw: Subject and changelog rewrite ] Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 50a15408c3fc..71b2f12ed3b5 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1119,7 +1119,7 @@ static void mark_nosave_pages(struct memory_bitmap *bm) int create_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; - int error = 0; + int error; if (forbidden_pages_map && free_pages_map) return 0; -- cgit v1.2.3 From bbeaa4691fa8682e2fe2e87f28d5fce39805fa68 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Fri, 27 Oct 2023 09:55:33 +0800 Subject: PM: hibernate: Do not initialize error in swap_write_page() 'error' first receives the function result before it is used, and it does not need to be assigned a value during definition. Signed-off-by: Li zeming [ rjw: Subject rewrite ] Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a2cb0babb5ec..68973ca2cf07 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -451,7 +451,7 @@ err_close: static int swap_write_page(struct swap_map_handle *handle, void *buf, struct hib_bio_batch *hb) { - int error = 0; + int error; sector_t offset; if (!handle->cur) -- cgit v1.2.3 From 4ac934b1aaa99e00ca25875d55094a4fe34e212d Mon Sep 17 00:00:00 2001 From: Li zeming Date: Tue, 24 Oct 2023 10:04:34 +0800 Subject: PM: hibernate: Do not initialize error in snapshot_write_next() The error variable in snapshot_write_next() gets a value before it is used, so don't initialize it to 0 upfront. Signed-off-by: Li zeming [ rjw: Subject and changelog rewrite ] Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 71b2f12ed3b5..e3e8f1c6e75f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2778,7 +2778,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) int snapshot_write_next(struct snapshot_handle *handle) { static struct chain_allocator ca; - int error = 0; + int error; next: /* Check if we have already loaded the entire image */ -- cgit v1.2.3 From 0c4cae1bc00d31c78858c184ede351baea232bdb Mon Sep 17 00:00:00 2001 From: Chris Feng Date: Wed, 13 Dec 2023 16:32:51 +0800 Subject: PM: hibernate: Avoid missing wakeup events during hibernation Wakeup events that occur in the hibernation process's hibernation_platform_enter() cannot wake up the system. Although the current hibernation framework will execute part of the recovery process after a wakeup event occurs, it ultimately performs a shutdown operation because the system does not check the return value of hibernation_platform_enter(). In short, if a wakeup event occurs before putting the system into the final low-power state, it will be missed. To solve this problem, check the return value of hibernation_platform_enter(). When it returns -EAGAIN or -EBUSY (indicate the occurrence of a wakeup event), execute the hibernation recovery process, discard the previously saved image, and ultimately return to the working state. Signed-off-by: Chris Feng [ rjw: Rephrase the message printed when going back to the working state ] Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 10 ++++++++-- kernel/power/power.h | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index dee341ae4ace..4b0b7cf2e019 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -642,9 +642,9 @@ int hibernation_platform_enter(void) */ static void power_down(void) { -#ifdef CONFIG_SUSPEND int error; +#ifdef CONFIG_SUSPEND if (hibernation_mode == HIBERNATION_SUSPEND) { error = suspend_devices_and_enter(mem_sleep_current); if (error) { @@ -667,7 +667,13 @@ static void power_down(void) kernel_restart(NULL); break; case HIBERNATION_PLATFORM: - hibernation_platform_enter(); + error = hibernation_platform_enter(); + if (error == -EAGAIN || error == -EBUSY) { + swsusp_unmark(); + events_check_enabled = false; + pr_info("Wakeup event detected during hibernation, rolling back.\n"); + return; + } fallthrough; case HIBERNATION_SHUTDOWN: if (kernel_can_power_off()) diff --git a/kernel/power/power.h b/kernel/power/power.h index 17fd9aaaf084..8499a39c62f4 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -175,6 +175,8 @@ extern int swsusp_write(unsigned int flags); void swsusp_close(void); #ifdef CONFIG_SUSPEND extern int swsusp_unmark(void); +#else +static inline int swsusp_unmark(void) { return 0; } #endif struct __kernel_old_timeval; -- cgit v1.2.3 From 71cd7e80cfde548959952eac7063aeaea1f2e1c6 Mon Sep 17 00:00:00 2001 From: Hongchen Zhang Date: Thu, 16 Nov 2023 08:56:09 +0800 Subject: PM: hibernate: Enforce ordering during image compression/decompression An S4 (suspend to disk) test on the LoongArch 3A6000 platform sometimes fails with the following error messaged in the dmesg log: Invalid LZO compressed length That happens because when compressing/decompressing the image, the synchronization between the control thread and the compress/decompress/crc thread is based on a relaxed ordering interface, which is unreliable, and the following situation may occur: CPU 0 CPU 1 save_image_lzo lzo_compress_threadfn atomic_set(&d->stop, 1); atomic_read(&data[thr].stop) data[thr].cmp = data[thr].cmp_len; WRITE data[thr].cmp_len Then CPU0 gets a stale cmp_len and writes it to disk. During resume from S4, wrong cmp_len is loaded. To maintain data consistency between the two threads, use the acquire/release variants of atomic set and read operations. Fixes: 081a9d043c98 ("PM / Hibernate: Improve performance of LZO/plain hibernation, checksum image") Cc: All applicable Signed-off-by: Hongchen Zhang Co-developed-by: Weihao Li Signed-off-by: Weihao Li [ rjw: Subject rewrite and changelog edits ] Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 68973ca2cf07..975e7195573b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -606,11 +606,11 @@ static int crc32_threadfn(void *data) unsigned i; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -619,7 +619,7 @@ static int crc32_threadfn(void *data) for (i = 0; i < d->run_threads; i++) *d->crc32 = crc32_le(*d->crc32, d->unc[i], *d->unc_len[i]); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -649,12 +649,12 @@ static int lzo_compress_threadfn(void *data) struct cmp_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -663,7 +663,7 @@ static int lzo_compress_threadfn(void *data) d->ret = lzo1x_1_compress(d->unc, d->unc_len, d->cmp + LZO_HEADER, &d->cmp_len, d->wrk); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -798,7 +798,7 @@ static int save_image_lzo(struct swap_map_handle *handle, data[thr].unc_len = off; - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -806,12 +806,12 @@ static int save_image_lzo(struct swap_map_handle *handle, break; crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -850,7 +850,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } } - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } @@ -1132,12 +1132,12 @@ static int lzo_decompress_threadfn(void *data) struct dec_data *d = data; while (1) { - wait_event(d->go, atomic_read(&d->ready) || + wait_event(d->go, atomic_read_acquire(&d->ready) || kthread_should_stop()); if (kthread_should_stop()) { d->thr = NULL; d->ret = -1; - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); break; } @@ -1150,7 +1150,7 @@ static int lzo_decompress_threadfn(void *data) flush_icache_range((unsigned long)d->unc, (unsigned long)d->unc + d->unc_len); - atomic_set(&d->stop, 1); + atomic_set_release(&d->stop, 1); wake_up(&d->done); } return 0; @@ -1335,7 +1335,7 @@ static int load_image_lzo(struct swap_map_handle *handle, } if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); crc->run_threads = 0; } @@ -1371,7 +1371,7 @@ static int load_image_lzo(struct swap_map_handle *handle, pg = 0; } - atomic_set(&data[thr].ready, 1); + atomic_set_release(&data[thr].ready, 1); wake_up(&data[thr].go); } @@ -1390,7 +1390,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for (run_threads = thr, thr = 0; thr < run_threads; thr++) { wait_event(data[thr].done, - atomic_read(&data[thr].stop)); + atomic_read_acquire(&data[thr].stop)); atomic_set(&data[thr].stop, 0); ret = data[thr].ret; @@ -1421,7 +1421,7 @@ static int load_image_lzo(struct swap_map_handle *handle, ret = snapshot_write_next(snapshot); if (ret <= 0) { crc->run_threads = thr + 1; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); goto out_finish; } @@ -1429,13 +1429,13 @@ static int load_image_lzo(struct swap_map_handle *handle, } crc->run_threads = thr; - atomic_set(&crc->ready, 1); + atomic_set_release(&crc->ready, 1); wake_up(&crc->go); } out_finish: if (crc->run_threads) { - wait_event(crc->done, atomic_read(&crc->stop)); + wait_event(crc->done, atomic_read_acquire(&crc->stop)); atomic_set(&crc->stop, 0); } stop = ktime_get(); -- cgit v1.2.3 From 0990319a0400db1d6069b5549327cd9105a266d5 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 15 Dec 2023 16:37:06 +0100 Subject: cpufreq: armada-8k: Fix parameter type warning The second parameter of clk_get() is of type 'const char *', so use NULL instead of the integer 0 to resolve a sparse warning: drivers/cpufreq/armada-8k-cpufreq.c:60:40: warning: Using plain integer as NULL pointer drivers/cpufreq/armada-8k-cpufreq.c:168:40: warning: Using plain integer as NULL pointer Reported-by: kernel test robot Signed-off-by: Gregory CLEMENT Reviewed-by: Andrew Lunn Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-8k-cpufreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/armada-8k-cpufreq.c b/drivers/cpufreq/armada-8k-cpufreq.c index 8afefdea4d80..ce5a5641b6dd 100644 --- a/drivers/cpufreq/armada-8k-cpufreq.c +++ b/drivers/cpufreq/armada-8k-cpufreq.c @@ -57,7 +57,7 @@ static void __init armada_8k_get_sharing_cpus(struct clk *cur_clk, continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_warn("Cannot get clock for CPU %d\n", cpu); } else { @@ -165,7 +165,7 @@ static int __init armada_8k_cpufreq_init(void) continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_err("Cannot get clock for CPU %d\n", cpu); -- cgit v1.2.3 From aed5ed595960c6d301dcd4ed31aeaa7a8054c0c6 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Sat, 25 Nov 2023 02:41:58 +0530 Subject: PM / devfreq: Synchronize devfreq_monitor_[start/stop] There is a chance if a frequent switch of the governor done in a loop result in timer list corruption where timer cancel being done from two place one from cancel_delayed_work_sync() and followed by expire_timers() can be seen from the traces[1]. while true do echo "simple_ondemand" > /sys/class/devfreq/1d84000.ufshc/governor echo "performance" > /sys/class/devfreq/1d84000.ufshc/governor done It looks to be issue with devfreq driver where device_monitor_[start/stop] need to synchronized so that delayed work should get corrupted while it is either being queued or running or being cancelled. Let's use polling flag and devfreq lock to synchronize the queueing the timer instance twice and work data being corrupted. [1] ... .. -0 [003] 9436.209662: timer_cancel timer=0xffffff80444f0428 -0 [003] 9436.209664: timer_expire_entry timer=0xffffff80444f0428 now=0x10022da1c function=__typeid__ZTSFvP10timer_listE_global_addr baseclk=0x10022da1c -0 [003] 9436.209718: timer_expire_exit timer=0xffffff80444f0428 kworker/u16:6-14217 [003] 9436.209863: timer_start timer=0xffffff80444f0428 function=__typeid__ZTSFvP10timer_listE_global_addr expires=0x10022da2b now=0x10022da1c flags=182452227 vendor.xxxyyy.ha-1593 [004] 9436.209888: timer_cancel timer=0xffffff80444f0428 vendor.xxxyyy.ha-1593 [004] 9436.216390: timer_init timer=0xffffff80444f0428 vendor.xxxyyy.ha-1593 [004] 9436.216392: timer_start timer=0xffffff80444f0428 function=__typeid__ZTSFvP10timer_listE_global_addr expires=0x10022da2c now=0x10022da1d flags=186646532 vendor.xxxyyy.ha-1593 [005] 9436.220992: timer_cancel timer=0xffffff80444f0428 xxxyyyTraceManag-7795 [004] 9436.261641: timer_cancel timer=0xffffff80444f0428 [2] 9436.261653][ C4] Unable to handle kernel paging request at virtual address dead00000000012a [ 9436.261664][ C4] Mem abort info: [ 9436.261666][ C4] ESR = 0x96000044 [ 9436.261669][ C4] EC = 0x25: DABT (current EL), IL = 32 bits [ 9436.261671][ C4] SET = 0, FnV = 0 [ 9436.261673][ C4] EA = 0, S1PTW = 0 [ 9436.261675][ C4] Data abort info: [ 9436.261677][ C4] ISV = 0, ISS = 0x00000044 [ 9436.261680][ C4] CM = 0, WnR = 1 [ 9436.261682][ C4] [dead00000000012a] address between user and kernel address ranges [ 9436.261685][ C4] Internal error: Oops: 96000044 [#1] PREEMPT SMP [ 9436.261701][ C4] Skip md ftrace buffer dump for: 0x3a982d0 ... [ 9436.262138][ C4] CPU: 4 PID: 7795 Comm: TraceManag Tainted: G S W O 5.10.149-android12-9-o-g17f915d29d0c #1 [ 9436.262141][ C4] Hardware name: Qualcomm Technologies, Inc. (DT) [ 9436.262144][ C4] pstate: 22400085 (nzCv daIf +PAN -UAO +TCO BTYPE=--) [ 9436.262161][ C4] pc : expire_timers+0x9c/0x438 [ 9436.262164][ C4] lr : expire_timers+0x2a4/0x438 [ 9436.262168][ C4] sp : ffffffc010023dd0 [ 9436.262171][ C4] x29: ffffffc010023df0 x28: ffffffd0636fdc18 [ 9436.262178][ C4] x27: ffffffd063569dd0 x26: ffffffd063536008 [ 9436.262182][ C4] x25: 0000000000000001 x24: ffffff88f7c69280 [ 9436.262185][ C4] x23: 00000000000000e0 x22: dead000000000122 [ 9436.262188][ C4] x21: 000000010022da29 x20: ffffff8af72b4e80 [ 9436.262191][ C4] x19: ffffffc010023e50 x18: ffffffc010025038 [ 9436.262195][ C4] x17: 0000000000000240 x16: 0000000000000201 [ 9436.262199][ C4] x15: ffffffffffffffff x14: ffffff889f3c3100 [ 9436.262203][ C4] x13: ffffff889f3c3100 x12: 00000000049f56b8 [ 9436.262207][ C4] x11: 00000000049f56b8 x10: 00000000ffffffff [ 9436.262212][ C4] x9 : ffffffc010023e50 x8 : dead000000000122 [ 9436.262216][ C4] x7 : ffffffffffffffff x6 : ffffffc0100239d8 [ 9436.262220][ C4] x5 : 0000000000000000 x4 : 0000000000000101 [ 9436.262223][ C4] x3 : 0000000000000080 x2 : ffffff889edc155c [ 9436.262227][ C4] x1 : ffffff8001005200 x0 : ffffff80444f0428 [ 9436.262232][ C4] Call trace: [ 9436.262236][ C4] expire_timers+0x9c/0x438 [ 9436.262240][ C4] __run_timers+0x1f0/0x330 [ 9436.262245][ C4] run_timer_softirq+0x28/0x58 [ 9436.262255][ C4] efi_header_end+0x168/0x5ec [ 9436.262265][ C4] __irq_exit_rcu+0x108/0x124 [ 9436.262274][ C4] __handle_domain_irq+0x118/0x1e4 [ 9436.262282][ C4] gic_handle_irq.30369+0x6c/0x2bc [ 9436.262286][ C4] el0_irq_naked+0x60/0x6c Link: https://lore.kernel.org/all/1700860318-4025-1-git-send-email-quic_mojha@quicinc.com/ Reported-by: Joyyoung Huang Acked-by: MyungJoo Ham Signed-off-by: Mukesh Ojha Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 017a87465776..98657d3b9435 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work) if (err) dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err); + if (devfreq->stop_polling) + goto out; + queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); - mutex_unlock(&devfreq->lock); +out: + mutex_unlock(&devfreq->lock); trace_devfreq_monitor(devfreq); } @@ -483,6 +487,10 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (delayed_work_pending(&devfreq->work)) + goto out; + switch (devfreq->profile->timer) { case DEVFREQ_TIMER_DEFERRABLE: INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); @@ -491,12 +499,16 @@ void devfreq_monitor_start(struct devfreq *devfreq) INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); break; default: - return; + goto out; } if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); + +out: + devfreq->stop_polling = false; + mutex_unlock(&devfreq->lock); } EXPORT_SYMBOL(devfreq_monitor_start); @@ -513,6 +525,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return; + mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); cancel_delayed_work_sync(&devfreq->work); } EXPORT_SYMBOL(devfreq_monitor_stop); -- cgit v1.2.3 From ac89d11b93cc37c52dc38206c3eaffd4fa603f91 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 14 Dec 2023 18:56:21 +0200 Subject: intel_idle: add Grand Ridge SoC support Add Intel Grand Ridge SoC C-states, which are C1, C1E, and C6S. The Grand Ridge SoC is built with modules, each module includes 4 cores (Crestmont microarchitecture). There is one L2 cache per module, shared between the 4 cores. There is no core C6 state, but there is C6S state, which has module scope: when all 4 cores request C6S, the entire module (4 cores + L2 cache) enters the low power state. Package C6 is not supported by Grand Ridge SoC. Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index cfd0b24fd7f1..3b846d4f8707 100644 --- a/drivers/idle/intel_