From c957e8f084e0d21febcd6b8a0ea9631eccc92f36 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 29 Dec 2014 10:33:36 +0200 Subject: [PATCH 01/14] spi/pxa2xx: Clear cur_chip pointer before starting next message Once the current message is finished, the driver notifies SPI core about this by calling spi_finalize_current_message(). This function queues next message to be transferred. If there are more messages in the queue, it is possible that the driver is asked to transfer the next message at this point. When spi_finalize_current_message() returns the driver clears the drv_data->cur_chip pointer to NULL. The problem is that if the driver already started the next message clearing drv_data->cur_chip will cause NULL pointer dereference which crashes the kernel like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 IP: [] cs_deassert+0x18/0x70 [spi_pxa2xx_platform] PGD 78bb8067 PUD 37712067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 1 PID: 11 Comm: ksoftirqd/1 Tainted: G O 3.18.0-rc4-mjo #5 Hardware name: Intel Corp. VALLEYVIEW B3 PLATFORM/NOTEBOOK, BIOS MNW2CRB1.X64.0071.R30.1408131301 08/13/2014 task: ffff880077f9f290 ti: ffff88007a820000 task.ti: ffff88007a820000 RIP: 0010:[] [] cs_deassert+0x18/0x70 [spi_pxa2xx_platform] RSP: 0018:ffff88007a823d08 EFLAGS: 00010202 RAX: 0000000000000008 RBX: ffff8800379a4430 RCX: 0000000000000026 RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffff8800379a4430 RBP: ffff88007a823d18 R08: 00000000ffffffff R09: 000000007a9bc65a R10: 000000000000028f R11: 0000000000000005 R12: ffff880070123e98 R13: ffff880070123de8 R14: 0000000000000100 R15: ffffc90004888000 FS: 0000000000000000(0000) GS:ffff880079a80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000048 CR3: 000000007029b000 CR4: 00000000001007e0 Stack: ffff88007a823d58 ffff8800379a4430 ffff88007a823d48 ffffffffa0022c89 0000000000000000 ffff8800379a4430 0000000000000000 0000000000000006 ffff88007a823da8 ffffffffa0023be0 ffff88007a823dd8 ffffffff81076204 Call Trace: [] giveback+0x69/0xa0 [spi_pxa2xx_platform] [] pump_transfers+0x710/0x740 [spi_pxa2xx_platform] [] ? pick_next_task_fair+0x744/0x830 [] tasklet_action+0xa9/0xe0 [] __do_softirq+0xee/0x280 [] run_ksoftirqd+0x20/0x40 [] smpboot_thread_fn+0xff/0x1b0 [] ? SyS_setgroups+0x150/0x150 [] kthread+0xcd/0xf0 [] ? kthread_create_on_node+0x180/0x180 [] ret_from_fork+0x7c/0xb0 Fix this by clearing drv_data->cur_chip before we call spi_finalize_current_message(). Reported-by: Martin Oldfield Signed-off-by: Mika Westerberg Acked-by: Robert Jarzmik Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-pxa2xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 05c623cfb078d6..23822e7df6c1c6 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -546,8 +546,8 @@ static void giveback(struct driver_data *drv_data) cs_deassert(drv_data); } - spi_finalize_current_message(drv_data->master); drv_data->cur_chip = NULL; + spi_finalize_current_message(drv_data->master); } static void reset_sccr1(struct driver_data *drv_data) From d297933cc7fcfbaaf2d37570baac73287bf0357d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 5 Jan 2015 09:32:56 +0800 Subject: [PATCH 02/14] spi: dw: Fix detecting FIFO depth Current code tries to find the highest valid fifo depth by checking the value it wrote to DW_SPI_TXFLTR. There are a few problems in current code: 1) There is an off-by-one in dws->fifo_len setting because it assumes the latest register write fails so the latest valid value should be fifo - 1. 2) We know the depth could be from 2 to 256 from HW spec, so it is not necessary to test fifo == 257. In the case fifo is 257, it means the latest valid setting is fifo = 256. So after the for loop iteration, we should check fifo == 2 case instead of fifo == 257 if detecting the FIFO depth fails. This patch fixes above issues. Signed-off-by: Axel Lin Reviewed-and-tested-by: Andy Shevchenko Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-dw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index d0d5542efc06db..1a0f266c42685b 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -621,13 +621,13 @@ static void spi_hw_init(struct dw_spi *dws) if (!dws->fifo_len) { u32 fifo; - for (fifo = 2; fifo <= 257; fifo++) { + for (fifo = 2; fifo <= 256; fifo++) { dw_writew(dws, DW_SPI_TXFLTR, fifo); if (fifo != dw_readw(dws, DW_SPI_TXFLTR)) break; } - dws->fifo_len = (fifo == 257) ? 0 : fifo; + dws->fifo_len = (fifo == 2) ? 0 : fifo - 1; dw_writew(dws, DW_SPI_TXFLTR, 0); } } From 67bf9cda4b498b8cea4a40be67a470afe57d2e88 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 2 Jan 2015 17:48:51 +0200 Subject: [PATCH 03/14] spi: dw-mid: fix FIFO size The FIFO size is 40 accordingly to the specifications, but this means 0x40, i.e. 64 bytes. This patch fixes the typo and enables FIFO size autodetection for Intel MID devices. Fixes: 7063c0d942a1 (spi/dw_spi: add DMA support) Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-dw-mid.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index 7281316a5ecba7..a67d37c7e3c00f 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -271,7 +271,6 @@ int dw_spi_mid_init(struct dw_spi *dws) iounmap(clk_reg); dws->num_cs = 16; - dws->fifo_len = 40; /* FIFO has 40 words buffer */ #ifdef CONFIG_SPI_DW_MID_DMA dws->dma_priv = kzalloc(sizeof(struct mid_dma), GFP_KERNEL); From 6d40530e4789b3e2f14d61ca57ab02bd5395d5c9 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 6 Jan 2015 19:01:26 +0900 Subject: [PATCH 04/14] spi: sh-msiof: fix MDR1_FLD_MASK value Since the FLD bit field is bit[3:2], the MDR1_FLD_MASK value should be 0x0000000c. Signed-off-by: Yoshihiro Shimoda Acked-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 239be7cbe5a83e..87253eaadd4cfa 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -82,7 +82,7 @@ struct sh_msiof_spi_priv { #define MDR1_SYNCMD_LR 0x30000000 /* L/R mode */ #define MDR1_SYNCAC_SHIFT 25 /* Sync Polarity (1 = Active-low) */ #define MDR1_BITLSB_SHIFT 24 /* MSB/LSB First (1 = LSB first) */ -#define MDR1_FLD_MASK 0x000000c0 /* Frame Sync Signal Interval (0-3) */ +#define MDR1_FLD_MASK 0x0000000c /* Frame Sync Signal Interval (0-3) */ #define MDR1_FLD_SHIFT 2 #define MDR1_XXSTP 0x00000001 /* Transmission/Reception Stop on FIFO */ /* TMDR1 */ From 3dbb3b98e854bceed38fbde930fb8cf8701def73 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 7 Jan 2015 16:56:54 +0200 Subject: [PATCH 05/14] spi: dw: amend warning message In case of warning message in ->probe() we have to use HW device name instead of master because last is not defined yet. Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown --- drivers/spi/spi-dw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index 1a0f266c42685b..8edcd1b8456210 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -673,7 +673,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws) if (dws->dma_ops && dws->dma_ops->dma_init) { ret = dws->dma_ops->dma_init(dws); if (ret) { - dev_warn(&master->dev, "DMA init failed\n"); + dev_warn(dev, "DMA init failed\n"); dws->dma_inited = 0; } } From 83b0302d347a49f951e904184afe57ac3723476e Mon Sep 17 00:00:00 2001 From: Ashay Jaiswal Date: Thu, 8 Jan 2015 18:54:25 +0530 Subject: [PATCH 06/14] regulator: core: fix race condition in regulator_put() The regulator framework maintains a list of consumer regulators for a regulator device and protects it from concurrent access using the regulator device's mutex lock. In the case of regulator_put() the consumer is removed and regulator device's parameters are updated without holding the regulator device's mutex. This would lead to a race condition between the regulator_put() and any function which traverses the consumer list or modifies regulator device's parameters. Fix this race condition by holding the regulator device's mutex in case of regulator_put. Signed-off-by: Ashay Jaiswal Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/regulator/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index e225711bb8bc00..9c48fb32f6601b 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1488,7 +1488,7 @@ struct regulator *regulator_get_optional(struct device *dev, const char *id) } EXPORT_SYMBOL_GPL(regulator_get_optional); -/* Locks held by regulator_put() */ +/* regulator_list_mutex lock held by regulator_put() */ static void _regulator_put(struct regulator *regulator) { struct regulator_dev *rdev; @@ -1503,12 +1503,14 @@ static void _regulator_put(struct regulator *regulator) /* remove any sysfs entries */ if (regulator->dev) sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); + mutex_lock(&rdev->mutex); kfree(regulator->supply_name); list_del(®ulator->list); kfree(regulator); rdev->open_count--; rdev->exclusive = 0; + mutex_unlock(&rdev->mutex); module_put(rdev->owner); } From ad26aa6c60974acf3228ed0ade97ba5793093dbe Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Thu, 8 Jan 2015 11:04:07 +0900 Subject: [PATCH 07/14] regulator: s2mps11: Fix wrong calculation of register offset This patch adds missing registers('BUCK7_SW' & 'LDO29_CTRL'). Since BUCK7 has 1 more register (BUCK7_SW) than others, register offset should be added one more for which has bigger address than BUCK7 registers. Fixes: 76b9840b24ae04(regulator: s2mps11: Add support S2MPS13 regulator device) Signed-off-by: Jonghwa Lee Signed-off-by: Chanwoo Choi Signed-off-by: Mark Brown Cc: --- drivers/regulator/s2mps11.c | 42 ++++++++++++++++++++++++++--- include/linux/mfd/samsung/s2mps13.h | 2 ++ 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index c1444c3d84c282..13ca20ed33a667 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -405,6 +405,40 @@ static struct regulator_ops s2mps14_reg_ops; .enable_mask = S2MPS14_ENABLE_MASK \ } +#define regulator_desc_s2mps13_buck7(num, min, step, min_sel) { \ + .name = "BUCK"#num, \ + .id = S2MPS13_BUCK##num, \ + .ops = &s2mps14_reg_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = min, \ + .uV_step = step, \ + .linear_min_sel = min_sel, \ + .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ + .ramp_delay = S2MPS13_BUCK_RAMP_DELAY, \ + .vsel_reg = S2MPS13_REG_B1OUT + (num) * 2 - 1, \ + .vsel_mask = S2MPS14_BUCK_VSEL_MASK, \ + .enable_reg = S2MPS13_REG_B1CTRL + (num - 1) * 2, \ + .enable_mask = S2MPS14_ENABLE_MASK \ +} + +#define regulator_desc_s2mps13_buck8_10(num, min, step, min_sel) { \ + .name = "BUCK"#num, \ + .id = S2MPS13_BUCK##num, \ + .ops = &s2mps14_reg_ops, \ + .type = REGULATOR_VOLTAGE, \ + .owner = THIS_MODULE, \ + .min_uV = min, \ + .uV_step = step, \ + .linear_min_sel = min_sel, \ + .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ + .ramp_delay = S2MPS13_BUCK_RAMP_DELAY, \ + .vsel_reg = S2MPS13_REG_B1OUT + (num) * 2 - 1, \ + .vsel_mask = S2MPS14_BUCK_VSEL_MASK, \ + .enable_reg = S2MPS13_REG_B1CTRL + (num) * 2 - 1, \ + .enable_mask = S2MPS14_ENABLE_MASK \ +} + static const struct regulator_desc s2mps13_regulators[] = { regulator_desc_s2mps13_ldo(1, MIN_800_MV, STEP_12_5_MV, 0x00), regulator_desc_s2mps13_ldo(2, MIN_1400_MV, STEP_50_MV, 0x0C), @@ -452,10 +486,10 @@ static const struct regulator_desc s2mps13_regulators[] = { regulator_desc_s2mps13_buck(4, MIN_500_MV, STEP_6_25_MV, 0x10), regulator_desc_s2mps13_buck(5, MIN_500_MV, STEP_6_25_MV, 0x10), regulator_desc_s2mps13_buck(6, MIN_500_MV, STEP_6_25_MV, 0x10), - regulator_desc_s2mps13_buck(7, MIN_500_MV, STEP_6_25_MV, 0x10), - regulator_desc_s2mps13_buck(8, MIN_1000_MV, STEP_12_5_MV, 0x20), - regulator_desc_s2mps13_buck(9, MIN_1000_MV, STEP_12_5_MV, 0x20), - regulator_desc_s2mps13_buck(10, MIN_500_MV, STEP_6_25_MV, 0x10), + regulator_desc_s2mps13_buck7(7, MIN_500_MV, STEP_6_25_MV, 0x10), + regulator_desc_s2mps13_buck8_10(8, MIN_1000_MV, STEP_12_5_MV, 0x20), + regulator_desc_s2mps13_buck8_10(9, MIN_1000_MV, STEP_12_5_MV, 0x20), + regulator_desc_s2mps13_buck8_10(10, MIN_500_MV, STEP_6_25_MV, 0x10), }; static int s2mps14_regulator_enable(struct regulator_dev *rdev) diff --git a/include/linux/mfd/samsung/s2mps13.h b/include/linux/mfd/samsung/s2mps13.h index ce5dda8958fe83..b1fd675fa36f36 100644 --- a/include/linux/mfd/samsung/s2mps13.h +++ b/include/linux/mfd/samsung/s2mps13.h @@ -59,6 +59,7 @@ enum s2mps13_reg { S2MPS13_REG_B6CTRL, S2MPS13_REG_B6OUT, S2MPS13_REG_B7CTRL, + S2MPS13_REG_B7SW, S2MPS13_REG_B7OUT, S2MPS13_REG_B8CTRL, S2MPS13_REG_B8OUT, @@ -102,6 +103,7 @@ enum s2mps13_reg { S2MPS13_REG_L26CTRL, S2MPS13_REG_L27CTRL, S2MPS13_REG_L28CTRL, + S2MPS13_REG_L29CTRL, S2MPS13_REG_L30CTRL, S2MPS13_REG_L31CTRL, S2MPS13_REG_L32CTRL, From 3c606d35fe9766ede86a45273a628b320be13b45 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 22 Jan 2015 10:19:43 -0500 Subject: [PATCH 08/14] cgroup: prevent mount hang due to memory controller lifetime Since b2052564e66d ("mm: memcontrol: continue cache reclaim from offlined groups"), re-mounting the memory controller after using it is very likely to hang. The cgroup core assumes that any remaining references after deleting a cgroup are temporary in nature, and synchroneously waits for them, but the above-mentioned commit has left-over page cache pin its css until it is reclaimed naturally. That being said, swap entries and charged kernel memory have been doing the same indefinite pinning forever, the bug is just more likely to trigger with left-over page cache. Reparenting kernel memory is highly impractical, which leaves changing the cgroup assumptions to reflect this: once a controller has been mounted and used, it has internal state that is independent from mount and cgroup lifetime. It can be unmounted and remounted, but it can't be reconfigured during subsequent mounts. Don't offline the controller root as long as there are any children, dead or alive. A remount will no longer wait for these old references to drain, it will simply mount the persistent controller state again. Reported-by: "Suzuki K. Poulose" Reported-by: Will Deacon Signed-off-by: Johannes Weiner Signed-off-by: Tejun Heo --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bb263d0caab323..04cfe8ace52088 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1909,7 +1909,7 @@ static void cgroup_kill_sb(struct super_block *sb) * * And don't kill the default root. */ - if (css_has_online_children(&root->cgrp.self) || + if (!list_empty(&root->cgrp.self.children) || root == &cgrp_dfl_root) cgroup_put(&root->cgrp); else From 9879de7373fcfb466ec198293b6ccc1ad7a42dd8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 26 Jan 2015 12:58:32 -0800 Subject: [PATCH 09/14] mm: page_alloc: embed OOM killing naturally into allocation slowpath The OOM killing invocation does a lot of duplicative checks against the task's allocation context. Rework it to take advantage of the existing checks in the allocator slowpath. The OOM killer is invoked when the allocator is unable to reclaim any pages but the allocation has to keep looping. Instead of having a check for __GFP_NORETRY hidden in oom_gfp_allowed(), just move the OOM invocation to the true branch of should_alloc_retry(). The __GFP_FS check from oom_gfp_allowed() can then be moved into the OOM avoidance branch in __alloc_pages_may_oom(), along with the PF_DUMPCORE test. __alloc_pages_may_oom() can then signal to the caller whether the OOM killer was invoked, instead of requiring it to duplicate the order and high_zoneidx checks to guess this when deciding whether to continue. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 5 --- mm/page_alloc.c | 82 +++++++++++++++++++-------------------------- 2 files changed, 35 insertions(+), 52 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 853698c721f7d1..76200984d1e220 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -85,11 +85,6 @@ static inline void oom_killer_enable(void) oom_killer_disabled = false; } -static inline bool oom_gfp_allowed(gfp_t gfp_mask) -{ - return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY); -} - extern struct task_struct *find_lock_task_mm(struct task_struct *p); static inline bool task_will_free_mem(struct task_struct *task) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7633c503a116c2..8e20f9c2fa5ab7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2332,12 +2332,21 @@ static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, struct zone *preferred_zone, - int classzone_idx, int migratetype) + int classzone_idx, int migratetype, unsigned long *did_some_progress) { struct page *page; - /* Acquire the per-zone oom lock for each zone */ + *did_some_progress = 0; + + if (oom_killer_disabled) + return NULL; + + /* + * Acquire the per-zone oom lock for each zone. If that + * fails, somebody else is making progress for us. + */ if (!oom_zonelist_trylock(zonelist, gfp_mask)) { + *did_some_progress = 1; schedule_timeout_uninterruptible(1); return NULL; } @@ -2363,12 +2372,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; if (!(gfp_mask & __GFP_NOFAIL)) { + /* Coredumps can quickly deplete all memory reserves */ + if (current->flags & PF_DUMPCORE) + goto out; /* The OOM killer will not help higher order allocs */ if (order > PAGE_ALLOC_COSTLY_ORDER) goto out; /* The OOM killer does not needlessly kill tasks for lowmem */ if (high_zoneidx < ZONE_NORMAL) goto out; + /* The OOM killer does not compensate for light reclaim */ + if (!(gfp_mask & __GFP_FS)) + goto out; /* * GFP_THISNODE contains __GFP_NORETRY and we never hit this. * Sanity check for bare calls of __GFP_THISNODE, not real OOM. @@ -2381,7 +2396,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, } /* Exhausted what can be done so it's blamo time */ out_of_memory(zonelist, gfp_mask, order, nodemask, false); - + *did_some_progress = 1; out: oom_zonelist_unlock(zonelist, gfp_mask); return page; @@ -2658,7 +2673,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, (gfp_mask & GFP_THISNODE) == GFP_THISNODE) goto nopage; -restart: +retry: if (!(gfp_mask & __GFP_NO_KSWAPD)) wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone, nodemask); @@ -2681,7 +2696,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, classzone_idx = zonelist_zone_idx(preferred_zoneref); } -rebalance: /* This is the last chance, in general, before the goto nopage. */ page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, @@ -2788,54 +2802,28 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (page) goto got_pg; - /* - * If we failed to make any progress reclaiming, then we are - * running out of options and have to consider going OOM - */ - if (!did_some_progress) { - if (oom_gfp_allowed(gfp_mask)) { - if (oom_killer_disabled) - goto nopage; - /* Coredumps can quickly deplete all memory reserves */ - if ((current->flags & PF_DUMPCORE) && - !(gfp_mask & __GFP_NOFAIL)) - goto nopage; - page = __alloc_pages_may_oom(gfp_mask, order, - zonelist, high_zoneidx, - nodemask, preferred_zone, - classzone_idx, migratetype); - if (page) - goto got_pg; - - if (!(gfp_mask & __GFP_NOFAIL)) { - /* - * The oom killer is not called for high-order - * allocations that may fail, so if no progress - * is being made, there are no other options and - * retrying is unlikely to help. - */ - if (order > PAGE_ALLOC_COSTLY_ORDER) - goto nopage; - /* - * The oom killer is not called for lowmem - * allocations to prevent needlessly killing - * innocent tasks. - */ - if (high_zoneidx < ZONE_NORMAL) - goto nopage; - } - - goto restart; - } - } - /* Check if we should retry the allocation */ pages_reclaimed += did_some_progress; if (should_alloc_retry(gfp_mask, order, did_some_progress, pages_reclaimed)) { + /* + * If we fail to make progress by freeing individual + * pages, but the allocation wants us to keep going, + * start OOM killing tasks. + */ + if (!did_some_progress) { + page = __alloc_pages_may_oom(gfp_mask, order, zonelist, + high_zoneidx, nodemask, + preferred_zone, classzone_idx, + migratetype,&did_some_progress); + if (page) + goto got_pg; + if (!did_some_progress) + goto nopage; + } /* Wait for some write requests to complete then retry */ wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); - goto rebalance; + goto retry; } else { /* * High-order allocations do not necessarily loop after From d69911a68c865b152a067feaa45e98e6bb0f655b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 26 Jan 2015 12:58:35 -0800 Subject: [PATCH 10/14] x86, build: replace Perl script with Shell script Commit e6023367d779 ("x86, kaslr: Prevent .bss from overlaping initrd") added Perl to the required build environment. This reimplements in shell the Perl script used to find the size of the kernel with bss and brk added. Signed-off-by: Kees Cook Reported-by: Rob Landley Acked-by: Rob Landley Cc: Anca Emanuel Cc: Fengguang Wu Cc: Junjie Mao Cc: Kees Cook Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/tools/calc_run_size.pl | 39 ---------------------------- arch/x86/tools/calc_run_size.sh | 42 +++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 40 deletions(-) delete mode 100644 arch/x86/tools/calc_run_size.pl create mode 100644 arch/x86/tools/calc_run_size.sh diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index d999398928bc81..ad754b4411f7e4 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -90,7 +90,7 @@ suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_LZ4) := lz4 RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \ - perl $(srctree)/arch/x86/tools/calc_run_size.pl) + $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh) quiet_cmd_mkpiggy = MKPIGGY $@ cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl deleted file mode 100644 index 23210baade2d5f..00000000000000 --- a/arch/x86/tools/calc_run_size.pl +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/perl -# -# Calculate the amount of space needed to run the kernel, including room for -# the .bss and .brk sections. -# -# Usage: -# objdump -h a.out | perl calc_run_size.pl -use strict; - -my $mem_size = 0; -my $file_offset = 0; - -my $sections=" *[0-9]+ \.(?:bss|brk) +"; -while (<>) { - if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) { - my $size = hex($1); - my $offset = hex($2); - $mem_size += $size; - if ($file_offset == 0) { - $file_offset = $offset; - } elsif ($file_offset != $offset) { - # BFD linker shows the same file offset in ELF. - # Gold linker shows them as consecutive. - next if ($file_offset + $mem_size == $offset + $size); - - printf STDERR "file_offset: 0x%lx\n", $file_offset; - printf STDERR "mem_size: 0x%lx\n", $mem_size; - printf STDERR "offset: 0x%lx\n", $offset; - printf STDERR "size: 0x%lx\n", $size; - - die ".bss and .brk are non-contiguous\n"; - } - } -} - -if ($file_offset == 0) { - die "Never found .bss or .brk file offset\n"; -} -printf("%d\n", $mem_size + $file_offset); diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh new file mode 100644 index 00000000000000..1a4c17bb391082 --- /dev/null +++ b/arch/x86/tools/calc_run_size.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# +# Calculate the amount of space needed to run the kernel, including room for +# the .bss and .brk sections. +# +# Usage: +# objdump -h a.out | sh calc_run_size.sh + +NUM='\([0-9a-fA-F]*[ \t]*\)' +OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p') +if [ -z "$OUT" ] ; then + echo "Never found .bss or .brk file offset" >&2 + exit 1 +fi + +OUT=$(echo ${OUT# }) +sizeA=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +offsetA=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +sizeB=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +offsetB=$(printf "%d" 0x${OUT%% *}) + +run_size=$(( $offsetA + $sizeA + $sizeB )) + +# BFD linker shows the same file offset in ELF. +if [ "$offsetA" -ne "$offsetB" ] ; then + # Gold linker shows them as consecutive. + endB=$(( $offsetB + $sizeB )) + if [ "$endB" != "$run_size" ] ; then + printf "sizeA: 0x%x\n" $sizeA >&2 + printf "offsetA: 0x%x\n" $offsetA >&2 + printf "sizeB: 0x%x\n" $sizeB >&2 + printf "offsetB: 0x%x\n" $offsetB >&2 + echo ".bss and .brk are non-contiguous" >&2 + exit 1 + fi +fi + +printf "%d\n" $run_size +exit 0 From 0346dadbf041a2606bcb5bd27828b0d105897f4a Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Mon, 26 Jan 2015 12:58:38 -0800 Subject: [PATCH 11/14] memcg: remove extra newlines from memcg oom kill log Commit e61734c55c24 ("cgroup: remove cgroup->name") added two extra newlines to memcg oom kill log messages. This makes dmesg hard to read and parse. The issue affects 3.15+. Example: Task in /t <<< extra #1 killed as a result of limit of /t <<< extra #2 memory: usage 102400kB, limit 102400kB, failcnt 274712 Remove the extra newlines from memcg oom kill messages, so the messages look like: Task in /t killed as a result of limit of /t memory: usage 102400kB, limit 102400kB, failcnt 240649 Fixes: e61734c55c24 ("cgroup: remove cgroup->name") Signed-off-by: Greg Thelen Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 851924fa5170e1..683b4782019b2c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1477,9 +1477,9 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) pr_info("Task in "); pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id)); - pr_info(" killed as a result of limit of "); + pr_cont(" killed as a result of limit of "); pr_cont_cgroup_path(memcg->css.cgroup); - pr_info("\n"); + pr_cont("\n"); rcu_read_unlock(); From 17636faada9c2f6696feafd633c5857ccf7ed0cd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 26 Jan 2015 12:58:41 -0800 Subject: [PATCH 12/14] mm/vmscan: fix highidx argument type for_each_zone_zonelist_nodemask wants an enum zone_type argument, but is passed gfp_t: mm/vmscan.c:2658:9: expected int enum zone_type [signed] highest_zoneidx mm/vmscan.c:2658:9: got restricted gfp_t [usertype] gfp_mask mm/vmscan.c:2658:9: warning: incorrect type in argument 2 (different base types) mm/vmscan.c:2658:9: expected int enum zone_type [signed] highest_zoneidx mm/vmscan.c:2658:9: got restricted gfp_t [usertype] gfp_mask convert argument to the correct type. Signed-off-by: Michael S. Tsirkin Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Rik van Riel Cc: Michal Hocko Cc: Mel Gorman Cc: Vlastimil Babka Cc: Suleiman Souhlal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index ab2505c3ef5460..dcd90c891d8e53 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2656,7 +2656,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, * should make reasonable progress. */ for_each_zone_zonelist_nodemask(zone, z, zonelist, - gfp_mask, nodemask) { + gfp_zone(gfp_mask), nodemask) { if (zone_idx(zone) > ZONE_NORMAL) continue; From 07261edb971492c6b41b44d7b1b51f76807d30ad Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 26 Jan 2015 12:58:43 -0800 Subject: [PATCH 13/14] printk: add dummy routine for when CONFIG_PRINTK=n There are missing dummy routines for log_buf_addr_get() and log_buf_len_get() for when CONFIG_PRINTK is not set causing build failures. This patch adds these dummy routines at the appropriate location. Signed-off-by: Pranith Kumar Cc: Michael Ellerman Reviewed-by: Petr Mladek Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index c8f170324e643c..4d5bf5726578c5 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -10,9 +10,6 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; -extern char *log_buf_addr_get(void); -extern u32 log_buf_len_get(void); - static inline int printk_get_level(const char *buffer) { if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { @@ -163,6 +160,8 @@ extern int kptr_restrict; extern void wake_up_klogd(void); +char *log_buf_addr_get(void); +u32 log_buf_len_get(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); void dump_stack_set_arch_desc(const char *fmt, ...); @@ -198,6 +197,16 @@ static inline void wake_up_klogd(void) { } +static inline char *log_buf_addr_get(void) +{ + return NULL; +} + +static inline u32 log_buf_len_get(void) +{ + return 0; +} + static inline void log_buf_kexec_setup(void) { } From 45cd15e600ec8006305ce83f62c7208c2cb7a052 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 26 Jan 2015 12:58:46 -0800 Subject: [PATCH 14/14] drivers/rtc/rtc-s5m.c: terminate s5m_rtc_id array with empty element Array of platform_device_id elements should be terminated with empty element. Fixes: 5bccae6ec458 ("rtc: s5m-rtc: add real-time clock driver for s5m8767") Signed-off-by: Andrey Ryabinin Reviewed-by: Krzysztof Kozlowski Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-s5m.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c index b5e7c4670205ba..89ac1d5083c660 100644 --- a/drivers/rtc/rtc-s5m.c +++ b/drivers/rtc/rtc-s5m.c @@ -832,6 +832,7 @@ static SIMPLE_DEV_PM_OPS(s5m_rtc_pm_ops, s5m_rtc_suspend, s5m_rtc_resume); static const struct platform_device_id s5m_rtc_id[] = { { "s5m-rtc", S5M8767X }, { "s2mps14-rtc", S2MPS14X }, + { }, }; static struct platform_driver s5m_rtc_driver = {