From 09c826447cb018a60c733ce5ee105f316430cf1b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 23 Sep 2020 15:30:16 +0100 Subject: [PATCH 01/58] btrfs: fix filesystem corruption after a device replace commit 4c8f353272dd1262013873990c0fafd0e3c8f274 upstream. We use a device's allocation state tree to track ranges in a device used for allocated chunks, and we set ranges in this tree when allocating a new chunk. However after a device replace operation, we were not setting the allocated ranges in the new device's allocation state tree, so that tree is empty after a device replace. This means that a fitrim operation after a device replace will trim the device ranges that have allocated chunks and extents, as we trim every range for which there is not a range marked in the device's allocation state tree. It is also important during chunk allocation, since the device's allocation state is used to determine if a range is already allocated when allocating a new chunk. This is trivial to reproduce and the following script triggers the bug: $ cat reproducer.sh #!/bin/bash DEV1="/dev/sdg" DEV2="/dev/sdh" DEV3="/dev/sdi" wipefs -a $DEV1 $DEV2 $DEV3 &> /dev/null # Create a raid1 test fs on 2 devices. mkfs.btrfs -f -m raid1 -d raid1 $DEV1 $DEV2 > /dev/null mount $DEV1 /mnt/btrfs xfs_io -f -c "pwrite -S 0xab 0 10M" /mnt/btrfs/foo echo "Starting to replace $DEV1 with $DEV3" btrfs replace start -B $DEV1 $DEV3 /mnt/btrfs echo echo "Running fstrim" fstrim /mnt/btrfs echo echo "Unmounting filesystem" umount /mnt/btrfs echo "Mounting filesystem in degraded mode using $DEV3 only" wipefs -a $DEV1 $DEV2 &> /dev/null mount -o degraded $DEV3 /mnt/btrfs if [ $? -ne 0 ]; then dmesg | tail echo echo "Failed to mount in degraded mode" exit 1 fi echo echo "File foo data (expected all bytes = 0xab):" od -A d -t x1 /mnt/btrfs/foo umount /mnt/btrfs When running the reproducer: $ ./replace-test.sh wrote 10485760/10485760 bytes at offset 0 10 MiB, 2560 ops; 0.0901 sec (110.877 MiB/sec and 28384.5216 ops/sec) Starting to replace /dev/sdg with /dev/sdi Running fstrim Unmounting filesystem Mounting filesystem in degraded mode using /dev/sdi only mount: /mnt/btrfs: wrong fs type, bad option, bad superblock on /dev/sdi, missing codepage or helper program, or other error. [19581.748641] BTRFS info (device sdg): dev_replace from /dev/sdg (devid 1) to /dev/sdi started [19581.803842] BTRFS info (device sdg): dev_replace from /dev/sdg (devid 1) to /dev/sdi finished [19582.208293] BTRFS info (device sdi): allowing degraded mounts [19582.208298] BTRFS info (device sdi): disk space caching is enabled [19582.208301] BTRFS info (device sdi): has skinny extents [19582.212853] BTRFS warning (device sdi): devid 2 uuid 1f731f47-e1bb-4f00-bfbb-9e5a0cb4ba9f is missing [19582.213904] btree_readpage_end_io_hook: 25839 callbacks suppressed [19582.213907] BTRFS error (device sdi): bad tree block start, want 30490624 have 0 [19582.214780] BTRFS warning (device sdi): failed to read root (objectid=7): -5 [19582.231576] BTRFS error (device sdi): open_ctree failed Failed to mount in degraded mode So fix by setting all allocated ranges in the replace target device when the replace operation is finishing, when we are holding the chunk mutex and we can not race with new chunk allocations. A test case for fstests follows soon. Fixes: 1c11b63eff2a67 ("btrfs: replace pending/pinned chunks lists with io tree") CC: stable@vger.kernel.org # 5.2+ Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/dev-replace.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 48890826b5e666..196bd241e701a4 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -562,6 +562,37 @@ static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) wake_up(&fs_info->dev_replace.replace_wait); } +/* + * When finishing the device replace, before swapping the source device with the + * target device we must update the chunk allocation state in the target device, + * as it is empty because replace works by directly copying the chunks and not + * through the normal chunk allocation path. + */ +static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev, + struct btrfs_device *tgtdev) +{ + struct extent_state *cached_state = NULL; + u64 start = 0; + u64 found_start; + u64 found_end; + int ret = 0; + + lockdep_assert_held(&srcdev->fs_info->chunk_mutex); + + while (!find_first_extent_bit(&srcdev->alloc_state, start, + &found_start, &found_end, + CHUNK_ALLOCATED, &cached_state)) { + ret = set_extent_bits(&tgtdev->alloc_state, found_start, + found_end, CHUNK_ALLOCATED); + if (ret) + break; + start = found_end + 1; + } + + free_extent_state(cached_state); + return ret; +} + static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, int scrub_ret) { @@ -636,8 +667,14 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, dev_replace->time_stopped = ktime_get_real_seconds(); dev_replace->item_needs_writeback = 1; - /* replace old device with new one in mapping tree */ + /* + * Update allocation state in the new device and replace the old device + * with the new one in the mapping tree. + */ if (!scrub_ret) { + scrub_ret = btrfs_set_target_alloc_state(src_device, tgt_device); + if (scrub_ret) + goto error; btrfs_dev_replace_update_device_in_mapping_tree(fs_info, src_device, tgt_device); @@ -648,6 +685,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_dev_name(src_device), src_device->devid, rcu_str_deref(tgt_device->name), scrub_ret); +error: up_write(&dev_replace->rwsem); mutex_unlock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex); From 23389cf97aa14716df9fc44c8d0606ffd76edaf0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 27 Sep 2020 12:48:21 +0200 Subject: [PATCH 02/58] mmc: sdhci: Workaround broken command queuing on Intel GLK based IRBIS models commit afd7f30886b0b445a4240a99020458a9772f2b89 upstream. Commit bedf9fc01ff1 ("mmc: sdhci: Workaround broken command queuing on Intel GLK"), disabled command-queuing on Intel GLK based LENOVO models because of it being broken due to what is believed to be a bug in the BIOS. It seems that the BIOS of some IRBIS models, including the IRBIS NB111 model has the same issue, so disable command queuing there too. Fixes: bedf9fc01ff1 ("mmc: sdhci: Workaround broken command queuing on Intel GLK") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209397 Reported-and-tested-by: RussianNeuroMancer Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20200927104821.5676-1-hdegoede@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 425aa898e797aa..91d0cb08238cf3 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -798,7 +798,8 @@ static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) static bool glk_broken_cqhci(struct sdhci_pci_slot *slot) { return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_EMMC && - dmi_match(DMI_BIOS_VENDOR, "LENOVO"); + (dmi_match(DMI_BIOS_VENDOR, "LENOVO") || + dmi_match(DMI_SYS_VENDOR, "IRBIS")); } static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot) From 57bd08a301f7b4c1d7c3bdf3025c96f9721c9d74 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Sun, 20 Sep 2020 18:01:58 +0100 Subject: [PATCH 03/58] USB: gadget: f_ncm: Fix NDP16 datagram validation commit 2b405533c2560d7878199c57d95a39151351df72 upstream. commit 2b74b0a04d3e ("USB: gadget: f_ncm: add bounds checks to ncm_unwrap_ntb()") adds important bounds checking however it unfortunately also introduces a bug with respect to section 3.3.1 of the NCM specification. wDatagramIndex[1] : "Byte index, in little endian, of the second datagram described by this NDP16. If zero, then this marks the end of the sequence of datagrams in this NDP16." wDatagramLength[1]: "Byte length, in little endian, of the second datagram described by this NDP16. If zero, then this marks the end of the sequence of datagrams in this NDP16." wDatagramIndex[1] and wDatagramLength[1] respectively then may be zero but that does not mean we should throw away the data referenced by wDatagramIndex[0] and wDatagramLength[0] as is currently the case. Breaking the loop on (index2 == 0 || dg_len2 == 0) should come at the end as was previously the case and checks for index2 and dg_len2 should be removed since zero is valid. I'm not sure how much testing the above patch received but for me right now after enumeration ping doesn't work. Reverting the commit restores ping, scp, etc. The extra validation associated with wDatagramIndex[0] and wDatagramLength[0] appears to be valid so, this change removes the incorrect restriction on wDatagramIndex[1] and wDatagramLength[1] restoring data processing between host and device. Fixes: 2b74b0a04d3e ("USB: gadget: f_ncm: add bounds checks to ncm_unwrap_ntb()") Cc: Ilja Van Sprundel Cc: Brooke Basile Cc: stable Signed-off-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20200920170158.1217068-1-bryan.odonoghue@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 30 ++--------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index b4206b0dede54e..1f638759a95336 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1189,7 +1189,6 @@ static int ncm_unwrap_ntb(struct gether *port, const struct ndp_parser_opts *opts = ncm->parser_opts; unsigned crc_len = ncm->is_crc ? sizeof(uint32_t) : 0; int dgram_counter; - bool ndp_after_header; /* dwSignature */ if (get_unaligned_le32(tmp) != opts->nth_sign) { @@ -1216,7 +1215,6 @@ static int ncm_unwrap_ntb(struct gether *port, } ndp_index = get_ncm(&tmp, opts->ndp_index); - ndp_after_header = false; /* Run through all the NDP's in the NTB */ do { @@ -1232,8 +1230,6 @@ static int ncm_unwrap_ntb(struct gether *port, ndp_index); goto err; } - if (ndp_index == opts->nth_size) - ndp_after_header = true; /* * walk through NDP @@ -1312,37 +1308,13 @@ static int ncm_unwrap_ntb(struct gether *port, index2 = get_ncm(&tmp, opts->dgram_item_len); dg_len2 = get_ncm(&tmp, opts->dgram_item_len); - if (index2 == 0 || dg_len2 == 0) - break; - /* wDatagramIndex[1] */ - if (ndp_after_header) { - if (index2 < opts->nth_size + opts->ndp_size) { - INFO(port->func.config->cdev, - "Bad index: %#X\n", index2); - goto err; - } - } else { - if (index2 < opts->nth_size + opts->dpe_size) { - INFO(port->func.config->cdev, - "Bad index: %#X\n", index2); - goto err; - } - } if (index2 > block_len - opts->dpe_size) { INFO(port->func.config->cdev, "Bad index: %#X\n", index2); goto err; } - /* wDatagramLength[1] */ - if ((dg_len2 < 14 + crc_len) || - (dg_len2 > frame_max)) { - INFO(port->func.config->cdev, - "Bad dgram length: %#X\n", dg_len); - goto err; - } - /* * Copy the data into a new skb. * This ensures the truesize is correct @@ -1359,6 +1331,8 @@ static int ncm_unwrap_ntb(struct gether *port, ndp_len -= 2 * (opts->dgram_item_len * 2); dgram_counter++; + if (index2 == 0 || dg_len2 == 0) + break; } while (ndp_len > 2 * (opts->dgram_item_len * 2)); } while (ndp_index); From b079337f697aa6fcb3be991a68bb0f849ea1cd57 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 7 Sep 2020 17:31:35 +0200 Subject: [PATCH 04/58] gpio: siox: explicitly support only threaded irqs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 45ccf6556720293323c20cda717756014ff63007 upstream. The gpio-siox driver uses handle_nested_irq() to implement its interrupt support. This is only capable of handling threaded irq actions. For a hardirq action it triggers a NULL pointer oops. (It calls action->thread_fn which is NULL then.) Prevent registration of a hardirq action by setting gpio_irq_chip::threaded to true. Cc: u.kleine-koenig@pengutronix.de Fixes: be8c8facc707 ("gpio: new driver to work with a 8x12 siox") Cc: stable@vger.kernel.org Signed-off-by: Ahmad Fatoum Acked-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-siox.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-siox.c b/drivers/gpio/gpio-siox.c index 006a7e6a75f216..7e70d2d06c3fec 100644 --- a/drivers/gpio/gpio-siox.c +++ b/drivers/gpio/gpio-siox.c @@ -245,6 +245,7 @@ static int gpio_siox_probe(struct siox_device *sdevice) girq->chip = &ddata->ichip; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_level_irq; + girq->threaded = true; ret = devm_gpiochip_add_data(dev, &ddata->gchip, NULL); if (ret) From 035f59ad4ba875c86cf9613905d828ac0da053ee Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 8 Sep 2020 15:07:49 +0200 Subject: [PATCH 05/58] gpio: mockup: fix resource leak in error path commit 1b02d9e770cd7087f34c743f85ccf5ea8372b047 upstream. If the module init function fails after creating the debugs directory, it's never removed. Add proper cleanup calls to avoid this resource leak. Fixes: 9202ba2397d1 ("gpio: mockup: implement event injecting over debugfs") Cc: Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-mockup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 213aedc97dc2e7..9c1c4d81aa7b6b 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -497,6 +497,7 @@ static int __init gpio_mockup_init(void) err = platform_driver_register(&gpio_mockup_driver); if (err) { gpio_mockup_err("error registering platform driver\n"); + debugfs_remove_recursive(gpio_mockup_dbg_dir); return err; } @@ -527,6 +528,7 @@ static int __init gpio_mockup_init(void) gpio_mockup_err("error registering device"); platform_driver_unregister(&gpio_mockup_driver); gpio_mockup_unregister_pdevs(); + debugfs_remove_recursive(gpio_mockup_dbg_dir); return PTR_ERR(pdev); } From 79c8ebdce55c98996ad1697352ccc1318b39f7ca Mon Sep 17 00:00:00 2001 From: dillon min Date: Thu, 3 Sep 2020 15:30:21 +0800 Subject: [PATCH 06/58] gpio: tc35894: fix up tc35894 interrupt configuration commit 214b0e1ad01abf4c1f6d8d28fa096bf167e47cef upstream. The offset of regmap is incorrect, j * 8 is move to the wrong register. for example: asume i = 0, j = 1. we want to set KPY5 as interrupt falling edge mode, regmap[0][1] should be TC3589x_GPIOIBE1 0xcd but, regmap[i] + j * 8 = TC3589x_GPIOIBE0 + 8 ,point to 0xd4, this is TC3589x_GPIOIE2 not TC3589x_GPIOIBE1. Fixes: d88b25be3584 ("gpio: Add TC35892 GPIO driver") Cc: Cc: stable@vger.kernel.org Signed-off-by: dillon min Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-tc3589x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index 75b1135b383a7d..daf29044d0f19c 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -209,7 +209,7 @@ static void tc3589x_gpio_irq_sync_unlock(struct irq_data *d) continue; tc3589x_gpio->oldregs[i][j] = new; - tc3589x_reg_write(tc3589x, regmap[i] + j * 8, new); + tc3589x_reg_write(tc3589x, regmap[i] + j, new); } } From 14c79ef213c25c652b53702574a1497a8bef7dda Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 31 Aug 2020 15:26:57 -0500 Subject: [PATCH 07/58] clk: socfpga: stratix10: fix the divider for the emac_ptp_free_clk commit b02cf0c4736c65c6667f396efaae6b5521e82abf upstream. The fixed divider the emac_ptp_free_clk should be 2, not 4. Fixes: 07afb8db7340 ("clk: socfpga: stratix10: add clock driver for Stratix10 platform") Cc: stable@vger.kernel.org Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20200831202657.8224-1-dinguyen@kernel.org Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/socfpga/clk-s10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c index 993f3a73c71e72..55d3b505b08c9f 100644 --- a/drivers/clk/socfpga/clk-s10.c +++ b/drivers/clk/socfpga/clk-s10.c @@ -107,7 +107,7 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = { { STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux), 0, 0, 2, 0xB0, 1}, { STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux, - ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2}, + ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 2, 0xB0, 2}, { STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux, ARRAY_SIZE(gpio_db_free_mux), 0, 0, 0, 0xB0, 3}, { STRATIX10_SDMMC_FREE_CLK, "sdmmc_free_clk", NULL, sdmmc_free_mux, From 215459ff36664a8f15ae3ea0efe4d3e76f354657 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 14 Nov 2019 10:57:40 +0100 Subject: [PATCH 08/58] vsock/virtio: add transport parameter to the virtio_transport_reset_no_sock() [ Upstream commit 4c7246dc45e2706770d5233f7ce1597a07e069ba ] We are going to add 'struct vsock_sock *' parameter to virtio_transport_get_ops(). In some cases, like in the virtio_transport_reset_no_sock(), we don't have any socket assigned to the packet received, so we can't use the virtio_transport_get_ops(). In order to allow virtio_transport_reset_no_sock() to use the '.send_pkt' callback from the 'vhost_transport' or 'virtio_transport', we add the 'struct virtio_transport *' to it and to its caller: virtio_transport_recv_pkt(). We moved the 'vhost_transport' and 'virtio_transport' definition, to pass their address to the virtio_transport_recv_pkt(). Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/vhost/vsock.c | 94 +++++++------- include/linux/virtio_vsock.h | 3 +- net/vmw_vsock/virtio_transport.c | 160 ++++++++++++------------ net/vmw_vsock/virtio_transport_common.c | 12 +- 4 files changed, 135 insertions(+), 134 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index ca68a27b98edd0..f21f5bfbb78dcc 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -384,6 +384,52 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) return val < vq->num; } +static struct virtio_transport vhost_transport = { + .transport = { + .get_local_cid = vhost_transport_get_local_cid, + + .init = virtio_transport_do_socket_init, + .destruct = virtio_transport_destruct, + .release = virtio_transport_release, + .connect = virtio_transport_connect, + .shutdown = virtio_transport_shutdown, + .cancel_pkt = vhost_transport_cancel_pkt, + + .dgram_enqueue = virtio_transport_dgram_enqueue, + .dgram_dequeue = virtio_transport_dgram_dequeue, + .dgram_bind = virtio_transport_dgram_bind, + .dgram_allow = virtio_transport_dgram_allow, + + .stream_enqueue = virtio_transport_stream_enqueue, + .stream_dequeue = virtio_transport_stream_dequeue, + .stream_has_data = virtio_transport_stream_has_data, + .stream_has_space = virtio_transport_stream_has_space, + .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, + .stream_is_active = virtio_transport_stream_is_active, + .stream_allow = virtio_transport_stream_allow, + + .notify_poll_in = virtio_transport_notify_poll_in, + .notify_poll_out = virtio_transport_notify_poll_out, + .notify_recv_init = virtio_transport_notify_recv_init, + .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, + .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, + .notify_send_init = virtio_transport_notify_send_init, + .notify_send_pre_block = virtio_transport_notify_send_pre_block, + .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, + .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, + + .set_buffer_size = virtio_transport_set_buffer_size, + .set_min_buffer_size = virtio_transport_set_min_buffer_size, + .set_max_buffer_size = virtio_transport_set_max_buffer_size, + .get_buffer_size = virtio_transport_get_buffer_size, + .get_min_buffer_size = virtio_transport_get_min_buffer_size, + .get_max_buffer_size = virtio_transport_get_max_buffer_size, + }, + + .send_pkt = vhost_transport_send_pkt, +}; + static void vhost_vsock_handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, @@ -440,7 +486,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && le64_to_cpu(pkt->hdr.dst_cid) == vhost_transport_get_local_cid()) - virtio_transport_recv_pkt(pkt); + virtio_transport_recv_pkt(&vhost_transport, pkt); else virtio_transport_free_pkt(pkt); @@ -793,52 +839,6 @@ static struct miscdevice vhost_vsock_misc = { .fops = &vhost_vsock_fops, }; -static struct virtio_transport vhost_transport = { - .transport = { - .get_local_cid = vhost_transport_get_local_cid, - - .init = virtio_transport_do_socket_init, - .destruct = virtio_transport_destruct, - .release = virtio_transport_release, - .connect = virtio_transport_connect, - .shutdown = virtio_transport_shutdown, - .cancel_pkt = vhost_transport_cancel_pkt, - - .dgram_enqueue = virtio_transport_dgram_enqueue, - .dgram_dequeue = virtio_transport_dgram_dequeue, - .dgram_bind = virtio_transport_dgram_bind, - .dgram_allow = virtio_transport_dgram_allow, - - .stream_enqueue = virtio_transport_stream_enqueue, - .stream_dequeue = virtio_transport_stream_dequeue, - .stream_has_data = virtio_transport_stream_has_data, - .stream_has_space = virtio_transport_stream_has_space, - .stream_rcvhiwat = virtio_transport_stream_rcvhiwat, - .stream_is_active = virtio_transport_stream_is_active, - .stream_allow = virtio_transport_stream_allow, - - .notify_poll_in = virtio_transport_notify_poll_in, - .notify_poll_out = virtio_transport_notify_poll_out, - .notify_recv_init = virtio_transport_notify_recv_init, - .notify_recv_pre_block = virtio_transport_notify_recv_pre_block, - .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue, - .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue, - .notify_send_init = virtio_transport_notify_send_init, - .notify_send_pre_block = virtio_transport_notify_send_pre_block, - .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, - .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, - - .set_buffer_size = virtio_transport_set_buffer_size, - .set_min_buffer_size = virtio_transport_set_min_buffer_size, - .set_max_buffer_size = virtio_transport_set_max_buffer_size, - .get_buffer_size = virtio_transport_get_buffer_size, - .get_min_buffer_size = virtio_transport_get_min_buffer_size, - .get_max_buffer_size = virtio_transport_get_max_buffer_size, - }, - - .send_pkt = vhost_transport_send_pkt, -}; - static int __init vhost_vsock_init(void) { int ret; diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 07875ccc7bb50e..b139f76060a65e 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -150,7 +150,8 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk, void virtio_transport_destruct(struct vsock_sock *vsk); -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt); +void virtio_transport_recv_pkt(struct virtio_transport *t, + struct virtio_vsock_pkt *pkt); void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 861ec9a671f9df..5905f0cddc895b 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -86,33 +86,6 @@ static u32 virtio_transport_get_local_cid(void) return ret; } -static void virtio_transport_loopback_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, loopback_work); - LIST_HEAD(pkts); - - spin_lock_bh(&vsock->loopback_list_lock); - list_splice_init(&vsock->loopback_list, &pkts); - spin_unlock_bh(&vsock->loopback_list_lock); - - mutex_lock(&vsock->rx_lock); - - if (!vsock->rx_run) - goto out; - - while (!list_empty(&pkts)) { - struct virtio_vsock_pkt *pkt; - - pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list); - list_del_init(&pkt->list); - - virtio_transport_recv_pkt(pkt); - } -out: - mutex_unlock(&vsock->rx_lock); -} - static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock, struct virtio_vsock_pkt *pkt) { @@ -370,59 +343,6 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock) return val < virtqueue_get_vring_size(vq); } -static void virtio_transport_rx_work(struct work_struct *work) -{ - struct virtio_vsock *vsock = - container_of(work, struct virtio_vsock, rx_work); - struct virtqueue *vq; - - vq = vsock->vqs[VSOCK_VQ_RX]; - - mutex_lock(&vsock->rx_lock); - - if (!vsock->rx_run) - goto out; - - do { - virtqueue_disable_cb(vq); - for (;;) { - struct virtio_vsock_pkt *pkt; - unsigned int len; - - if (!virtio_transport_more_replies(vsock)) { - /* Stop rx until the device processes already - * pending replies. Leave rx virtqueue - * callbacks disabled. - */ - goto out; - } - - pkt = virtqueue_get_buf(vq, &len); - if (!pkt) { - break; - } - - vsock->rx_buf_nr--; - - /* Drop short/long packets */ - if (unlikely(len < sizeof(pkt->hdr) || - len > sizeof(pkt->hdr) + pkt->len)) { - virtio_transport_free_pkt(pkt); - continue; - } - - pkt->len = len - sizeof(pkt->hdr); - virtio_transport_deliver_tap_pkt(pkt); - virtio_transport_recv_pkt(pkt); - } - } while (!virtqueue_enable_cb(vq)); - -out: - if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) - virtio_vsock_rx_fill(vsock); - mutex_unlock(&vsock->rx_lock); -} - /* event_lock must be held */ static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock, struct virtio_vsock_event *event) @@ -586,6 +506,86 @@ static struct virtio_transport virtio_transport = { .send_pkt = virtio_transport_send_pkt, }; +static void virtio_transport_loopback_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, loopback_work); + LIST_HEAD(pkts); + + spin_lock_bh(&vsock->loopback_list_lock); + list_splice_init(&vsock->loopback_list, &pkts); + spin_unlock_bh(&vsock->loopback_list_lock); + + mutex_lock(&vsock->rx_lock); + + if (!vsock->rx_run) + goto out; + + while (!list_empty(&pkts)) { + struct virtio_vsock_pkt *pkt; + + pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list); + list_del_init(&pkt->list); + + virtio_transport_recv_pkt(&virtio_transport, pkt); + } +out: + mutex_unlock(&vsock->rx_lock); +} + +static void virtio_transport_rx_work(struct work_struct *work) +{ + struct virtio_vsock *vsock = + container_of(work, struct virtio_vsock, rx_work); + struct virtqueue *vq; + + vq = vsock->vqs[VSOCK_VQ_RX]; + + mutex_lock(&vsock->rx_lock); + + if (!vsock->rx_run) + goto out; + + do { + virtqueue_disable_cb(vq); + for (;;) { + struct virtio_vsock_pkt *pkt; + unsigned int len; + + if (!virtio_transport_more_replies(vsock)) { + /* Stop rx until the device processes already + * pending replies. Leave rx virtqueue + * callbacks disabled. + */ + goto out; + } + + pkt = virtqueue_get_buf(vq, &len); + if (!pkt) { + break; + } + + vsock->rx_buf_nr--; + + /* Drop short/long packets */ + if (unlikely(len < sizeof(pkt->hdr) || + len > sizeof(pkt->hdr) + pkt->len)) { + virtio_transport_free_pkt(pkt); + continue; + } + + pkt->len = len - sizeof(pkt->hdr); + virtio_transport_deliver_tap_pkt(pkt); + virtio_transport_recv_pkt(&virtio_transport, pkt); + } + } while (!virtqueue_enable_cb(vq)); + +out: + if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2) + virtio_vsock_rx_fill(vsock); + mutex_unlock(&vsock->rx_lock); +} + static int virtio_vsock_probe(struct virtio_device *vdev) { vq_callback_t *callbacks[] = { diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index fb2060dffb0afb..f0b8ad2656f5ea 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -696,9 +696,9 @@ static int virtio_transport_reset(struct vsock_sock *vsk, /* Normally packets are associated with a socket. There may be no socket if an * attempt was made to connect to a socket that does not exist. */ -static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) +static int virtio_transport_reset_no_sock(const struct virtio_transport *t, + struct virtio_vsock_pkt *pkt) { - const struct virtio_transport *t; struct virtio_vsock_pkt *reply; struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RST, @@ -718,7 +718,6 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) if (!reply) return -ENOMEM; - t = virtio_transport_get_ops(); if (!t) { virtio_transport_free_pkt(reply); return -ENOTCONN; @@ -1060,7 +1059,8 @@ static bool virtio_transport_space_update(struct sock *sk, /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex * lock. */ -void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) +void virtio_transport_recv_pkt(struct virtio_transport *t, + struct virtio_vsock_pkt *pkt) { struct sockaddr_vm src, dst; struct vsock_sock *vsk; @@ -1082,7 +1082,7 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) le32_to_cpu(pkt->hdr.fwd_cnt)); if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) { - (void)virtio_transport_reset_no_sock(pkt); + (void)virtio_transport_reset_no_sock(t, pkt); goto free_pkt; } @@ -1093,7 +1093,7 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) if (!sk) { sk = vsock_find_bound_socket(&dst); if (!sk) { - (void)virtio_transport_reset_no_sock(pkt); + (void)virtio_transport_reset_no_sock(t, pkt); goto free_pkt; } } From aed60a1746baec5b747c2efedfabb6d5b7b758d3 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Fri, 14 Feb 2020 12:48:01 +0100 Subject: [PATCH 09/58] net: virtio_vsock: Enhance connection semantics [ Upstream commit df12eb6d6cd920ab2f0e0a43cd6e1c23a05cea91 ] Whenever the vsock backend on the host sends a packet through the RX queue, it expects an answer on the TX queue. Unfortunately, there is one case where the host side will hang waiting for the answer and might effectively never recover if no timeout mechanism was implemented. This issue happens when the guest side starts binding to the socket, which insert a new bound socket into the list of already bound sockets. At this time, we expect the guest to also start listening, which will trigger the sk_state to move from TCP_CLOSE to TCP_LISTEN. The problem occurs if the host side queued a RX packet and triggered an interrupt right between the end of the binding process and the beginning of the listening process. In this specific case, the function processing the packet virtio_transport_recv_pkt() will find a bound socket, which means it will hit the switch statement checking for the sk_state, but the state won't be changed into TCP_LISTEN yet, which leads the code to pick the default statement. This default statement will only free the buffer, while it should also respond to the host side, by sending a packet on its TX queue. In order to simply fix this unfortunate chain of events, it is important that in case the default statement is entered, and because at this stage we know the host side is waiting for an answer, we must send back a packet containing the operation VIRTIO_VSOCK_OP_RST. One could say that a proper timeout mechanism on the host side will be enough to avoid the backend to hang. But the point of this patch is to ensure the normal use case will be provided with proper responsiveness when it comes to establishing the connection. Signed-off-by: Sebastien Boeuf Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/vmw_vsock/virtio_transport_common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index f0b8ad2656f5ea..efbb521bff135b 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1127,6 +1127,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, virtio_transport_free_pkt(pkt); break; default: + (void)virtio_transport_reset_no_sock(t, pkt); virtio_transport_free_pkt(pkt); break; } From aee38af574a136425d89cf4760ece0e0c2fe0fb3 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 1 Oct 2020 08:34:48 -0500 Subject: [PATCH 10/58] xfs: trim IO to found COW extent limit A bug existed in the XFS reflink code between v5.1 and v5.5 in which the mapping for a COW IO was not trimmed to the mapping of the COW extent that was found. This resulted in a too-short copy, and corruption of other files which shared the original extent. (This happened only when extent size hints were set, which bypasses delalloc and led to this code path.) This was (inadvertently) fixed upstream with 36adcbace24e "xfs: fill out the srcmap in iomap_begin" and related patches which moved lots of this functionality to the iomap subsystem. Hence, this is a -stable only patch, targeted to fix this corruption vector without other major code changes. Fixes: 78f0cc9d55cb ("xfs: don't use delalloc extents for COW on files with extsize hints") Cc: # 5.4.x Signed-off-by: Eric Sandeen Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/xfs_iomap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f780e223b11852..239c9548b1568e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1002,9 +1002,15 @@ xfs_file_iomap_begin( * I/O, which must be block aligned, we need to report the * newly allocated address. If the data fork has a hole, copy * the COW fork mapping to avoid allocating to the data fork. + * + * Otherwise, ensure that the imap range does not extend past + * the range allocated/found in cmap. */ if (directio || imap.br_startblock == HOLESTARTBLOCK) imap = cmap; + else + xfs_trim_extent(&imap, cmap.br_startoff, + cmap.br_blockcount); end_fsb = imap.br_startoff + imap.br_blockcount; length = XFS_FSB_TO_B(mp, end_fsb) - offset; From ac3bf99fc26a176908089281a43518c06a3474ac Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 28 Sep 2020 16:21:17 -0700 Subject: [PATCH 11/58] Input: i8042 - add nopnp quirk for Acer Aspire 5 A515 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5fc27b098dafb8e30794a9db0705074c7d766179 upstream. Touchpad on this laptop is not detected properly during boot, as PNP enumerates (wrongly) AUX port as disabled on this machine. Fix that by adding this board (with admittedly quite funny DMI identifiers) to nopnp quirk list. Reported-by: Andrés Barrantes Silman Signed-off-by: Jiri Kosina Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2009252337340.3336@cbobk.fhfr.pm Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 42771b9b10a001..98f0c7729b754e 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -721,6 +721,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nopnp_table[] = { DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), }, }, + { + /* Acer Aspire 5 A515 */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "Grumpy_PK"), + DMI_MATCH(DMI_BOARD_VENDOR, "PK"), + }, + }, { } }; From 5ac7065e08668f60796ec2e85d77e8e572888a4c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 10 Sep 2020 16:59:51 +0300 Subject: [PATCH 12/58] iio: adc: qcom-spmi-adc5: fix driver name commit fdb29f4de1374483291232ae7515e5e7bb464762 upstream. Remove superfluous '.c' from qcom-spmi-adc5 device driver name. Fixes: e13d757279bb ("iio: adc: Add QCOM SPMI PMIC5 ADC driver") Signed-off-by: Dmitry Baryshkov Acked-by: Manivannan Sadhasivam Cc: Link: https://lore.kernel.org/r/20200910140000.324091-2-dmitry.baryshkov@linaro.org Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/qcom-spmi-adc5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/qcom-spmi-adc5.c b/drivers/iio/adc/qcom-spmi-adc5.c index 21fdcde77883f8..56e7696aa3c0fc 100644 --- a/drivers/iio/adc/qcom-spmi-adc5.c +++ b/drivers/iio/adc/qcom-spmi-adc5.c @@ -786,7 +786,7 @@ static int adc5_probe(struct platform_device *pdev) static struct platform_driver adc5_driver = { .driver = { - .name = "qcom-spmi-adc5.c", + .name = "qcom-spmi-adc5", .of_match_table = adc5_match_table, }, .probe = adc5_probe, From c524a17312d4c4ab8df79928aa1214d5afd086a9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 29 Sep 2020 12:40:31 -0400 Subject: [PATCH 13/58] ftrace: Move RCU is watching check after recursion check commit b40341fad6cc2daa195f8090fd3348f18fff640a upstream. The first thing that the ftrace function callback helper functions should do is to check for recursion. Peter Zijlstra found that when "rcu_is_watching()" had its notrace removed, it caused perf function tracing to crash. This is because the call of rcu_is_watching() is tested before function recursion is checked and and if it is traced, it will cause an infinite recursion loop. rcu_is_watching() should still stay notrace, but to prevent this should never had crashed in the first place. The recursion prevention must be the first thing done in callback functions. Link: https://lore.kernel.org/r/20200929112541.GM2628@hirez.programming.kicks-ass.net Cc: stable@vger.kernel.org Cc: Paul McKenney Fixes: c68c0fa293417 ("ftrace: Have ftrace_ops_get_func() handle RCU and PER_CPU flags too") Acked-by: Peter Zijlstra (Intel) Reported-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 705852c1724aa3..fbba31baef53c6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6382,16 +6382,14 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching()) - return; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; preempt_disable_notrace(); - op->func(ip, parent_ip, op, regs); + if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) + op->func(ip, parent_ip, op, regs); preempt_enable_notrace(); trace_clear_recursion(bit); From de21eb7f8cb0045aedcfe8083fbebedf1209a3ae Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 25 Sep 2020 16:49:51 +0800 Subject: [PATCH 14/58] memstick: Skip allocating card when removing host commit 62c59a8786e6bb75569cee91dab66e9da3ff4b68 upstream. After commit 6827ca573c03 ("memstick: rtsx_usb_ms: Support runtime power management"), removing module rtsx_usb_ms will be stuck. The deadlock is caused by powering on and powering off at the same time, the former one is when memstick_check() is flushed, and the later is called by memstick_remove_host(). Soe let's skip allocating card to prevent this issue. Fixes: 6827ca573c03 ("memstick: rtsx_usb_ms: Support runtime power management") Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200925084952.13220-1-kai.heng.feng@canonical.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/memstick/core/memstick.c | 4 ++++ include/linux/memstick.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 693ee73eb2912c..ef03d6fafc5ce8 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -441,6 +441,9 @@ static void memstick_check(struct work_struct *work) } else if (host->card->stop) host->card->stop(host->card); + if (host->removing) + goto out_power_off; + card = memstick_alloc_card(host); if (!card) { @@ -545,6 +548,7 @@ EXPORT_SYMBOL(memstick_add_host); */ void memstick_remove_host(struct memstick_host *host) { + host->removing = 1; flush_workqueue(workqueue); mutex_lock(&host->lock); if (host->card) diff --git a/include/linux/memstick.h b/include/linux/memstick.h index 216a713bef7f0e..1198ea3d40126d 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -281,6 +281,7 @@ struct memstick_host { struct memstick_dev *card; unsigned int retries; + bool removing; /* Notify the host that some requests are pending. */ void (*request)(struct memstick_host *host); From 3d54a640e20cd4a48648ff7be3d859b4635a00fd Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 28 Sep 2020 11:10:37 +0200 Subject: [PATCH 15/58] drm/amdgpu: restore proper ref count in amdgpu_display_crtc_set_config commit a39d0d7bdf8c21ac7645c02e9676b5cb2b804c31 upstream. A recent attempt to fix a ref count leak in amdgpu_display_crtc_set_config() turned out to be doing too much and "fixed" an intended decrease as if it were a leak. Undo that part to restore the proper balance. This is the very nature of this function to increase or decrease the power reference count depending on the situation. Consequences of this bug is that the power reference would eventually get down to 0 while the display was still in use, resulting in that display switching off unexpectedly. Signed-off-by: Jean Delvare Fixes: e008fa6fb415 ("drm/amdgpu: fix ref count leak in amdgpu_display_crtc_set_config") Cc: stable@vger.kernel.org Cc: Navid Emamdoost Cc: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index e0aed42d9cbdab..b588e0e409e721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -297,7 +297,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, take the current one */ if (active && !adev->have_disp_power_ref) { adev->have_disp_power_ref = true; - goto out; + return ret; } /* if we have no active crtcs, then drop the power ref we got before */ From 688aa0e0aaf947e397004236318a705181591828 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 18 Aug 2020 07:31:17 +0000 Subject: [PATCH 16/58] clocksource/drivers/timer-gx6605s: Fixup counter reload [ Upstream commit bc6717d55d07110d8f3c6d31ec2af50c11b07091 ] When the timer counts to the upper limit, an overflow interrupt is generated, and the count is reset with the value in the TIME_INI register. But the software expects to start counting from 0 when the count overflows, so it forces TIME_INI to 0 to solve the potential interrupt storm problem. Signed-off-by: Guo Ren Tested-by: Xu Kai Cc: Daniel Lezcano Cc: Thomas Gleixner Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1597735877-71115-1-git-send-email-guoren@kernel.org Signed-off-by: Sasha Levin --- drivers/clocksource/timer-gx6605s.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-gx6605s.c b/drivers/clocksource/timer-gx6605s.c index 80d0939d040b51..8d386adbe8009a 100644 --- a/drivers/clocksource/timer-gx6605s.c +++ b/drivers/clocksource/timer-gx6605s.c @@ -28,6 +28,7 @@ static irqreturn_t gx6605s_timer_interrupt(int irq, void *dev) void __iomem *base = timer_of_base(to_timer_of(ce)); writel_relaxed(GX6605S_STATUS_CLR, base + TIMER_STATUS); + writel_relaxed(0, base + TIMER_INI); ce->event_handler(ce); From 91d59157b10341c888edd4f91a0f677aa310693c Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 2 Sep 2020 14:12:46 +0530 Subject: [PATCH 17/58] libbpf: Remove arch-specific include path in Makefile [ Upstream commit 21e9ba5373fc2cec608fd68301a1dbfd14df3172 ] Ubuntu mainline builds for ppc64le are failing with the below error (*): CALL /home/kernel/COD/linux/scripts/atomic/check-atomics.sh DESCEND bpf/resolve_btfids Auto-detecting system features: ... libelf: [ [32mon[m ] ... zlib: [ [32mon[m ] ... bpf: [ [31mOFF[m ] BPF API too old make[6]: *** [Makefile:295: bpfdep] Error 1 make[5]: *** [Makefile:54: /home/kernel/COD/linux/debian/build/build-generic/tools/bpf/resolve_btfids//libbpf.a] Error 2 make[4]: *** [Makefile:71: bpf/resolve_btfids] Error 2 make[3]: *** [/home/kernel/COD/linux/Makefile:1890: tools/bpf/resolve_btfids] Error 2 make[2]: *** [/home/kernel/COD/linux/Makefile:335: __build_one_by_one] Error 2 make[2]: Leaving directory '/home/kernel/COD/linux/debian/build/build-generic' make[1]: *** [Makefile:185: __sub-make] Error 2 make[1]: Leaving directory '/home/kernel/COD/linux' resolve_btfids needs to be build as a host binary and it needs libbpf. However, libbpf Makefile hardcodes an include path utilizing $(ARCH). This results in mixing of cross-architecture headers resulting in a build failure. The specific header include path doesn't seem necessary for a libbpf build. Hence, remove the same. (*) https://kernel.ubuntu.com/~kernel-ppa/mainline/v5.9-rc3/ppc64el/log Reported-by: Vaidyanathan Srinivasan Signed-off-by: Naveen N. Rao Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200902084246.1513055-1-naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Sasha Levin --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index d045707e7c9a40..283caeaaffc305 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -59,7 +59,7 @@ FEATURE_USER = .libbpf FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx FEATURE_DISPLAY = libelf bpf -INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) check_feat := 1 From 985a56c58c4f5fbea988c3b127780fd89fdf72be Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 2 Sep 2020 17:06:58 -0700 Subject: [PATCH 18/58] drivers/net/wan/hdlc_fr: Add needed_headroom for PVC devices [ Upstream commit 44a049c42681de71c783d75cd6e56b4e339488b0 ] PVC devices are virtual devices in this driver stacked on top of the actual HDLC device. They are the devices normal users would use. PVC devices have two types: normal PVC devices and Ethernet-emulating PVC devices. When transmitting data with PVC devices, the ndo_start_xmit function will prepend a header of 4 or 10 bytes. Currently this driver requests this headroom to be reserved for normal PVC devices by setting their hard_header_len to 10. However, this does not work when these devices are used with AF_PACKET/RAW sockets. Also, this driver does not request this headroom for Ethernet-emulating PVC devices (but deals with this problem by reallocating the skb when needed, which is not optimal). This patch replaces hard_header_len with needed_headroom, and set needed_headroom for Ethernet-emulating PVC devices, too. This makes the driver to request headroom for all PVC devices in all cases. Cc: Krzysztof Halasa Cc: Martin Schiller Signed-off-by: Xie He Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wan/hdlc_fr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 9acad651ea1f6e..12b35404cd8e73 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1041,7 +1041,7 @@ static void pvc_setup(struct net_device *dev) { dev->type = ARPHRD_DLCI; dev->flags = IFF_POINTOPOINT; - dev->hard_header_len = 10; + dev->hard_header_len = 0; dev->addr_len = 2; netif_keep_dst(dev); } @@ -1093,6 +1093,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) dev->mtu = HDLC_MAX_MTU; dev->min_mtu = 68; dev->max_mtu = HDLC_MAX_MTU; + dev->needed_headroom = 10; dev->priv_flags |= IFF_NO_QUEUE; dev->ml_priv = pvc; From e9af030ddd4bce02b2fcecb7f63a082b577a44d9 Mon Sep 17 00:00:00 2001 From: Martin Cerveny Date: Sun, 6 Sep 2020 18:21:40 +0200 Subject: [PATCH 19/58] drm/sun4i: mixer: Extend regmap max_register [ Upstream commit 74ea06164cda81dc80e97790164ca533fd7e3087 ] Better guess. Secondary CSC registers are from 0xF0000. Signed-off-by: Martin Cerveny Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200906162140.5584-3-m.cerveny@computer.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun8i_mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c index 18b4881f44814a..12b99ba5750177 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.c +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c @@ -396,7 +396,7 @@ static struct regmap_config sun8i_mixer_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, - .max_register = 0xbfffc, /* guessed */ + .max_register = 0xffffc, /* guessed */ }; static int sun8i_mixer_of_get_id(struct device_node *node) From f19ff011027ba1a54d6602b48358be887bfbcd5e Mon Sep 17 00:00:00 2001 From: Lucy Yan Date: Thu, 10 Sep 2020 12:05:09 -0700 Subject: [PATCH 20/58] net: dec: de2104x: Increase receive ring size for Tulip [ Upstream commit ee460417d254d941dfea5fb7cff841f589643992 ] Increase Rx ring size to address issue where hardware is reaching the receive work limit. Before: [ 102.223342] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.245695] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.251387] de2104x 0000:17:00.0 eth0: rx work limit reached [ 102.267444] de2104x 0000:17:00.0 eth0: rx work limit reached Signed-off-by: Lucy Yan Reviewed-by: Moritz Fischer Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/dec/tulip/de2104x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index f1a2da15dd0a6f..b14d93da242f10 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -91,7 +91,7 @@ MODULE_PARM_DESC (rx_copybreak, "de2104x Breakpoint at which Rx packets are copi #define DSL CONFIG_DE2104X_DSL #endif -#define DE_RX_RING_SIZE 64 +#define DE_RX_RING_SIZE 128 #define DE_TX_RING_SIZE 64 #define DE_RING_BYTES \ ((sizeof(struct de_desc) * DE_RX_RING_SIZE) + \ From 0bcc3480393bc8aaa820f35a9490af438be90d3f Mon Sep 17 00:00:00 2001 From: Olympia Giannou Date: Fri, 11 Sep 2020 14:17:24 +0000 Subject: [PATCH 21/58] rndis_host: increase sleep time in the query-response loop [ Upstream commit 4202c9fdf03d79dedaa94b2c4cf574f25793d669 ] Some WinCE devices face connectivity issues via the NDIS interface. They fail to register, resulting in -110 timeout errors and failures during the probe procedure. In this kind of WinCE devices, the Windows-side ndis driver needs quite more time to be loaded and configured, so that the linux rndis host queries to them fail to be responded correctly on time. More specifically, when INIT is called on the WinCE side - no other requests can be served by the Client and this results in a failed QUERY afterwards. The increase of the waiting time on the side of the linux rndis host in the command-response loop leaves the INIT process to complete and respond to a QUERY, which comes afterwards. The WinCE devices with this special "feature" in their ndis driver are satisfied by this fix. Signed-off-by: Olympia Giannou Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/rndis_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index bd9c07888ebb43..6fa7a009a24a48 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -201,7 +201,7 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) dev_dbg(&info->control->dev, "rndis response error, code %d\n", retval); } - msleep(20); + msleep(40); } dev_dbg(&info->control->dev, "rndis response timeout\n"); return -ETIMEDOUT; From 44b4baf850bdd4bda50bd807875db8e0b2ba3073 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 15 Sep 2020 20:53:25 -0700 Subject: [PATCH 22/58] nvme-core: get/put ctrl and transport module in nvme_dev_open/release() [ Upstream commit 52a3974feb1a3eec25d8836d37a508b67b0a9cd0 ] Get and put the reference to the ctrl in the nvme_dev_open() and nvme_dev_release() before and after module get/put for ctrl in char device file operations. Introduce char_dev relase function, get/put the controller and module which allows us to fix the potential Oops which can be easily reproduced with a passthru ctrl (although the problem also exists with pure user access): Entering kdb (current=0xffff8887f8290000, pid 3128) on processor 30 Oops: (null) due to oops @ 0xffffffffa01019ad CPU: 30 PID: 3128 Comm: bash Tainted: G W OE 5.8.0-rc4nvme-5.9+ #35 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.4 RIP: 0010:nvme_free_ctrl+0x234/0x285 [nvme_core] Code: 57 10 a0 e8 73 bf 02 e1 ba 3d 11 00 00 48 c7 c6 98 33 10 a0 48 c7 c7 1d 57 10 a0 e8 5b bf 02 e1 8 RSP: 0018:ffffc90001d63de0 EFLAGS: 00010246 RAX: ffffffffa05c0440 RBX: ffff8888119e45a0 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8888177e9550 RDI: ffff8888119e43b0 RBP: ffff8887d4768000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffffc90001d63c90 R12: ffff8888119e43b0 R13: ffff8888119e5108 R14: dead000000000100 R15: ffff8888119e5108 FS: 00007f1ef27b0740(0000) GS:ffff888817600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa05c0470 CR3: 00000007f6bee000 CR4: 00000000003406e0 Call Trace: device_release+0x27/0x80 kobject_put+0x98/0x170 nvmet_passthru_ctrl_disable+0x4a/0x70 [nvmet] nvmet_passthru_enable_store+0x4c/0x90 [nvmet] configfs_write_file+0xe6/0x150 vfs_write+0xba/0x1e0 ksys_write+0x5f/0xe0 do_syscall_64+0x52/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f1ef1eb2840 Code: Bad RIP value. RSP: 002b:00007fffdbff0eb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f1ef1eb2840 RDX: 0000000000000002 RSI: 00007f1ef27d2000 RDI: 0000000000000001 RBP: 00007f1ef27d2000 R08: 000000000000000a R09: 00007f1ef27b0740 R10: 0000000000000001 R11: 0000000000000246 R12: 00007f1ef2186400 R13: 0000000000000002 R14: 0000000000000001 R15: 0000000000000000 With this patch fix we take the module ref count in nvme_dev_open() and release that ref count in newly introduced nvme_dev_release(). Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2cd32901d95c73..24c6d5a446b79a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2933,10 +2933,24 @@ static int nvme_dev_open(struct inode *inode, struct file *file) return -EWOULDBLOCK; } + nvme_get_ctrl(ctrl); + if (!try_module_get(ctrl->ops->module)) + return -EINVAL; + file->private_data = ctrl; return 0; } +static int nvme_dev_release(struct inode *inode, struct file *file) +{ + struct nvme_ctrl *ctrl = + container_of(inode->i_cdev, struct nvme_ctrl, cdev); + + module_put(ctrl->ops->module); + nvme_put_ctrl(ctrl); + return 0; +} + static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) { struct nvme_ns *ns; @@ -2999,6 +3013,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd, static const struct file_operations nvme_dev_fops = { .owner = THIS_MODULE, .open = nvme_dev_open, + .release = nvme_dev_release, .unlocked_ioctl = nvme_dev_ioctl, .compat_ioctl = nvme_dev_ioctl, }; From 89fd103fbbb0956a278061af8e1b66d294951390 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Sep 2020 17:26:56 -0400 Subject: [PATCH 23/58] fuse: fix the ->direct_IO() treatment of iov_iter [ Upstream commit 933a3752babcf6513117d5773d2b70782d6ad149 ] the callers rely upon having any iov_iter_truncate() done inside ->direct_IO() countered by iov_iter_reexpand(). Reported-by: Qian Cai Tested-by: Qian Cai Signed-off-by: Al Viro Signed-off-by: Sasha Levin --- fs/fuse/file.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f8d8a8e34b808b..ab4fc1255aca83 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3074,11 +3074,10 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ssize_t ret = 0; struct file *file = iocb->ki_filp; struct fuse_file *ff = file->private_data; - bool async_dio = ff->fc->async_dio; loff_t pos = 0; struct inode *inode; loff_t i_size; - size_t count = iov_iter_count(iter); + size_t count = iov_iter_count(iter), shortened = 0; loff_t offset = iocb->ki_pos; struct fuse_io_priv *io; @@ -3086,17 +3085,9 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) inode = file->f_mapping->host; i_size = i_size_read(inode); - if ((iov_iter_rw(iter) == READ) && (offset > i_size)) + if ((iov_iter_rw(iter) == READ) && (offset >= i_size)) return 0; - /* optimization for short read */ - if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { - if (offset >= i_size) - return 0; - iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset)); - count = iov_iter_count(iter); - } - io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); if (!io) return -ENOMEM; @@ -3112,15 +3103,22 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) * By default, we want to optimize all I/Os with async request * submission to the client filesystem if supported. */ - io->async = async_dio; + io->async = ff->fc->async_dio; io->iocb = iocb; io->blocking = is_sync_kiocb(iocb); + /* optimization for short read */ + if (io->async && !io->write && offset + count > i_size) { + iov_iter_truncate(iter, fuse_round_up(ff->fc, i_size - offset)); + shortened = count - iov_iter_count(iter); + count -= shortened; + } + /* * We cannot asynchronously extend the size of a file. * In such case the aio will behave exactly like sync io. */ - if ((offset + count > i_size) && iov_iter_rw(iter) == WRITE) + if ((offset + count > i_size) && io->write) io->blocking = true; if (io->async && io->blocking) { @@ -3138,6 +3136,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } else { ret = __fuse_direct_read(io, iter, &pos); } + iov_iter_reexpand(iter, iov_iter_count(iter) + shortened); if (io->async) { bool blocking = io->blocking; From 3ba3fc3e7ea6175423ac8eb53d6e90ed1258ed94 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 16 Sep 2020 09:49:18 -0700 Subject: [PATCH 24/58] drivers/net/wan/lapbether: Make skb->protocol consistent with the header [ Upstream commit 83f9a9c8c1edc222846dc1bde6e3479703e8e5a3 ] This driver is a virtual driver stacked on top of Ethernet interfaces. When this driver transmits data on the Ethernet device, the skb->protocol setting is inconsistent with the Ethernet header prepended to the skb. This causes a user listening on the Ethernet interface with an AF_PACKET socket, to see different sll_protocol values for incoming and outgoing frames, because incoming frames would have this value set by parsing the Ethernet header. This patch changes the skb->protocol value for outgoing Ethernet frames, making it consistent with the Ethernet header prepended. This makes a user listening on the Ethernet device with an AF_PACKET socket, to see the same sll_protocol value for incoming and outgoing frames. Cc: Martin Schiller Signed-off-by: Xie He Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/lapbether.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 2cff914aada553..709e3de0f6af16 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -198,8 +198,6 @@ static void lapbeth_data_transmit(struct net_device *ndev, struct sk_buff *skb) struct net_device *dev; int size = skb->len; - skb->protocol = htons(ETH_P_X25); - ptr = skb_push(skb, 2); *ptr++ = size % 256; @@ -210,6 +208,8 @@ static void lapbeth_data_transmit(struct net_device *ndev, struct sk_buff *skb) skb->dev = dev = lapbeth->ethdev; + skb->protocol = htons(ETH_P_DEC); + skb_reset_network_header(skb); dev_hard_header(skb, dev, ETH_P_DEC, bcast_addr, NULL, 0); From 126e6099b8c11e160a8a38943e68aaa3782dfab4 Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 16 Sep 2020 14:25:07 -0700 Subject: [PATCH 25/58] drivers/net/wan/hdlc: Set skb->protocol before transmitting [ Upstream commit 9fb030a70431a2a2a1b292dbf0b2f399cc072c16 ] This patch sets skb->protocol before transmitting frames on the HDLC device, so that a user listening on the HDLC device with an AF_PACKET socket will see outgoing frames' sll_protocol field correctly set and consistent with that of incoming frames. 1. Control frames in hdlc_cisco and hdlc_ppp When these drivers send control frames, skb->protocol is not set. This value should be set to htons(ETH_P_HDLC), because when receiving control frames, their skb->protocol is set to htons(ETH_P_HDLC). When receiving, hdlc_type_trans in hdlc.h is called, which then calls cisco_type_trans or ppp_type_trans. The skb->protocol of control frames is set to htons(ETH_P_HDLC) so that the control frames can be received by hdlc_rcv in hdlc.c, which calls cisco_rx or ppp_rx to process the control frames. 2. hdlc_fr When this driver sends control frames, skb->protocol is set to internal values used in this driver. When this driver sends data frames (from upper stacked PVC devices), skb->protocol is the same as that of the user data packet being sent on the upper PVC device (for normal PVC devices), or is htons(ETH_P_802_3) (for Ethernet-emulating PVC devices). However, skb->protocol for both control frames and data frames should be set to htons(ETH_P_HDLC), because when receiving, all frames received on the HDLC device will have their skb->protocol set to htons(ETH_P_HDLC). When receiving, hdlc_type_trans in hdlc.h is called, and because this driver doesn't provide a type_trans function in struct hdlc_proto, all frames will have their skb->protocol set to htons(ETH_P_HDLC). The frames are then received by hdlc_rcv in hdlc.c, which calls fr_rx to process the frames (control frames are consumed and data frames are re-received on upper PVC devices). Cc: Krzysztof Halasa Signed-off-by: Xie He Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/hdlc_cisco.c | 1 + drivers/net/wan/hdlc_fr.c | 3 +++ drivers/net/wan/hdlc_ppp.c | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index cc33441af46918..50804d04730835 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -118,6 +118,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, skb_put(skb, sizeof(struct cisco_packet)); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); dev_queue_xmit(skb); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 12b35404cd8e73..d6cfd51613ed8d 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -433,6 +433,8 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev) if (pvc->state.fecn) /* TX Congestion counter */ dev->stats.tx_compressed++; skb->dev = pvc->frad; + skb->protocol = htons(ETH_P_HDLC); + skb_reset_network_header(skb); dev_queue_xmit(skb); return NETDEV_TX_OK; } @@ -555,6 +557,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) skb_put(skb, i); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); dev_queue_xmit(skb); diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 16f33d1ffbfb97..64f85565133696 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -251,6 +251,7 @@ static void ppp_tx_cp(struct net_device *dev, u16 pid, u8 code, skb->priority = TC_PRIO_CONTROL; skb->dev = dev; + skb->protocol = htons(ETH_P_HDLC); skb_reset_network_header(skb); skb_queue_tail(&tx_queue, skb); } From 355a710f0813458456d5e7b79aa3b106a74bd900 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Fri, 11 Sep 2020 01:11:35 +0000 Subject: [PATCH 26/58] mac80211: Fix radiotap header channel flag for 6GHz band [ Upstream commit 412a84b5714af56f3eb648bba155107b5edddfdf ] Radiotap header field 'Channel flags' has '2 GHz spectrum' set to 'true' for 6GHz packet. Change it to 5GHz as there isn't a separate option available for 6GHz. Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/010101747ab7b703-1d7c9851-1594-43bf-81f7-f79ce7a67cc6-000000@us-west-2.amazonses.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e5fb9002d31477..3ab85e1e38d82a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -419,7 +419,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (status->bw == RATE_INFO_BW_5) channel_flags |= IEEE80211_CHAN_QUARTER; - if (status->band == NL80211_BAND_5GHZ) + if (status->band == NL80211_BAND_5GHZ || + status->band == NL80211_BAND_6GHZ) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ; else if (status->encoding != RX_ENC_LEGACY) channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ; From a0fe7f7054576a6e6193e62aa3c11b7b69106289 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 17 Sep 2020 14:50:31 +0200 Subject: [PATCH 27/58] mac80211: do not allow bigger VHT MPDUs than the hardware supports [ Upstream commit 3bd5c7a28a7c3aba07a2d300d43f8e988809e147 ] Limit maximum VHT MPDU size by local capability. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200917125031.45009-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/vht.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index ccdcb9ad9ac72e..aabc63dadf1768 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -168,10 +168,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, /* take some capabilities as-is */ cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); vht_cap->cap = cap_info; - vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | - IEEE80211_VHT_CAP_RXLDPC | + vht_cap->cap &= IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_VHT_TXOP_PS | IEEE80211_VHT_CAP_HTC_VHT | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | @@ -180,6 +177,9 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + vht_cap->cap |= min_t(u32, cap_info & IEEE80211_VHT_CAP_MAX_MPDU_MASK, + own_cap.cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK); + /* and some based on our own capabilities */ switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: From 4e4646c85e891ae289df420105602bee4619422c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Sep 2020 10:23:31 +0200 Subject: [PATCH 28/58] tracing: Make the space reserved for the pid wider [ Upstream commit 795d6379a47bcbb88bd95a69920e4acc52849f88 ] For 64bit CONFIG_BASE_SMALL=0 systems PID_MAX_LIMIT is set by default to 4194304. During boot the kernel sets a new value based on number of CPUs but no lower than 32768. It is 1024 per CPU so with 128 CPUs the default becomes 131072 which needs six digits. This value can be increased during run time but must not exceed the initial upper limit. Systemd sometime after v241 sets it to the upper limit during boot. The result is that when the pid exceeds five digits, the trace output is a little hard to read because it is no longer properly padded (same like on big iron with 98+ CPUs). Increase the pid padding to seven digits. Link: https://lkml.kernel.org/r/20200904082331.dcdkrr3bkn3e4qlg@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 38 ++++++++++++++++++------------------- kernel/trace/trace_output.c | 12 ++++++------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index db8162b34ef647..5b2a664812b106 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3584,14 +3584,14 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off \n" - "# | / _----=> need-resched \n" - "# || / _---=> hardirq/softirq \n" - "# ||| / _--=> preempt-depth \n" - "# |||| / delay \n" - "# cmd pid ||||| time | caller \n" - "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# _------=> CPU# \n" + "# / _-----=> irqs-off \n" + "# | / _----=> need-resched \n" + "# || / _---=> hardirq/softirq \n" + "# ||| / _--=> preempt-depth \n" + "# |||| / delay \n" + "# cmd pid ||||| time | caller \n" + "# \\ / ||||| \\ | / \n"); } static void print_event_info(struct trace_buffer *buf, struct seq_file *m) @@ -3612,26 +3612,26 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, print_event_info(buf, m); - seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); - seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); + seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); + seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char *space = " "; - int prec = tgid ? 10 : 2; + const char *space = " "; + int prec = tgid ? 12 : 2; print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); - seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); - seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); - seq_printf(m, "# %.*s||| / delay\n", prec, space); - seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); - seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); + seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); } void diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d54ce252b05a84..a0a45901dc027c 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -482,7 +482,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%8.8s-%-5d %3d", + trace_seq_printf(s, "%8.8s-%-7d %3d", comm, entry->pid, cpu); return trace_print_lat_fmt(s, entry); @@ -573,15 +573,15 @@ int trace_print_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid); if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { unsigned int tgid = trace_find_tgid(entry->pid); if (!tgid) - trace_seq_printf(s, "(-----) "); + trace_seq_printf(s, "(-------) "); else - trace_seq_printf(s, "(%5d) ", tgid); + trace_seq_printf(s, "(%7d) ", tgid); } trace_seq_printf(s, "[%03d] ", iter->cpu); @@ -624,7 +624,7 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); trace_seq_printf( - s, "%16s %5d %3d %d %08x %08lx ", + s, "%16s %7d %3d %d %08x %08lx ", comm, entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx); } else { @@ -905,7 +905,7 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, S = task_index_to_char(field->prev_state); trace_find_cmdline(field->next_pid, comm); trace_seq_printf(&iter->seq, - " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", + " %7d:%3d:%c %s [%03d] %7d:%3d:%c %s\n", field->prev_pid, field->prev_prio, S, delim, From 8cc5eb809aa5e2af9af5ef5d2ab9a2a40dce9b35 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Mon, 14 Sep 2020 17:36:09 -0400 Subject: [PATCH 29/58] tools/io_uring: fix compile breakage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 72f04da48a9828ba3ae8ac77bea648bda8b7d0ff ] It would seem none of the kernel continuous integration does this: $ cd tools/io_uring $ make Otherwise it may have noticed: cc -Wall -Wextra -g -D_GNU_SOURCE -c -o io_uring-bench.o io_uring-bench.c io_uring-bench.c:133:12: error: static declaration of ‘gettid’ follows non-static declaration 133 | static int gettid(void) | ^~~~~~ In file included from /usr/include/unistd.h:1170, from io_uring-bench.c:27: /usr/include/x86_64-linux-gnu/bits/unistd_ext.h:34:16: note: previous declaration of ‘gettid’ was here 34 | extern __pid_t gettid (void) __THROW; | ^~~~~~ make: *** [: io_uring-bench.o] Error 1 The problem on Ubuntu 20.04 (with lk 5.9.0-rc5) is that unistd.h already defines gettid(). So prefix the local definition with "lk_". Signed-off-by: Douglas Gilbert Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- tools/io_uring/io_uring-bench.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c index 0f257139b003e1..7703f011838540 100644 --- a/tools/io_uring/io_uring-bench.c +++ b/tools/io_uring/io_uring-bench.c @@ -130,7 +130,7 @@ static int io_uring_register_files(struct submitter *s) s->nr_files); } -static int gettid(void) +static int lk_gettid(void) { return syscall(__NR_gettid); } @@ -281,7 +281,7 @@ static void *submitter_fn(void *data) struct io_sq_ring *ring = &s->sq_ring; int ret, prepped; - printf("submitter=%d\n", gettid()); + printf("submitter=%d\n", lk_gettid()); srand48_r(pthread_self(), &s->rand); From 157ccdf7eb2ca7121e570514f857ed0659cc83a1 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 4 Sep 2020 12:28:12 +1200 Subject: [PATCH 30/58] spi: fsl-espi: Only process interrupts for expected events [ Upstream commit b867eef4cf548cd9541225aadcdcee644669b9e1 ] The SPIE register contains counts for the TX FIFO so any time the irq handler was invoked we would attempt to process the RX/TX fifos. Use the SPIM value to mask the events so that we only process interrupts that were expected. This was a latent issue exposed by commit 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C"). Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20200904002812.7300-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-fsl-espi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c index f20326714b9d50..215bf6624e7c32 100644 --- a/drivers/spi/spi-fsl-espi.c +++ b/drivers/spi/spi-fsl-espi.c @@ -555,13 +555,14 @@ static void fsl_espi_cpu_irq(struct fsl_espi *espi, u32 events) static irqreturn_t fsl_espi_irq(s32 irq, void *context_data) { struct fsl_espi *espi = context_data; - u32 events; + u32 events, mask; spin_lock(&espi->lock); /* Get interrupt events(tx/rx) */ events = fsl_espi_read_reg(espi, ESPI_SPIE); - if (!events) { + mask = fsl_espi_read_reg(espi, ESPI_SPIM); + if (!(events & mask)) { spin_unlock(&espi->lock); return IRQ_NONE; } From 2b217eafcf749bb30c7bf59f731286fab030fd33 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Tue, 22 Sep 2020 14:25:17 +0800 Subject: [PATCH 31/58] nvme-pci: fix NULL req in completion handler [ Upstream commit 50b7c24390a53c78de546215282fb52980f1d7b7 ] Currently, we use nvmeq->q_depth as the upper limit for a valid tag in nvme_handle_cqe(), it is not correct. Because the available tag number is recorded in tagset, which is not equal to nvmeq->q_depth. The nvme driver registers interrupts for queues before initializing the tagset, because it uses the number of successful request_irq() calls to configure the tagset parameters. This allows a race condition with the current tag validity check if the controller happens to produce an interrupt with a corrupted CQE before the tagset is initialized. Replace the driver's indirect tag check with the one already provided by the block layer. Signed-off-by: Xianting Tian Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 75f26d2ec64295..af0b51d1d43e8b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -941,13 +941,6 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; struct request *req; - if (unlikely(cqe->command_id >= nvmeq->q_depth)) { - dev_warn(nvmeq->dev->ctrl.device, - "invalid id %d completed on queue %d\n", - cqe->command_id, le16_to_cpu(cqe->sq_id)); - return; - } - /* * AEN requests are special as they don't time out and can * survive any kind of queue freeze and often don't respond to @@ -962,6 +955,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) } req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); + if (unlikely(!req)) { + dev_warn(nvmeq->dev->ctrl.device, + "invalid id %d completed on queue %d\n", + cqe->command_id, le16_to_cpu(cqe->sq_id)); + return; + } + trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); nvme_end_request(req, cqe->status, cqe->result); } From 8c03d0ef62ddc34bcd6695acbc99af9d6f201edc Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 17 Sep 2020 13:33:22 -0700 Subject: [PATCH 32/58] nvme-fc: fail new connections to a deleted host or remote port [ Upstream commit 9e0e8dac985d4bd07d9e62922b9d189d3ca2fccf ] The lldd may have made calls to delete a remote port or local port and the delete is in progress when the cli then attempts to create a new controller. Currently, this proceeds without error although it can't be very successful. Fix this by validating that both the host port and remote port are present when a new controller is to be created. Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/fc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index da801a14cd13df..65b3dc9cd693b2 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3319,12 +3319,14 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) spin_lock_irqsave(&nvme_fc_lock, flags); list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { if (lport->localport.node_name != laddr.nn || - lport->localport.port_name != laddr.pn) + lport->localport.port_name != laddr.pn || + lport->localport.port_state != FC_OBJSTATE_ONLINE) continue; list_for_each_entry(rport, &lport->endp_list, endp_list) { if (rport->remoteport.node_name != raddr.nn || - rport->remoteport.port_name != raddr.pn) + rport->remoteport.port_name != raddr.pn || + rport->remoteport.port_state != FC_OBJSTATE_ONLINE) continue; /* if fail to get reference fall through. Will error */ From 543ea1af57444a06fdb73873cbd2580a3cd891c4 Mon Sep 17 00:00:00 2001 From: Taiping Lai Date: Mon, 31 Aug 2020 17:09:47 +0800 Subject: [PATCH 33/58] gpio: sprd: Clear interrupt when setting the type as edge [ Upstream commit 5fcface659aab7eac4bd65dd116d98b8f7bb88d5 ] The raw interrupt status of GPIO maybe set before the interrupt is enabled, which would trigger the interrupt event once enabled it from user side. This is the case for edge interrupts only. Adding a clear operation when setting interrupt type can avoid that. There're a few considerations for the solution: 1) This issue is for edge interrupt only; The interrupts requested by users are IRQ_TYPE_LEVEL_HIGH as default, so clearing interrupt when request is useless. 2) The interrupt type can be set to edge when request and following up with clearing it though, but the problem is still there once users set the interrupt type to level trggier. 3) We can add a clear operation after each time of setting interrupt enable bit, but it is redundant for level trigger interrupt. Therefore, the solution is this patch seems the best for now. Fixes: 9a3821c2bb47 ("gpio: Add GPIO driver for Spreadtrum SC9860 platform") Signed-off-by: Taiping Lai Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-sprd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c index d7314d39ab65ba..36ea8a3bd4510f 100644 --- a/drivers/gpio/gpio-sprd.c +++ b/drivers/gpio/gpio-sprd.c @@ -149,17 +149,20 @@ static int sprd_gpio_irq_set_type(struct irq_data *data, sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IEV, 1); + sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IEV, 0); + sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 1); + sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: From ae68b15839b0f6c63d194bc5ae8f5d587d5c94af Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 5 Sep 2020 15:46:48 +0300 Subject: [PATCH 34/58] phy: ti: am654: Fix a leak in serdes_am654_probe() [ Upstream commit 850280156f6421a404f2351bee07a0e7bedfd4c6 ] If devm_phy_create() fails then we need to call of_clk_del_provider(node) to undo the call to of_clk_add_provider(). Fixes: 71e2f5c5c224 ("phy: ti: Add a new SERDES driver for TI's AM654x SoC") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200905124648.GA183976@mwanda Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/ti/phy-am654-serdes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c index 88a047b9fa6fa4..6ef12017ff4e8d 100644 --- a/drivers/phy/ti/phy-am654-serdes.c +++ b/drivers/phy/ti/phy-am654-serdes.c @@ -625,8 +625,10 @@ static int serdes_am654_probe(struct platform_device *pdev) pm_runtime_enable(dev); phy = devm_phy_create(dev, NULL, &ops); - if (IS_ERR(phy)) - return PTR_ERR(phy); + if (IS_ERR(phy)) { + ret = PTR_ERR(phy); + goto clk_err; + } phy_set_drvdata(phy, am654_phy); phy_provider = devm_of_phy_provider_register(dev, serdes_am654_xlate); From 6c5a11ead94206dfc424af84ce89e9c3bdb442ec Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Tue, 8 Sep 2020 09:17:10 +1200 Subject: [PATCH 35/58] pinctrl: mvebu: Fix i2c sda definition for 98DX3236 [ Upstream commit 63c3212e7a37d68c89a13bdaebce869f4e064e67 ] Per the datasheet the i2c functions use MPP_Sel=0x1. They are documented as using MPP_Sel=0x4 as well but mixing 0x1 and 0x4 is clearly wrong. On the board tested 0x4 resulted in a non-functioning i2c bus so stick with 0x1 which works. Fixes: d7ae8f8dee7f ("pinctrl: mvebu: pinctrl driver for 98DX3236 SoC") Signed-off-by: Chris Packham Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20200907211712.9697-2-chris.packham@alliedtelesis.co.nz Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/mvebu/pinctrl-armada-xp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c index a767a05fa3a0d0..48e2a6c56a83b9 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c @@ -414,7 +414,7 @@ static struct mvebu_mpp_mode mv98dx3236_mpp_modes[] = { MPP_VAR_FUNCTION(0x1, "i2c0", "sck", V_98DX3236_PLUS)), MPP_MODE(15, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_98DX3236_PLUS), - MPP_VAR_FUNCTION(0x4, "i2c0", "sda", V_98DX3236_PLUS)), + MPP_VAR_FUNCTION(0x1, "i2c0", "sda", V_98DX3236_PLUS)), MPP_MODE(16, MPP_VAR_FUNCTION(0x0, "gpo", NULL, V_98DX3236_PLUS), MPP_VAR_FUNCTION(0x4, "dev", "oe", V_98DX3236_PLUS)), From 2f37a1ef1e5db5197f1e3d4f9d5589e2f6c0cf55 Mon Sep 17 00:00:00 2001 From: Jeffrey Mitchell Date: Tue, 15 Sep 2020 16:42:52 -0500 Subject: [PATCH 36/58] nfs: Fix security label length not being reset [ Upstream commit d33030e2ee3508d65db5644551435310df86010e ] nfs_readdir_page_filler() iterates over entries in a directory, reusing the same security label buffer, but does not reset the buffer's length. This causes decode_attr_security_label() to return -ERANGE if an entry's security label is longer than the previous one's. This error, in nfs4_decode_dirent(), only gets passed up as -EAGAIN, which causes another failed attempt to copy into the buffer. The second error is ignored and the remaining entries do not show up in ls, specifically the getdents64() syscall. Reproduce by creating multiple files in NFS and giving one of the later files a longer security label. ls will not see that file nor any that are added afterwards, though they will exist on the backend. In nfs_readdir_page_filler(), reset security label buffer length before every reuse Signed-off-by: Jeffrey Mitchell Fixes: b4487b935452 ("nfs: Fix getxattr kernel panic and memory overflow") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 05ed7be8a6345e..188b17a3b19eb6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -553,6 +553,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); do { + if (entry->label) + entry->label->len = NFS4_MAXLABELLEN; + status = xdr_decode(desc, entry, &stream); if (status != 0) { if (status == -EAGAIN) From 0ded28e3c46810999505877b6dba572b4438ab79 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 3 Jun 2020 13:13:07 +0200 Subject: [PATCH 37/58] clk: tegra: Always program PLL_E when enabled [ Upstream commit 5105660ee80862b85f7769626d0f936c18ce1885 ] Commit bff1cef5f23a ("clk: tegra: Don't enable already enabled PLLs") added checks to avoid enabling PLLs that have already been enabled by the bootloader. However, the PLL_E configuration inherited from the bootloader isn't necessarily the one that is needed for the kernel. This can cause SATA to fail like this: [ 5.310270] phy phy-sata.6: phy poweron failed --> -110 [ 5.315604] tegra-ahci 70027000.sata: failed to power on AHCI controller: -110 [ 5.323022] tegra-ahci: probe of 70027000.sata failed with error -110 Fix this by always programming the PLL_E. This ensures that any mis- configuration by the bootloader will be overwritten by the kernel. Fixes: bff1cef5f23a ("clk: tegra: Don't enable already enabled PLLs") Reported-by: LABBE Corentin Tested-by: Corentin Labbe Reviewed-by: Dmitry Osipenko Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-pll.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index 1583f5fc992f33..80f640d9ea71c2 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -1569,9 +1569,6 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw) unsigned long flags = 0; unsigned long input_rate; - if (clk_pll_is_enabled(hw)) - return 0; - input_rate = clk_hw_get_rate(clk_hw_get_parent(hw)); if (_get_table_rate(hw, &sel, pll->params->fixed_rate, input_rate)) From 08e66c0c1c0e3e2ca65b8fb1e609105b0b991302 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 22 Sep 2020 14:40:46 +0200 Subject: [PATCH 38/58] clk: samsung: exynos4: mark 'chipid' clock as CLK_IGNORE_UNUSED [ Upstream commit f3bb0f796f5ffe32f0fbdce5b1b12eb85511158f ] The ChipID IO region has it's own clock, which is being disabled while scanning for unused clocks. It turned out that some CPU hotplug, CPU idle or even SOC firmware code depends on the reads from that area. Fix the mysterious hang caused by entering deep CPU idle state by ignoring the 'chipid' clock during unused clocks scan, as there are no direct clients for it which will keep it enabled. Fixes: e062b571777f ("clk: exynos4: register clocks using common clock framework") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20200922124046.10496-1-m.szyprowski@samsung.com Reviewed-by: Krzysztof Kozlowski Acked-by: Sylwester Nawrocki Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/samsung/clk-exynos4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index 51564fc23c6398..f4086287bb71bb 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -927,7 +927,7 @@ static const struct samsung_gate_clock exynos4210_gate_clks[] __initconst = { GATE(CLK_PCIE, "pcie", "aclk133", GATE_IP_FSYS, 14, 0, 0), GATE(CLK_SMMU_PCIE, "smmu_pcie", "aclk133", GATE_IP_FSYS, 18, 0, 0), GATE(CLK_MODEMIF, "modemif", "aclk100", GATE_IP_PERIL, 28, 0, 0), - GATE(CLK_CHIPID, "chipid", "aclk100", E4210_GATE_IP_PERIR, 0, 0, 0), + GATE(CLK_CHIPID, "chipid", "aclk100", E4210_GATE_IP_PERIR, 0, CLK_IGNORE_UNUSED, 0), GATE(CLK_SYSREG, "sysreg", "aclk100", E4210_GATE_IP_PERIR, 0, CLK_IGNORE_UNUSED, 0), GATE(CLK_HDMI_CEC, "hdmi_cec", "aclk100", E4210_GATE_IP_PERIR, 11, 0, @@ -969,7 +969,7 @@ static const struct samsung_gate_clock exynos4x12_gate_clks[] __initconst = { 0), GATE(CLK_TSADC, "tsadc", "aclk133", E4X12_GATE_BUS_FSYS1, 16, 0, 0), GATE(CLK_MIPI_HSI, "mipi_hsi", "aclk133", GATE_IP_FSYS, 10, 0, 0), - GATE(CLK_CHIPID, "chipid", "aclk100", E4X12_GATE_IP_PERIR, 0, 0, 0), + GATE(CLK_CHIPID, "chipid", "aclk100", E4X12_GATE_IP_PERIR, 0, CLK_IGNORE_UNUSED, 0), GATE(CLK_SYSREG, "sysreg", "aclk100", E4X12_GATE_IP_PERIR, 1, CLK_IGNORE_UNUSED, 0), GATE(CLK_HDMI_CEC, "hdmi_cec", "aclk100", E4X12_GATE_IP_PERIR, 11, 0, From eddeff708c158c3f85ca497129473dd37654358e Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 18 Sep 2020 09:13:35 +0800 Subject: [PATCH 39/58] iommu/exynos: add missing put_device() call in exynos_iommu_of_xlate() [ Upstream commit 1a26044954a6d1f4d375d5e62392446af663be7a ] if of_find_device_by_node() succeed, exynos_iommu_of_xlate() doesn't have a corresponding put_device(). Thus add put_device() to fix the exception handling for this function implementation. Fixes: aa759fd376fb ("iommu/exynos: Add callback for initializing devices from device tree") Signed-off-by: Yu Kuai Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/20200918011335.909141-1-yukuai3@huawei.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/exynos-iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 9c94e16fb1277f..55ed857f804f7a 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1299,13 +1299,17 @@ static int exynos_iommu_of_xlate(struct device *dev, return -ENODEV; data = platform_get_drvdata(sysmmu); - if (!data) + if (!data) { + put_device(&sysmmu->dev); return -ENODEV; + } if (!owner) { owner = kzalloc(sizeof(*owner), GFP_KERNEL); - if (!owner) + if (!owner) { + put_device(&sysmmu->dev); return -ENOMEM; + } INIT_LIST_HEAD(&owner->controllers); mutex_init(&owner->rpm_lock); From 8323d1e09037fb712542063fa3223dde5a71b345 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 11 Sep 2020 09:51:04 +0800 Subject: [PATCH 40/58] gpio/aspeed-sgpio: enable access to all 80 input & output sgpios [ Upstream commit ac67b07e268d46eba675a60c37051bb3e59fd201 ] Currently, the aspeed-sgpio driver exposes up to 80 GPIO lines, corresponding to the 80 status bits available in hardware. Each of these lines can be configured as either an input or an output. However, each of these GPIOs is actually an input *and* an output; we actually have 80 inputs plus 80 outputs. This change expands the maximum number of GPIOs to 160; the lower half of this range are the input-only GPIOs, the upper half are the outputs. We fix the GPIO directions to correspond to this mapping. This also fixes a bug when setting GPIOs - we were reading from the input register, making it impossible to set more than one output GPIO. Signed-off-by: Jeremy Kerr Fixes: 7db47faae79b ("gpio: aspeed: Add SGPIO driver") Reviewed-by: Joel Stanley Reviewed-by: Andrew Jeffery Acked-by: Rob Herring Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- .../devicetree/bindings/gpio/sgpio-aspeed.txt | 5 +- drivers/gpio/sgpio-aspeed.c | 126 ++++++++++++------ 2 files changed, 87 insertions(+), 44 deletions(-) diff --git a/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt b/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt index d4d83916c09dd9..be329ea4794f88 100644 --- a/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt +++ b/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt @@ -20,8 +20,9 @@ Required properties: - gpio-controller : Marks the device node as a GPIO controller - interrupts : Interrupt specifier, see interrupt-controller/interrupts.txt - interrupt-controller : Mark the GPIO controller as an interrupt-controller -- ngpios : number of GPIO lines, see gpio.txt - (should be multiple of 8, up to 80 pins) +- ngpios : number of *hardware* GPIO lines, see gpio.txt. This will expose + 2 software GPIOs per hardware GPIO: one for hardware input, one for hardware + output. Up to 80 pins, must be a multiple of 8. - clocks : A phandle to the APB clock for SGPM clock division - bus-frequency : SGPM CLK frequency diff --git a/drivers/gpio/sgpio-aspeed.c b/drivers/gpio/sgpio-aspeed.c index 8319812593e312..7cd86d5e8dc90e 100644 --- a/drivers/gpio/sgpio-aspeed.c +++ b/drivers/gpio/sgpio-aspeed.c @@ -17,7 +17,17 @@ #include #include -#define MAX_NR_SGPIO 80 +/* + * MAX_NR_HW_GPIO represents the number of actual hardware-supported GPIOs (ie, + * slots within the clocked serial GPIO data). Since each HW GPIO is both an + * input and an output, we provide MAX_NR_HW_GPIO * 2 lines on our gpiochip + * device. + * + * We use SGPIO_OUTPUT_OFFSET to define the split between the inputs and + * outputs; the inputs start at line 0, the outputs start at OUTPUT_OFFSET. + */ +#define MAX_NR_HW_SGPIO 80 +#define SGPIO_OUTPUT_OFFSET MAX_NR_HW_SGPIO #define ASPEED_SGPIO_CTRL 0x54 @@ -30,8 +40,8 @@ struct aspeed_sgpio { struct clk *pclk; spinlock_t lock; void __iomem *base; - uint32_t dir_in[3]; int irq; + int n_sgpio; }; struct aspeed_sgpio_bank { @@ -111,31 +121,69 @@ static void __iomem *bank_reg(struct aspeed_sgpio *gpio, } } -#define GPIO_BANK(x) ((x) >> 5) -#define GPIO_OFFSET(x) ((x) & 0x1f) +#define GPIO_BANK(x) ((x % SGPIO_OUTPUT_OFFSET) >> 5) +#define GPIO_OFFSET(x) ((x % SGPIO_OUTPUT_OFFSET) & 0x1f) #define GPIO_BIT(x) BIT(GPIO_OFFSET(x)) static const struct aspeed_sgpio_bank *to_bank(unsigned int offset) { - unsigned int bank = GPIO_BANK(offset); + unsigned int bank; + + bank = GPIO_BANK(offset); WARN_ON(bank >= ARRAY_SIZE(aspeed_sgpio_banks)); return &aspeed_sgpio_banks[bank]; } +static int aspeed_sgpio_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, unsigned int ngpios) +{ + struct aspeed_sgpio *sgpio = gpiochip_get_data(gc); + int n = sgpio->n_sgpio; + int c = SGPIO_OUTPUT_OFFSET - n; + + WARN_ON(ngpios < MAX_NR_HW_SGPIO * 2); + + /* input GPIOs in the lower range */ + bitmap_set(valid_mask, 0, n); + bitmap_clear(valid_mask, n, c); + + /* output GPIOS above SGPIO_OUTPUT_OFFSET */ + bitmap_set(valid_mask, SGPIO_OUTPUT_OFFSET, n); + bitmap_clear(valid_mask, SGPIO_OUTPUT_OFFSET + n, c); + + return 0; +} + +static void aspeed_sgpio_irq_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, unsigned int ngpios) +{ + struct aspeed_sgpio *sgpio = gpiochip_get_data(gc); + int n = sgpio->n_sgpio; + + WARN_ON(ngpios < MAX_NR_HW_SGPIO * 2); + + /* input GPIOs in the lower range */ + bitmap_set(valid_mask, 0, n); + bitmap_clear(valid_mask, n, ngpios - n); +} + +static bool aspeed_sgpio_is_input(unsigned int offset) +{ + return offset < SGPIO_OUTPUT_OFFSET; +} + static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset) { struct aspeed_sgpio *gpio = gpiochip_get_data(gc); const struct aspeed_sgpio_bank *bank = to_bank(offset); unsigned long flags; enum aspeed_sgpio_reg reg; - bool is_input; int rc = 0; spin_lock_irqsave(&gpio->lock, flags); - is_input = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset); - reg = is_input ? reg_val : reg_rdata; + reg = aspeed_sgpio_is_input(offset) ? reg_val : reg_rdata; rc = !!(ioread32(bank_reg(gpio, bank, reg)) & GPIO_BIT(offset)); spin_unlock_irqrestore(&gpio->lock, flags); @@ -143,22 +191,31 @@ static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset) return rc; } -static void sgpio_set_value(struct gpio_chip *gc, unsigned int offset, int val) +static int sgpio_set_value(struct gpio_chip *gc, unsigned int offset, int val) { struct aspeed_sgpio *gpio = gpiochip_get_data(gc); const struct aspeed_sgpio_bank *bank = to_bank(offset); - void __iomem *addr; + void __iomem *addr_r, *addr_w; u32 reg = 0; - addr = bank_reg(gpio, bank, reg_val); - reg = ioread32(addr); + if (aspeed_sgpio_is_input(offset)) + return -EINVAL; + + /* Since this is an output, read the cached value from rdata, then + * update val. */ + addr_r = bank_reg(gpio, bank, reg_rdata); + addr_w = bank_reg(gpio, bank, reg_val); + + reg = ioread32(addr_r); if (val) reg |= GPIO_BIT(offset); else reg &= ~GPIO_BIT(offset); - iowrite32(reg, addr); + iowrite32(reg, addr_w); + + return 0; } static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val) @@ -175,43 +232,28 @@ static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val) static int aspeed_sgpio_dir_in(struct gpio_chip *gc, unsigned int offset) { - struct aspeed_sgpio *gpio = gpiochip_get_data(gc); - unsigned long flags; - - spin_lock_irqsave(&gpio->lock, flags); - gpio->dir_in[GPIO_BANK(offset)] |= GPIO_BIT(offset); - spin_unlock_irqrestore(&gpio->lock, flags); - - return 0; + return aspeed_sgpio_is_input(offset) ? 0 : -EINVAL; } static int aspeed_sgpio_dir_out(struct gpio_chip *gc, unsigned int offset, int val) { struct aspeed_sgpio *gpio = gpiochip_get_data(gc); unsigned long flags; + int rc; - spin_lock_irqsave(&gpio->lock, flags); - - gpio->dir_in[GPIO_BANK(offset)] &= ~GPIO_BIT(offset); - sgpio_set_value(gc, offset, val); + /* No special action is required for setting the direction; we'll + * error-out in sgpio_set_value if this isn't an output GPIO */ + spin_lock_irqsave(&gpio->lock, flags); + rc = sgpio_set_value(gc, offset, val); spin_unlock_irqrestore(&gpio->lock, flags); - return 0; + return rc; } static int aspeed_sgpio_get_direction(struct gpio_chip *gc, unsigned int offset) { - int dir_status; - struct aspeed_sgpio *gpio = gpiochip_get_data(gc); - unsigned long flags; - - spin_lock_irqsave(&gpio->lock, flags); - dir_status = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset); - spin_unlock_irqrestore(&gpio->lock, flags); - - return dir_status; - + return !!aspeed_sgpio_is_input(offset); } static void irqd_to_aspeed_sgpio_data(struct irq_data *d, @@ -402,6 +444,7 @@ static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio, irq = &gpio->chip.irq; irq->chip = &aspeed_sgpio_irqchip; + irq->init_valid_mask = aspeed_sgpio_irq_init_valid_mask; irq->handler = handle_bad_irq; irq->default_type = IRQ_TYPE_NONE; irq->parent_handler = aspeed_sgpio_irq_handler; @@ -452,11 +495,12 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev) if (rc < 0) { dev_err(&pdev->dev, "Could not read ngpios property\n"); return -EINVAL; - } else if (nr_gpios > MAX_NR_SGPIO) { + } else if (nr_gpios > MAX_NR_HW_SGPIO) { dev_err(&pdev->dev, "Number of GPIOs exceeds the maximum of %d: %d\n", - MAX_NR_SGPIO, nr_gpios); + MAX_NR_HW_SGPIO, nr_gpios); return -EINVAL; } + gpio->n_sgpio = nr_gpios; rc = of_property_read_u32(pdev->dev.of_node, "bus-frequency", &sgpio_freq); if (rc < 0) { @@ -497,7 +541,8 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev) spin_lock_init(&gpio->lock); gpio->chip.parent = &pdev->dev; - gpio->chip.ngpio = nr_gpios; + gpio->chip.ngpio = MAX_NR_HW_SGPIO * 2; + gpio->chip.init_valid_mask = aspeed_sgpio_init_valid_mask; gpio->chip.direction_input = aspeed_sgpio_dir_in; gpio->chip.direction_output = aspeed_sgpio_dir_out; gpio->chip.get_direction = aspeed_sgpio_get_direction; @@ -509,9 +554,6 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev) gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; - /* set all SGPIO pins as input (1). */ - memset(gpio->dir_in, 0xff, sizeof(gpio->dir_in)); - aspeed_sgpio_setup_irqs(gpio, pdev); rc = devm_gpiochip_add_data(&pdev->dev, &gpio->chip, gpio); From f2a2380812c6e295f6fe3169dd53d1dfdf20f659 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 11 Sep 2020 09:51:05 +0800 Subject: [PATCH 41/58] gpio/aspeed-sgpio: don't enable all interrupts by default [ Upstream commit bf0d394e885015941ed2d5724c0a6ed8d42dd95e ] Currently, the IRQ setup for the SGPIO driver enables all interrupts in dual-edge trigger mode. Since the default handler is handle_bad_irq, any state change on input GPIOs will trigger bad IRQ warnings. This change applies sensible IRQ defaults: single-edge trigger, and all IRQs disabled. Signed-off-by: Jeremy Kerr Fixes: 7db47faae79b ("gpio: aspeed: Add SGPIO driver") Reviewed-by: Joel Stanley Acked-by: Andrew Jeffery Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/sgpio-aspeed.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/sgpio-aspeed.c b/drivers/gpio/sgpio-aspeed.c index 7cd86d5e8dc90e..3a5dfb8ded1fb3 100644 --- a/drivers/gpio/sgpio-aspeed.c +++ b/drivers/gpio/sgpio-aspeed.c @@ -452,17 +452,15 @@ static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio, irq->parents = &gpio->irq; irq->num_parents = 1; - /* set IRQ settings and Enable Interrupt */ + /* Apply default IRQ settings */ for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) { bank = &aspeed_sgpio_banks[i]; /* set falling or level-low irq */ iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type0)); /* trigger type is edge */ iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type1)); - /* dual edge trigger mode. */ - iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_type2)); - /* enable irq */ - iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_enable)); + /* single edge trigger */ + iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type2)); } return 0; From 811ac052e26425ee9f98d476642a3d1d1be8dd1d Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Wed, 16 Sep 2020 13:42:16 -0700 Subject: [PATCH 42/58] gpio: aspeed: fix ast2600 bank properties [ Upstream commit 3e640b1eec38e4c8eba160f26cba4f592e657f3d ] GPIO_U is mapped to the least significant byte of input/output mask, and the byte in "output" mask should be 0 because GPIO_U is input only. All the other bits need to be 1 because GPIO_V/W/X support both input and output modes. Similarly, GPIO_Y/Z are mapped to the 2 least significant bytes, and the according bits need to be 1 because GPIO_Y/Z support both input and output modes. Fixes: ab4a85534c3e ("gpio: aspeed: Add in ast2600 details to Aspeed driver") Signed-off-by: Tao Ren Reviewed-by: Andrew Jeffery Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-aspeed.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 09e53c5f3b0a46..2820c59b5f0719 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -1115,8 +1115,8 @@ static const struct aspeed_gpio_config ast2500_config = static const struct aspeed_bank_props ast2600_bank_props[] = { /* input output */ - {5, 0xffffffff, 0x0000ffff}, /* U/V/W/X */ - {6, 0xffff0000, 0x0fff0000}, /* Y/Z */ + {5, 0xffffffff, 0xffffff00}, /* U/V/W/X */ + {6, 0x0000ffff, 0x0000ffff}, /* Y/Z */ { }, }; From 21b9387253a72b81d74056028dd34c2d96efb715 Mon Sep 17 00:00:00 2001 From: Nicolas VINCENT Date: Wed, 23 Sep 2020 16:08:40 +0200 Subject: [PATCH 43/58] i2c: cpm: Fix i2c_ram structure [ Upstream commit a2bd970aa62f2f7f80fd0d212b1d4ccea5df4aed ] the i2c_ram structure is missing the sdmatmp field mentionned in datasheet for MPC8272 at paragraph 36.5. With this field missing, the hardware would write past the allocated memory done through cpm_muram_alloc for the i2c_ram structure and land in memory allocated for the buffers descriptors corrupting the cbd_bufaddr field. Since this field is only set during setup(), the first i2c transaction would work and the following would send data read from an arbitrary memory location. Fixes: 61045dbe9d8d ("i2c: Add support for I2C bus on Freescale CPM1/CPM2 controllers") Signed-off-by: Nicolas VINCENT Acked-by: Jochen Friedrich Acked-by: Christophe Leroy Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-cpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index 1213e1932ccb5e..24d584a1c9a78f 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c @@ -65,6 +65,9 @@ struct i2c_ram { char res1[4]; /* Reserved */ ushort rpbase; /* Relocation pointer */ char res2[2]; /* Reserved */ + /* The following elements are only for CPM2 */ + char res3[4]; /* Reserved */ + uint sdmatmp; /* Internal */ }; #define I2COM_START 0x80 From ea4c691b58d76e0624ce9efe61b21da96e06706f Mon Sep 17 00:00:00 2001 From: Vincent Huang Date: Mon, 28 Sep 2020 16:19:05 -0700 Subject: [PATCH 44/58] Input: trackpoint - enable Synaptics trackpoints [ Upstream commit 996d585b079ad494a30cac10e08585bcd5345125 ] Add Synaptics IDs in trackpoint_start_protocol() to mark them as valid. Signed-off-by: Vincent Huang Fixes: 6c77545af100 ("Input: trackpoint - add new trackpoint variant IDs") Reviewed-by: Harry Cutts Tested-by: Harry Cutts Link: https://lore.kernel.org/r/20200924053013.1056953-1-vincent.huang@tw.synaptics.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/mouse/trackpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 854d5e75872410..ef2fa0905208d1 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -282,6 +282,8 @@ static int trackpoint_start_protocol(struct psmouse *psmouse, case TP_VARIANT_ALPS: case TP_VARIANT_ELAN: case TP_VARIANT_NXP: + case TP_VARIANT_JYT_SYNAPTICS: + case TP_VARIANT_SYNAPTICS: if (variant_id) *variant_id = param[0]; if (firmware_id) From 4faf2c3a97ecd87a8f0603ec7d7be880991ae4ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 19 Sep 2020 16:39:22 +0200 Subject: [PATCH 45/58] scripts/dtc: only append to HOST_EXTRACFLAGS instead of overwriting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit efe84d408bf41975db8506d3a1cc02e794e2309c ] When building with $ HOST_EXTRACFLAGS=-g make the expectation is that host tools are built with debug informations. This however doesn't happen if the Makefile assigns a new value to the HOST_EXTRACFLAGS instead of appending to it. So use += instead of := for the first assignment. Fixes: e3fd9b5384f3 ("scripts/dtc: consolidate include path options in Makefile") Signed-off-by: Uwe Kleine-König Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- scripts/dtc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile index b5a5b1c548c9b2..c2dac994896b46 100644 --- a/scripts/dtc/Makefile +++ b/scripts/dtc/Makefile @@ -9,7 +9,7 @@ dtc-objs := dtc.o flattree.o fstree.o data.o livetree.o treesource.o \ dtc-objs += dtc-lexer.lex.o dtc-parser.tab.o # Source files need to get at the userspace version of libfdt_env.h to compile -HOST_EXTRACFLAGS := -I $(srctree)/$(src)/libfdt +HOST_EXTRACFLAGS += -I $(srctree)/$(src)/libfdt ifeq ($(shell pkg-config --exists yaml-0.1 2>/dev/null && echo yes),) ifneq ($(CHECK_DTBS),) From 1d13c3a5000b50de04e1611f7f8f04d3fd350d82 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 2 Oct 2020 17:16:11 +0200 Subject: [PATCH 46/58] random32: Restore __latent_entropy attribute on net_rand_state [ Upstream commit 09a6b0bc3be793ca8cba580b7992d73e9f68f15d ] Commit f227e3ec3b5c ("random32: update the net random state on interrupt and activity") broke compilation and was temporarily fixed by Linus in 83bdc7275e62 ("random32: remove net_rand_state from the latent entropy gcc plugin") by entirely moving net_rand_state out of the things handled by the latent_entropy GCC plugin. From what I understand when reading the plugin code, using the __latent_entropy attribute on a declaration was the wrong part and simply keeping the __latent_entropy attribute on the variable definition was the correct fix. Fixes: 83bdc7275e62 ("random32: remove net_rand_state from the latent entropy gcc plugin") Acked-by: Willy Tarreau Cc: Emese Revfy Signed-off-by: Thibaut Sautereau Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/random32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/random32.c b/lib/random32.c index 3d749abb9e80d5..1786f78bf4c533 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -48,7 +48,7 @@ static inline void prandom_state_selftest(void) } #endif -DEFINE_PER_CPU(struct rnd_state, net_rand_state); +DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; /** * prandom_u32_state - seeded pseudo-random number generator. From 2334b2d5a2bd1a4e9877623ef532b114d83bdf94 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 25 Mar 2020 16:07:04 +0300 Subject: [PATCH 47/58] block/diskstats: more accurate approximation of io_ticks for slow disks commit 2b8bd423614c595540eaadcfbc702afe8e155e50 upstream. Currently io_ticks is approximated by adding one at each start and end of requests if jiffies counter has changed. This works perfectly for requests shorter than a jiffy or if one of requests starts/ends at each jiffy. If disk executes just one request at a time and they are longer than two jiffies then only first and last jiffies will be accounted. Fix is simple: at the end of request add up into io_ticks jiffies passed since last update rather than just one jiffy. Example: common HDD executes random read 4k requests around 12ms. fio --name=test --filename=/dev/sdb --rw=randread --direct=1 --runtime=30 & iostat -x 10 sdb Note changes of iostat's "%util" 8,43% -> 99,99% before/after patch: Before: Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util sdb 0,00 0,00 82,60 0,00 330,40 0,00 8,00 0,96 12,09 12,09 0,00 1,02 8,43 After: Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util sdb 0,00 0,00 82,50 0,00 330,00 0,00 8,00 1,00 12,10 12,10 0,00 12,12 99,99 Now io_ticks does not loose time between start and end of requests, but for queue-depth > 1 some I/O time between adjacent starts might be lost. For load estimation "%util" is not as useful as average queue length, but it clearly shows how often disk queue is completely empty. Fixes: 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting") Signed-off-by: Konstantin Khlebnikov Reviewed-by: Ming Lei Signed-off-by: Jens Axboe From: "Banerjee, Debabrata" Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/iostats.rst | 5 ++++- block/bio.c | 8 ++++---- block/blk-core.c | 4 ++-- include/linux/genhd.h | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst index 5d63b18bd6d1f6..60c45c916f7d05 100644 --- a/Documentation/admin-guide/iostats.rst +++ b/Documentation/admin-guide/iostats.rst @@ -99,7 +99,7 @@ Field 10 -- # of milliseconds spent doing I/Os Since 5.0 this field counts jiffies when at least one request was started or completed. If request runs more than 2 jiffies then some - I/O time will not be accounted unless there are other requests. + I/O time might be not accounted in case of concurrent requests. Field 11 -- weighted # of milliseconds spent doing I/Os This field is incremented at each I/O start, I/O completion, I/O @@ -133,6 +133,9 @@ are summed (possibly overflowing the unsigned long variable they are summed to) and the result given to the user. There is no convenient user interface for accessing the per-CPU counters themselves. +Since 4.19 request times are measured with nanoseconds precision and +truncated to milliseconds before showing in this interface. + Disks vs Partitions ------------------- diff --git a/block/bio.c b/block/bio.c index f07739300dfe39..24704bc2ad6f1c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1754,14 +1754,14 @@ void bio_check_pages_dirty(struct bio *bio) schedule_work(&bio_dirty_work); } -void update_io_ticks(struct hd_struct *part, unsigned long now) +void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) { unsigned long stamp; again: stamp = READ_ONCE(part->stamp); if (unlikely(stamp != now)) { if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) { - __part_stat_add(part, io_ticks, 1); + __part_stat_add(part, io_ticks, end ? now - stamp : 1); } } if (part->partno) { @@ -1777,7 +1777,7 @@ void generic_start_io_acct(struct request_queue *q, int op, part_stat_lock(); - update_io_ticks(part, jiffies); + update_io_ticks(part, jiffies, false); part_stat_inc(part, ios[sgrp]); part_stat_add(part, sectors[sgrp], sectors); part_inc_in_flight(q, part, op_is_write(op)); @@ -1795,7 +1795,7 @@ void generic_end_io_acct(struct request_queue *q, int req_op, part_stat_lock(); - update_io_ticks(part, now); + update_io_ticks(part, now, true); part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); part_stat_add(part, time_in_queue, duration); part_dec_in_flight(q, part, op_is_write(req_op)); diff --git a/block/blk-core.c b/block/blk-core.c index ca6b6773568643..81aafb601df06d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1334,7 +1334,7 @@ void blk_account_io_done(struct request *req, u64 now) part_stat_lock(); part = req->part; - update_io_ticks(part, jiffies); + update_io_ticks(part, jiffies, true); part_stat_inc(part, ios[sgrp]); part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns)); @@ -1376,7 +1376,7 @@ void blk_account_io_start(struct request *rq, bool new_io) rq->part = part; } - update_io_ticks(part, jiffies); + update_io_ticks(part, jiffies, false); part_stat_unlock(); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 62a2ec9f17df80..c1bf9956256f6e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -419,7 +419,7 @@ static inline void free_part_info(struct hd_struct *part) kfree(part->info); } -void update_io_ticks(struct hd_struct *part, unsigned long now); +void update_io_ticks(struct hd_struct *part, unsigned long now, bool end); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, From 42b7153dd6a64b61141771f4ae0b1b1131321720 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 25 Sep 2020 21:19:28 -0700 Subject: [PATCH 48/58] mm: replace memmap_context by meminit_context commit c1d0da83358a2316d9be7f229f26126dbaa07468 upstream. Patch series "mm: fix memory to node bad links in sysfs", v3. Sometimes, firmware may expose interleaved memory layout like this: Early memory node ranges node 1: [mem 0x0000000000000000-0x000000011fffffff] node 2: [mem 0x0000000120000000-0x000000014fffffff] node 1: [mem 0x0000000150000000-0x00000001ffffffff] node 0: [mem 0x0000000200000000-0x000000048fffffff] node 2: [mem 0x0000000490000000-0x00000007ffffffff] In that case, we can see memory blocks assigned to multiple nodes in sysfs: $ ls -l /sys/devices/system/memory/memory21 total 0 lrwxrwxrwx 1 root root 0 Aug 24 05:27 node1 -> ../../node/node1 lrwxrwxrwx 1 root root 0 Aug 24 05:27 node2 -> ../../node/node2 -rw-r--r-- 1 root root 65536 Aug 24 05:27 online -r--r--r-- 1 root root 65536 Aug 24 05:27 phys_device -r--r--r-- 1 root root 65536 Aug 24 05:27 phys_index drwxr-xr-x 2 root root 0 Aug 24 05:27 power -r--r--r-- 1 root root 65536 Aug 24 05:27 removable -rw-r--r-- 1 root root 65536 Aug 24 05:27 state lrwxrwxrwx 1 root root 0 Aug 24 05:25 subsystem -> ../../../../bus/memory -rw-r--r-- 1 root root 65536 Aug 24 05:25 uevent -r--r--r-- 1 root root 65536 Aug 24 05:27 valid_zones The same applies in the node's directory with a memory21 link in both the node1 and node2's directory. This is wrong but doesn't prevent the system to run. However when later, one of these memory blocks is hot-unplugged and then hot-plugged, the system is detecting an inconsistency in the sysfs layout and a BUG_ON() is raised: kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084! LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4 CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25 Call Trace: add_memory_resource+0x23c/0x340 (unreliable) __add_memory+0x5c/0xf0 dlpar_add_lmb+0x1b4/0x500 dlpar_memory+0x1f8/0xb80 handle_dlpar_errorlog+0xc0/0x190 dlpar_store+0x198/0x4a0 kobj_attr_store+0x30/0x50 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1b0/0x290 vfs_write+0xe8/0x290 ksys_write+0xdc/0x130 system_call_exception+0x160/0x270 system_call_common+0xf0/0x27c This has been seen on PowerPC LPAR. The root cause of this issue is that when node's memory is registered, the range used can overlap another node's range, thus the memory block is registered to multiple nodes in sysfs. There are two issues here: (a) The sysfs memory and node's layouts are broken due to these multiple links (b) The link errors in link_mem_sections() should not lead to a system panic. To address (a) register_mem_sect_under_node should not rely on the system state to detect whether the link operation is triggered by a hot plug operation or not. This is addressed by the patches 1 and 2 of this series. Issue (b) will be addressed separately. This patch (of 2): The memmap_context enum is used to detect whether a memory operation is due to a hot-add operation or happening at boot time. Make it general to the hotplug operation and rename it as meminit_context. There is no functional change introduced by this patch Suggested-by: David Hildenbrand Signed-off-by: Laurent Dufour Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J . Wysocki" Cc: Nathan Lynch Cc: Scott Cheloha Cc: Tony Luck Cc: Fenghua Yu Cc: Link: https://lkml.kernel.org/r/20200915094143.79181-1-ldufour@linux.ibm.com Link: https://lkml.kernel.org/r/20200915132624.9723-1-ldufour@linux.ibm.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/ia64/mm/init.c | 6 +++--- include/linux/mm.h | 2 +- include/linux/mmzone.h | 11 ++++++++--- mm/memory_hotplug.c | 2 +- mm/page_alloc.c | 10 +++++----- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index a6dd80a2c93927..ee50506d86f426 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -518,7 +518,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMMAP_EARLY, NULL); + MEMINIT_EARLY, NULL); return 0; } @@ -527,8 +527,8 @@ memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { if (!vmem_map) { - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, - NULL); + memmap_init_zone(size, nid, zone, start_pfn, + MEMINIT_EARLY, NULL); } else { struct page *start; struct memmap_init_callback_data args; diff --git a/include/linux/mm.h b/include/linux/mm.h index 3285dae06c030e..34119f393a8023 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2208,7 +2208,7 @@ static inline void zero_resv_unavail(void) {} extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, - enum memmap_context, struct vmem_altmap *); + enum meminit_context, struct vmem_altmap *); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 85804ba6221529..a90aba3d6afb4b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -822,10 +822,15 @@ bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx); -enum memmap_context { - MEMMAP_EARLY, - MEMMAP_HOTPLUG, +/* + * Memory initialization context, use to differentiate memory added by + * the platform statically or via memory hotplug interface. + */ +enum meminit_context { + MEMINIT_EARLY, + MEMINIT_HOTPLUG, }; + extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3eb0b311b4a120..6a4b3a01e1b63b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -725,7 +725,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, * are reserved so nobody should be touching them so we should be safe */ memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, - MEMMAP_HOTPLUG, altmap); + MEMINIT_HOTPLUG, altmap); set_zone_contiguous(zone); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 67a9943aa595f6..373ca578075896 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5875,7 +5875,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) * done. Non-atomic initialization, single-pass. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, enum memmap_context context, + unsigned long start_pfn, enum meminit_context context, struct vmem_altmap *altmap) { unsigned long pfn, end_pfn = start_pfn + size; @@ -5907,7 +5907,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, * There can be holes in boot-time mem_map[]s handed to this * function. They do not exist on hotplugged memory. */ - if (context == MEMMAP_EARLY) { + if (context == MEMINIT_EARLY) { if (!early_pfn_valid(pfn)) continue; if (!early_pfn_in_nid(pfn, nid)) @@ -5920,7 +5920,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, page = pfn_to_page(pfn); __init_single_page(page, pfn, zone, nid); - if (context == MEMMAP_HOTPLUG) + if (context == MEMINIT_HOTPLUG) __SetPageReserved(page); /* @@ -6002,7 +6002,7 @@ void __ref memmap_init_zone_device(struct zone *zone, * check here not to call set_pageblock_migratetype() against * pfn out of zone. * - * Please note that MEMMAP_HOTPLUG path doesn't clear memmap + * Please note that MEMINIT_HOTPLUG path doesn't clear memmap * because this is done early in section_activate() */ if (!(pfn & (pageblock_nr_pages - 1))) { @@ -6028,7 +6028,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) void __meminit __weak memmap_init(unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, NULL); + memmap_init_zone(size, nid, zone, start_pfn, MEMINIT_EARLY, NULL); } static int zone_batchsize(struct zone *zone) From 9626c1a63703c08e6c17adceb0d4cd468fc7ce14 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 25 Sep 2020 21:19:31 -0700 Subject: [PATCH 49/58] mm: don't rely on system state to detect hot-plug operations commit f85086f95fa36194eb0db5cd5c12e56801b98523 upstream. In register_mem_sect_under_node() the system_state's value is checked to detect whether the call is made during boot time or during an hot-plug operation. Unfortunately, that check against SYSTEM_BOOTING is wrong because regular memory is registered at SYSTEM_SCHEDULING state. In addition, memory hot-plug operation can be triggered at this system state by the ACPI [1]. So checking against the system state is not enough. The consequence is that on system with interleaved node's ranges like this: Early memory node ranges node 1: [mem 0x0000000000000000-0x000000011fffffff] node 2: [mem 0x0000000120000000-0x000000014fffffff] node 1: [mem 0x0000000150000000-0x00000001ffffffff] node 0: [mem 0x0000000200000000-0x000000048fffffff] node 2: [mem 0x0000000490000000-0x00000007ffffffff] This can be seen on PowerPC LPAR after multiple memory hot-plug and hot-unplug operations are done. At the next reboot the node's memory ranges can be interleaved and since the call to link_mem_sections() is made in topology_init() while the system is in the SYSTEM_SCHEDULING state, the node's id is not checked, and the sections registered to multiple nodes: $ ls -l /sys/devices/system/memory/memory21/node* total 0 lrwxrwxrwx 1 root root 0 Aug 24 05:27 node1 -> ../../node/node1 lrwxrwxrwx 1 root root 0 Aug 24 05:27 node2 -> ../../node/node2 In that case, the system is able to boot but if later one of theses memory blocks is hot-unplugged and then hot-plugged, the sysfs inconsistency is detected and this is triggering a BUG_ON(): kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4 CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25 Call Trace: add_memory_resource+0x23c/0x340 (unreliable) __add_memory+0x5c/0xf0 dlpar_add_lmb+0x1b4/0x500 dlpar_memory+0x1f8/0xb80 handle_dlpar_errorlog+0xc0/0x190 dlpar_store+0x198/0x4a0 kobj_attr_store+0x30/0x50 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1b0/0x290 vfs_write+0xe8/0x290 ksys_write+0xdc/0x130 system_call_exception+0x160/0x270 system_call_common+0xf0/0x27c This patch addresses the root cause by not relying on the system_state value to detect whether the call is due to a hot-plug operation. An extra parameter is added to link_mem_sections() detailing whether the operation is due to a hot-plug operation. [1] According to Oscar Salvador, using this qemu command line, ACPI memory hotplug operations are raised at SYSTEM_SCHEDULING state: $QEMU -enable-kvm -machine pc -smp 4,sockets=4,cores=1,threads=1 -cpu host -monitor pty \ -m size=$MEM,slots=255,maxmem=4294967296k \ -numa node,nodeid=0,cpus=0-3,mem=512 -numa node,nodeid=1,mem=512 \ -object memory-backend-ram,id=memdimm0,size=134217728 -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \ -object memory-backend-ram,id=memdimm1,size=134217728 -device pc-dimm,node=0,memdev=memdimm1,id=dimm1,slot=1 \ -object memory-backend-ram,id=memdimm2,size=134217728 -device pc-dimm,node=0,memdev=memdimm2,id=dimm2,slot=2 \ -object memory-backend-ram,id=memdimm3,size=134217728 -device pc-dimm,node=0,memdev=memdimm3,id=dimm3,slot=3 \ -object memory-backend-ram,id=memdimm4,size=134217728 -device pc-dimm,node=1,memdev=memdimm4,id=dimm4,slot=4 \ -object memory-backend-ram,id=memdimm5,size=134217728 -device pc-dimm,node=1,memdev=memdimm5,id=dimm5,slot=5 \ -object memory-backend-ram,id=memdimm6,size=134217728 -device pc-dimm,node=1,memdev=memdimm6,id=dimm6,slot=6 \ Fixes: 4fbce633910e ("mm/memory_hotplug.c: make register_mem_sect_under_node() a callback of walk_memory_range()") Signed-off-by: Laurent Dufour Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Fenghua Yu Cc: Nathan Lynch Cc: Scott Cheloha Cc: Tony Luck Cc: Link: https://lkml.kernel.org/r/20200915094143.79181-3-ldufour@linux.ibm.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/base/node.c | 85 ++++++++++++++++++++++++++++---------------- include/linux/node.h | 11 +++--- mm/memory_hotplug.c | 3 +- 3 files changed, 64 insertions(+), 35 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 296546ffed6c12..9c6e6a7b93545c 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -758,14 +758,36 @@ static int __ref get_nid_for_pfn(unsigned long pfn) return pfn_to_nid(pfn); } +static int do_register_memory_block_under_node(int nid, + struct memory_block *mem_blk) +{ + int ret; + + /* + * If this memory block spans multiple nodes, we only indicate + * the last processed node. + */ + mem_blk->nid = nid; + + ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, + &mem_blk->dev.kobj, + kobject_name(&mem_blk->dev.kobj)); + if (ret) + return ret; + + return sysfs_create_link_nowarn(&mem_blk->dev.kobj, + &node_devices[nid]->dev.kobj, + kobject_name(&node_devices[nid]->dev.kobj)); +} + /* register memory section under specified node if it spans that node */ -static int register_mem_sect_under_node(struct memory_block *mem_blk, - void *arg) +static int register_mem_block_under_node_early(struct memory_block *mem_blk, + void *arg) { unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); unsigned long end_pfn = start_pfn + memory_block_pfns - 1; - int ret, nid = *(int *)arg; + int nid = *(int *)arg; unsigned long pfn; for (pfn = start_pfn; pfn <= end_pfn; pfn++) { @@ -782,38 +804,33 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk, } /* - * We need to check if page belongs to nid only for the boot - * case, during hotplug we know that all pages in the memory - * block belong to the same node. - */ - if (system_state == SYSTEM_BOOTING) { - page_nid = get_nid_for_pfn(pfn); - if (page_nid < 0) - continue; - if (page_nid != nid) - continue; - } - - /* - * If this memory block spans multiple nodes, we only indicate - * the last processed node. + * We need to check if page belongs to nid only at the boot + * case because node's ranges can be interleaved. */ - mem_blk->nid = nid; - - ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, - &mem_blk->dev.kobj, - kobject_name(&mem_blk->dev.kobj)); - if (ret) - return ret; + page_nid = get_nid_for_pfn(pfn); + if (page_nid < 0) + continue; + if (page_nid != nid) + continue; - return sysfs_create_link_nowarn(&mem_blk->dev.kobj, - &node_devices[nid]->dev.kobj, - kobject_name(&node_devices[nid]->dev.kobj)); + return do_register_memory_block_under_node(nid, mem_blk); } /* mem section does not span the specified node */ return 0; } +/* + * During hotplug we know that all pages in the memory block belong to the same + * node. + */ +static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk, + void *arg) +{ + int nid = *(int *)arg; + + return do_register_memory_block_under_node(nid, mem_blk); +} + /* * Unregister a memory block device under the node it spans. Memory blocks * with multiple nodes cannot be offlined and therefore also never be removed. @@ -829,11 +846,19 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); } -int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) +int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, + enum meminit_context context) { + walk_memory_blocks_func_t func; + + if (context == MEMINIT_HOTPLUG) + func = register_mem_block_under_node_hotplug; + else + func = register_mem_block_under_node_early; + return walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), (void *)&nid, - register_mem_sect_under_node); + func); } #ifdef CONFIG_HUGETLBFS diff --git a/include/linux/node.h b/include/linux/node.h index 4866f32a02d8d4..014ba3ab2efd8b 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -99,11 +99,13 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) -extern int link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn); +int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context); #else static inline int link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn) + unsigned long end_pfn, + enum meminit_context context) { return 0; } @@ -128,7 +130,8 @@ static inline int register_one_node(int nid) if (error) return error; /* link memory sections under this node */ - error = link_mem_sections(nid, start_pfn, end_pfn); + error = link_mem_sections(nid, start_pfn, end_pfn, + MEMINIT_EARLY); } return error; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6a4b3a01e1b63b..308beca3ffebcc 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1082,7 +1082,8 @@ int __ref add_memory_resource(int nid, struct resource *res) } /* link memory sections under this node.*/ - ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1)); + ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1), + MEMINIT_HOTPLUG); BUG_ON(ret); /* create new memmap entry */ From 34b939695f2852ff39ecb1c6b6abbcc7c13c3a12 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 21 Oct 2019 12:40:03 +0900 Subject: [PATCH 50/58] nvme: Cleanup and rename nvme_block_nr() commit 314d48dd224897e35ddcaf5a1d7d133b5adddeb7 upstream. Rename nvme_block_nr() to nvme_sect_to_lba() and use SECTOR_SHIFT instead of its hard coded value 9. Also add a comment to decribe this helper. Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal Signed-off-by: Keith Busch Signed-off-by: Jens Axboe Signed-off-by: Revanth Rajashekar 1 Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 6 +++--- drivers/nvme/host/nvme.h | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 24c6d5a446b79a..acad37cd894f17 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -630,7 +630,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, } __rq_for_each_bio(bio, req) { - u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); + u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; if (n < segments) { @@ -671,7 +671,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes; cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id); cmnd->write_zeroes.slba = - cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); cmnd->write_zeroes.control = 0; @@ -695,7 +695,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); - cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b7117fb09dd0f5..0660408f5ee638 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -429,9 +429,12 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); } -static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) +/* + * Convert a 512B sector number to a device logical block number. + */ +static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) { - return (sector >> (ns->lba_shift - 9)); + return sector >> (ns->lba_shift - SECTOR_SHIFT); } static inline void nvme_end_request(struct request *req, __le16 status, From 03f4f85bbd7dc2bb9ebb5e96937bbf778d89b586 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 21 Oct 2019 12:40:04 +0900 Subject: [PATCH 51/58] nvme: Introduce nvme_lba_to_sect() commit e08f2ae850929d40e66268ee47e443e7ea56eeb7 upstream. Introduce the new helper function nvme_lba_to_sect() to convert a device logical block number to a 512B sector number. Use this new helper in obvious places, cleaning up the code. Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal Signed-off-by: Keith Busch Signed-off-by: Jens Axboe Signed-off-by: Revanth Rajashekar Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 14 +++++++------- drivers/nvme/host/nvme.h | 8 ++++++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index acad37cd894f17..ec89f780cbf7fe 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1682,7 +1682,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) static void nvme_set_chunk_size(struct nvme_ns *ns) { - u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9)); + u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); } @@ -1719,8 +1719,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) { - u32 max_sectors; - unsigned short bs = 1 << ns->lba_shift; + u64 max_blocks; if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) @@ -1736,11 +1735,12 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) * nvme_init_identify() if available. */ if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9; + max_blocks = (u64)USHRT_MAX + 1; else - max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9; + max_blocks = ns->ctrl->max_hw_sectors + 1; - blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); + blk_queue_max_write_zeroes_sectors(disk->queue, + nvme_lba_to_sect(ns, max_blocks)); } static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, @@ -1774,7 +1774,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { - sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9); + sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); unsigned short bs = 1 << ns->lba_shift; u32 atomic_bs, phys_bs, io_opt; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 0660408f5ee638..0010a2b01a4f2c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -437,6 +437,14 @@ static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) return sector >> (ns->lba_shift - SECTOR_SHIFT); } +/* + * Convert a device logical block number to a 512B sector number. + */ +static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) +{ + return lba << (ns->lba_shift - SECTOR_SHIFT); +} + static inline void nvme_end_request(struct request *req, __le16 status, union nvme_result result) { From 8db44b30d392b03df5e0dfab02dbafc591ec1320 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Apr 2020 09:09:06 -0700 Subject: [PATCH 52/58] nvme: consolidate chunk_sectors settings commit 38adf94e166e3cb4eb89683458ca578051e8218d upstream. Move the quirked chunk_sectors setting to the same location as noiob so one place registers this setting. And since the noiob value is only used locally, remove the member from struct nvme_ns. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Revanth Rajashekar Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 22 ++++++++++------------ drivers/nvme/host/nvme.h | 1 - 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ec89f780cbf7fe..207ed6d49ad7c1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1680,12 +1680,6 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_set_chunk_size(struct nvme_ns *ns) -{ - u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); - blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); -} - static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1840,6 +1834,7 @@ static void nvme_update_disk_info(struct gendisk *disk, static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + u32 iob; /* * If identify namespace failed, use default 512 byte block size so @@ -1848,7 +1843,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; - ns->noiob = le16_to_cpu(id->noiob); + + if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ns->ctrl->max_hw_sectors)) + iob = ns->ctrl->max_hw_sectors; + else + iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* the PI implementation requires metadata equal t10 pi tuple size */ @@ -1857,8 +1858,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) else ns->pi_type = 0; - if (ns->noiob) - nvme_set_chunk_size(ns); + if (iob) + blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { @@ -2209,9 +2210,6 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ctrl->max_hw_sectors)) - blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 0010a2b01a4f2c..d7132d8cb7c5de 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -384,7 +384,6 @@ struct nvme_ns { #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 #define NVME_NS_ANA_PENDING 2 - u16 noiob; struct nvme_fault_inject fault_inject; From 8993da3d4d3a7ae721e9dafa140ba64c0e632a50 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 9 Sep 2020 22:25:06 -0400 Subject: [PATCH 53/58] epoll: do not insert into poll queues until all sanity checks are done commit f8d4f44df056c5b504b0d49683fb7279218fd207 upstream. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/eventpoll.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ae1d32344f7ac6..f70df53666ed14 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1527,6 +1527,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, RCU_INIT_POINTER(epi->ws, NULL); } + /* Add the current item to the list of active epoll hook for this file */ + spin_lock(&tfile->f_lock); + list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); + spin_unlock(&tfile->f_lock); + + /* + * Add the current item to the RB tree. All RB tree operations are + * protected by "mtx", and ep_insert() is called with "mtx" held. + */ + ep_rbtree_insert(ep, epi); + + /* now check if we've created too many backpaths */ + error = -EINVAL; + if (full_check && reverse_path_check()) + goto error_remove_epi; + /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); @@ -1549,22 +1565,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, if (epi->nwait < 0) goto error_unregister; - /* Add the current item to the list of active epoll hook for this file */ - spin_lock(&tfile->f_lock); - list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); - spin_unlock(&tfile->f_lock); - - /* - * Add the current item to the RB tree. All RB tree operations are - * protected by "mtx", and ep_insert() is called with "mtx" held. - */ - ep_rbtree_insert(ep, epi); - - /* now check if we've created too many backpaths */ - error = -EINVAL; - if (full_check && reverse_path_check()) - goto error_remove_epi; - /* We have to drop the new item inside our item list to keep track of it */ write_lock_irq(&ep->lock); @@ -1593,6 +1593,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, return 0; +error_unregister: + ep_unregister_pollwait(ep, epi); error_remove_epi: spin_lock(&tfile->f_lock); list_del_rcu(&epi->fllink); @@ -1600,9 +1602,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, rb_erase_cached(&epi->rbn, &ep->rbr); -error_unregister: - ep_unregister_pollwait(ep, epi); - /* * We need to do this because an event could have been arrived on some * allocated wait queue. Note that we don't care about the ep->ovflist From 099b7a1bc7910baefdb7d2341a9199c9c92aaecb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 10 Sep 2020 08:30:05 -0400 Subject: [PATCH 54/58] epoll: replace ->visited/visited_list with generation count commit 18306c404abe18a0972587a6266830583c60c928 upstream. removes the need to clear it, along with the races. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/eventpoll.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index f70df53666ed14..4962321c9fcd90 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -218,8 +218,7 @@ struct eventpoll { struct file *file; /* used to optimize loop detection check */ - int visited; - struct list_head visited_list_link; + u64 gen; #ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ @@ -269,6 +268,8 @@ static long max_user_watches __read_mostly; */ static DEFINE_MUTEX(epmutex); +static u64 loop_check_gen = 0; + /* Used to check for epoll file descriptor inclusion loops */ static struct nested_calls poll_loop_ncalls; @@ -278,9 +279,6 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly; -/* Visited nodes during ep_loop_check(), so we can unset them when we finish */ -static LIST_HEAD(visited_list); - /* * List of files with newly added links, where we may need to limit the number * of emanating paths. Protected by the epmutex. @@ -1968,13 +1966,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) struct epitem *epi; mutex_lock_nested(&ep->mtx, call_nests + 1); - ep->visited = 1; - list_add(&ep->visited_list_link, &visited_list); + ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { ep_tovisit = epi->ffd.file->private_data; - if (ep_tovisit->visited) + if (ep_tovisit->gen == loop_check_gen) continue; error = ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, epi->ffd.file, @@ -2015,18 +2012,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) */ static int ep_loop_check(struct eventpoll *ep, struct file *file) { - int ret; - struct eventpoll *ep_cur, *ep_next; - - ret = ep_call_nested(&poll_loop_ncalls, + return ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, file, ep, current); - /* clear visited list */ - list_for_each_entry_safe(ep_cur, ep_next, &visited_list, - visited_list_link) { - ep_cur->visited = 0; - list_del(&ep_cur->visited_list_link); - } - return ret; } static void clear_tfile_check_list(void) @@ -2248,6 +2235,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, error_tgt_fput: if (full_check) { clear_tfile_check_list(); + loop_check_gen++; mutex_unlock(&epmutex); } From 8e58bad666bb11432201d0c6020cffc73780bdd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 10 Sep 2020 08:33:27 -0400 Subject: [PATCH 55/58] epoll: EPOLL_CTL_ADD: close the race in decision to take fast path commit fe0a916c1eae8e17e86c3753d13919177d63ed7e upstream. Checking for the lack of epitems refering to the epoll we want to insert into is not enough; we might have an insertion of that epoll into another one that has already collected the set of files to recheck for excessive reverse paths, but hasn't gotten to creating/inserting the epitem for it. However, any such insertion in progress can be detected - it will update the generation count in our epoll when it's done looking through it for files to check. That gets done under ->mtx of our epoll and that allows us to detect that safely. We are *not* holding epmutex here, so the generation count is not stable. However, since both the update of ep->gen by loop check and (later) insertion into ->f_ep_link are done with ep->mtx held, we are fine - the sequence is grab epmutex bump loop_check_gen ... grab tep->mtx // 1 tep->gen = loop_check_gen ... drop tep->mtx // 2 ... grab tep->mtx // 3 ... insert into ->f_ep_link ... drop tep->mtx // 4 bump loop_check_gen drop epmutex and if the fastpath check in another thread happens for that eventpoll, it can come * before (1) - in that case fastpath is just fine * after (4) - we'll see non-empty ->f_ep_link, slow path taken * between (2) and (3) - loop_check_gen is stable, with ->mtx providing barriers and we end up taking slow path. Note that ->f_ep_link emptiness check is slightly racy - we are protected against insertions into that list, but removals can happen right under us. Not a problem - in the worst case we'll end up taking a slow path for no good reason. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/eventpoll.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4962321c9fcd90..333bb8f9ed0e8d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2175,6 +2175,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, mutex_lock_nested(&ep->mtx, 0); if (op == EPOLL_CTL_ADD) { if (!list_empty(&f.file->f_ep_links) || + ep->gen == loop_check_gen || is_file_epoll(tf.file)) { full_check = 1; mutex_unlock(&ep->mtx); From 27423bb05e251020660e092c2f6b78ad2e4cb9a3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Sep 2020 19:41:58 -0400 Subject: [PATCH 56/58] ep_create_wakeup_source(): dentry name can change under you... commit 3701cb59d892b88d569427586f01491552f377b1 upstream. or get freed, for that matter, if it's a long (separately stored) name. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/eventpoll.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 333bb8f9ed0e8d..339453ac834cc7 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1453,7 +1453,7 @@ static int reverse_path_check(void) static int ep_create_wakeup_source(struct epitem *epi) { - const char *name; + struct name_snapshot n; struct wakeup_source *ws; if (!epi->ep->ws) { @@ -1462,8 +1462,9 @@ static int ep_create_wakeup_source(struct epitem *epi) return -ENOMEM; } - name = epi->ffd.file->f_path.dentry->d_name.name; - ws = wakeup_source_register(NULL, name); + take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); + ws = wakeup_source_register(NULL, n.name.name); + release_dentry_name_snapshot(&n); if (!ws) return -ENOMEM; From 253052b636e98083b1ecc3e9b0cf6f151e1cb8c6 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Mon, 24 Aug 2020 19:38:32 +0000 Subject: [PATCH 57/58] netfilter: ctnetlink: add a range check for l3/l4 protonum commit 1cc5ef91d2ff94d2bf2de3b3585423e8a1051cb6 upstream. The indexes to the nf_nat_l[34]protos arrays come from userspace. So check the tuple's family, e.g. l3num, when creating the conntrack in order to prevent an OOB memory access during setup. Here is an example kernel panic on 4.14.180 when userspace passes in an index greater than NFPROTO_NUMPROTO. Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in:... Process poc (pid: 5614, stack limit = 0x00000000a3933121) CPU: 4 PID: 5614 Comm: poc Tainted: G S W O 4.14.180-g051355490483 Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 Google Inc. MSM task: 000000002a3dfffe task.stack: 00000000a3933121 pc : __cfi_check_fail+0x1c/0x24 lr : __cfi_check_fail+0x1c/0x24 ... Call trace: __cfi_check_fail+0x1c/0x24 name_to_dev_t+0x0/0x468 nfnetlink_parse_nat_setup+0x234/0x258 ctnetlink_parse_nat_setup+0x4c/0x228 ctnetlink_new_conntrack+0x590/0xc40 nfnetlink_rcv_msg+0x31c/0x4d4 netlink_rcv_skb+0x100/0x184 nfnetlink_rcv+0xf4/0x180 netlink_unicast+0x360/0x770 netlink_sendmsg+0x5a0/0x6a4 ___sys_sendmsg+0x314/0x46c SyS_sendmsg+0xb4/0x108 el0_svc_naked+0x34/0x38 This crash is not happening since 5.4+, however, ctnetlink still allows for creating entries with unsupported layer 3 protocol number. Fixes: c1d10adb4a521 ("[NETFILTER]: Add ctnetlink port for nf_conntrack") Signed-off-by: Will McVicker [pablo@netfilter.org: rebased original patch on top of nf.git] Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index aa8adf930b3cee..b7f0d52e5f1b6f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1141,6 +1141,8 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], if (!tb[CTA_TUPLE_IP]) return -EINVAL; + if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6) + return -EOPNOTSUPP; tuple->src.l3num = l3num; err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple); From d22f99d235e13356521b374410a6ee24f50b65e6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 7 Oct 2020 08:01:31 +0200 Subject: [PATCH 58/58] Linux 5.4.70 Tested-by: Jon Hunter Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20201005142109.796046410@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index adf3847106775e..e409fd909560f7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 69 +SUBLEVEL = 70 EXTRAVERSION = NAME = Kleptomaniac Octopus