diff --git a/MAINTAINERS b/MAINTAINERS index 173195719a6027..b0edb520a271d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9000,12 +9000,9 @@ K: (devm_)?gpio_regmap_(un)?register GPIO SUBSYSTEM M: Linus Walleij M: Bartosz Golaszewski -R: Andy Shevchenko L: linux-gpio@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git -F: Documentation/ABI/obsolete/sysfs-gpio -F: Documentation/ABI/testing/gpio-cdev F: Documentation/admin-guide/gpio/ F: Documentation/devicetree/bindings/gpio/ F: Documentation/driver-api/gpio/ @@ -9014,6 +9011,16 @@ F: include/dt-bindings/gpio/ F: include/linux/gpio.h F: include/linux/gpio/ F: include/linux/of_gpio.h + +GPIO UAPI +M: Bartosz Golaszewski +R: Kent Gibson +L: linux-gpio@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git +F: Documentation/ABI/obsolete/sysfs-gpio +F: Documentation/ABI/testing/gpio-cdev +F: drivers/gpio/gpiolib-cdev.c F: include/uapi/linux/gpio.h F: tools/gpio/ @@ -10639,6 +10646,7 @@ F: drivers/gpio/gpio-pch.c F: drivers/gpio/gpio-sch.c F: drivers/gpio/gpio-sodaville.c F: drivers/gpio/gpio-tangier.c +F: drivers/gpio/gpio-tangier.h INTEL GVT-g DRIVERS (Intel GPU Virtualization) M: Zhenyu Wang diff --git a/block/badblocks.c b/block/badblocks.c index fc92d4e18aa3c0..db4ec8b9b2a8c2 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -1312,12 +1312,14 @@ static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors, prev = prev_badblocks(bb, &bad, hint); /* start after all badblocks */ - if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) { + if ((prev >= 0) && + ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) { len = sectors; goto update_sectors; } - if (overlap_front(bb, prev, &bad)) { + /* Overlapped with front badblocks record */ + if ((prev >= 0) && overlap_front(bb, prev, &bad)) { if (BB_ACK(p[prev])) acked_badblocks++; else diff --git a/drivers/platform/x86/intel/pmc/adl.c b/drivers/platform/x86/intel/pmc/adl.c index 5006008e01bea2..606f7678bcb0a1 100644 --- a/drivers/platform/x86/intel/pmc/adl.c +++ b/drivers/platform/x86/intel/pmc/adl.c @@ -314,16 +314,13 @@ int adl_core_init(struct pmc_dev *pmcdev) struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; int ret; + pmcdev->suspend = cnl_suspend; + pmcdev->resume = cnl_resume; + pmc->map = &adl_reg_map; ret = get_primary_reg_base(pmc); if (ret) return ret; - /* Due to a hardware limitation, the GBE LTR blocks PC10 - * when a cable is attached. Tell the PMC to ignore it. - */ - dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3); - return 0; } diff --git a/drivers/platform/x86/intel/pmc/cnp.c b/drivers/platform/x86/intel/pmc/cnp.c index 420aaa1d7c7697..98b36651201a06 100644 --- a/drivers/platform/x86/intel/pmc/cnp.c +++ b/drivers/platform/x86/intel/pmc/cnp.c @@ -204,21 +204,35 @@ const struct pmc_reg_map cnp_reg_map = { .etr3_offset = ETR3_OFFSET, }; +void cnl_suspend(struct pmc_dev *pmcdev) +{ + /* + * Due to a hardware limitation, the GBE LTR blocks PC10 + * when a cable is attached. To unblock PC10 during suspend, + * tell the PMC to ignore it. + */ + pmc_core_send_ltr_ignore(pmcdev, 3, 1); +} + +int cnl_resume(struct pmc_dev *pmcdev) +{ + pmc_core_send_ltr_ignore(pmcdev, 3, 0); + + return pmc_core_resume_common(pmcdev); +} + int cnp_core_init(struct pmc_dev *pmcdev) { struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; int ret; + pmcdev->suspend = cnl_suspend; + pmcdev->resume = cnl_resume; + pmc->map = &cnp_reg_map; ret = get_primary_reg_base(pmc); if (ret) return ret; - /* Due to a hardware limitation, the GBE LTR blocks PC10 - * when a cable is attached. Tell the PMC to ignore it. - */ - dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3); - return 0; } diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index e95d3011b9997d..022afb97d531c9 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -460,7 +460,7 @@ static int pmc_core_pll_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(pmc_core_pll); -int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value) +int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value, int ignore) { struct pmc *pmc; const struct pmc_reg_map *map; @@ -498,7 +498,10 @@ int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value) mutex_lock(&pmcdev->lock); reg = pmc_core_reg_read(pmc, map->ltr_ignore_offset); - reg |= BIT(ltr_index); + if (ignore) + reg |= BIT(ltr_index); + else + reg &= ~BIT(ltr_index); pmc_core_reg_write(pmc, map->ltr_ignore_offset, reg); mutex_unlock(&pmcdev->lock); @@ -521,7 +524,7 @@ static ssize_t pmc_core_ltr_ignore_write(struct file *file, if (err) return err; - err = pmc_core_send_ltr_ignore(pmcdev, value); + err = pmc_core_send_ltr_ignore(pmcdev, value, 1); return err == 0 ? count : err; } @@ -1279,6 +1282,9 @@ static __maybe_unused int pmc_core_suspend(struct device *dev) struct pmc_dev *pmcdev = dev_get_drvdata(dev); struct pmc *pmc = pmcdev->pmcs[PMC_IDX_MAIN]; + if (pmcdev->suspend) + pmcdev->suspend(pmcdev); + /* Check if the syspend will actually use S0ix */ if (pm_suspend_via_firmware()) return 0; diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index 0729f593c6a759..b66dacbfb94bf7 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -363,6 +363,7 @@ struct pmc { * @s0ix_counter: S0ix residency (step adjusted) * @num_lpm_modes: Count of enabled modes * @lpm_en_modes: Array of enabled modes from lowest to highest priority + * @suspend: Function to perform platform specific suspend * @resume: Function to perform platform specific resume * * pmc_dev contains info about power management controller device. @@ -379,6 +380,7 @@ struct pmc_dev { u64 s0ix_counter; int num_lpm_modes; int lpm_en_modes[LPM_MAX_NUM_MODES]; + void (*suspend)(struct pmc_dev *pmcdev); int (*resume)(struct pmc_dev *pmcdev); bool has_die_c6; @@ -486,7 +488,7 @@ extern const struct pmc_bit_map *mtl_ioem_lpm_maps[]; extern const struct pmc_reg_map mtl_ioem_reg_map; extern void pmc_core_get_tgl_lpm_reqs(struct platform_device *pdev); -extern int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value); +int pmc_core_send_ltr_ignore(struct pmc_dev *pmcdev, u32 value, int ignore); int pmc_core_resume_common(struct pmc_dev *pmcdev); int get_primary_reg_base(struct pmc *pmc); @@ -500,6 +502,9 @@ int tgl_core_init(struct pmc_dev *pmcdev); int adl_core_init(struct pmc_dev *pmcdev); int mtl_core_init(struct pmc_dev *pmcdev); +void cnl_suspend(struct pmc_dev *pmcdev); +int cnl_resume(struct pmc_dev *pmcdev); + #define pmc_for_each_mode(i, mode, pmcdev) \ for (i = 0, mode = pmcdev->lpm_en_modes[i]; \ i < pmcdev->num_lpm_modes; \ diff --git a/drivers/platform/x86/intel/pmc/mtl.c b/drivers/platform/x86/intel/pmc/mtl.c index 2204bc666980ed..504e3e273c323b 100644 --- a/drivers/platform/x86/intel/pmc/mtl.c +++ b/drivers/platform/x86/intel/pmc/mtl.c @@ -979,6 +979,8 @@ static void mtl_d3_fixup(void) static int mtl_resume(struct pmc_dev *pmcdev) { mtl_d3_fixup(); + pmc_core_send_ltr_ignore(pmcdev, 3, 0); + return pmc_core_resume_common(pmcdev); } @@ -989,6 +991,7 @@ int mtl_core_init(struct pmc_dev *pmcdev) mtl_d3_fixup(); + pmcdev->suspend = cnl_suspend; pmcdev->resume = mtl_resume; pmcdev->regmap_list = mtl_pmc_info_list; @@ -1002,11 +1005,5 @@ int mtl_core_init(struct pmc_dev *pmcdev) return ret; } - /* Due to a hardware limitation, the GBE LTR blocks PC10 - * when a cable is attached. Tell the PMC to ignore it. - */ - dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3); - return 0; } diff --git a/drivers/platform/x86/intel/pmc/tgl.c b/drivers/platform/x86/intel/pmc/tgl.c index 2449940102db4f..e88d3d00c85393 100644 --- a/drivers/platform/x86/intel/pmc/tgl.c +++ b/drivers/platform/x86/intel/pmc/tgl.c @@ -259,16 +259,15 @@ int tgl_core_init(struct pmc_dev *pmcdev) int ret; pmc->map = &tgl_reg_map; + + pmcdev->suspend = cnl_suspend; + pmcdev->resume = cnl_resume; + ret = get_primary_reg_base(pmc); if (ret) return ret; pmc_core_get_tgl_lpm_reqs(pmcdev->pdev); - /* Due to a hardware limitation, the GBE LTR blocks PC10 - * when a cable is attached. Tell the PMC to ignore it. - */ - dev_dbg(&pmcdev->pdev->dev, "ignoring GBE LTR\n"); - pmc_core_send_ltr_ignore(pmcdev, 3); return 0; } diff --git a/drivers/platform/x86/p2sb.c b/drivers/platform/x86/p2sb.c index 1cf2471d54ddef..fcf1ce8bbdc5bd 100644 --- a/drivers/platform/x86/p2sb.c +++ b/drivers/platform/x86/p2sb.c @@ -26,6 +26,21 @@ static const struct x86_cpu_id p2sb_cpu_ids[] = { {} }; +/* + * Cache BAR0 of P2SB device functions 0 to 7. + * TODO: The constant 8 is the number of functions that PCI specification + * defines. Same definitions exist tree-wide. Unify this definition and + * the other definitions then move to include/uapi/linux/pci.h. + */ +#define NR_P2SB_RES_CACHE 8 + +struct p2sb_res_cache { + u32 bus_dev_id; + struct resource res; +}; + +static struct p2sb_res_cache p2sb_resources[NR_P2SB_RES_CACHE]; + static int p2sb_get_devfn(unsigned int *devfn) { unsigned int fn = P2SB_DEVFN_DEFAULT; @@ -39,8 +54,16 @@ static int p2sb_get_devfn(unsigned int *devfn) return 0; } +static bool p2sb_valid_resource(struct resource *res) +{ + if (res->flags) + return true; + + return false; +} + /* Copy resource from the first BAR of the device in question */ -static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) +static void p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) { struct resource *bar0 = &pdev->resource[0]; @@ -56,47 +79,64 @@ static int p2sb_read_bar0(struct pci_dev *pdev, struct resource *mem) mem->end = bar0->end; mem->flags = bar0->flags; mem->desc = bar0->desc; - - return 0; } -static int p2sb_scan_and_read(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static void p2sb_scan_and_cache_devfn(struct pci_bus *bus, unsigned int devfn) { + struct p2sb_res_cache *cache = &p2sb_resources[PCI_FUNC(devfn)]; struct pci_dev *pdev; - int ret; pdev = pci_scan_single_device(bus, devfn); if (!pdev) - return -ENODEV; + return; - ret = p2sb_read_bar0(pdev, mem); + p2sb_read_bar0(pdev, &cache->res); + cache->bus_dev_id = bus->dev.id; pci_stop_and_remove_bus_device(pdev); - return ret; + return; } -/** - * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR - * @bus: PCI bus to communicate with - * @devfn: PCI slot and function to communicate with - * @mem: memory resource to be filled in - * - * The BIOS prevents the P2SB device from being enumerated by the PCI - * subsystem, so we need to unhide and hide it back to lookup the BAR. - * - * if @bus is NULL, the bus 0 in domain 0 will be used. - * If @devfn is 0, it will be replaced by devfn of the P2SB device. - * - * Caller must provide a valid pointer to @mem. - * - * Locking is handled by pci_rescan_remove_lock mutex. - * - * Return: - * 0 on success or appropriate errno value on error. - */ -int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +static int p2sb_scan_and_cache(struct pci_bus *bus, unsigned int devfn) +{ + unsigned int slot, fn; + + if (PCI_FUNC(devfn) == 0) { + /* + * When function number of the P2SB device is zero, scan it and + * other function numbers, and if devices are available, cache + * their BAR0s. + */ + slot = PCI_SLOT(devfn); + for (fn = 0; fn < NR_P2SB_RES_CACHE; fn++) + p2sb_scan_and_cache_devfn(bus, PCI_DEVFN(slot, fn)); + } else { + /* Scan the P2SB device and cache its BAR0 */ + p2sb_scan_and_cache_devfn(bus, devfn); + } + + if (!p2sb_valid_resource(&p2sb_resources[PCI_FUNC(devfn)].res)) + return -ENOENT; + + return 0; +} + +static struct pci_bus *p2sb_get_bus(struct pci_bus *bus) +{ + static struct pci_bus *p2sb_bus; + + bus = bus ?: p2sb_bus; + if (bus) + return bus; + + /* Assume P2SB is on the bus 0 in domain 0 */ + p2sb_bus = pci_find_bus(0, 0); + return p2sb_bus; +} + +static int p2sb_cache_resources(void) { - struct pci_dev *pdev_p2sb; + struct pci_bus *bus; unsigned int devfn_p2sb; u32 value = P2SBC_HIDE; int ret; @@ -106,8 +146,9 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) if (ret) return ret; - /* if @bus is NULL, use bus 0 in domain 0 */ - bus = bus ?: pci_find_bus(0, 0); + bus = p2sb_get_bus(NULL); + if (!bus) + return -ENODEV; /* * Prevent concurrent PCI bus scan from seeing the P2SB device and @@ -115,17 +156,16 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) */ pci_lock_rescan_remove(); - /* Unhide the P2SB device, if needed */ + /* + * The BIOS prevents the P2SB device from being enumerated by the PCI + * subsystem, so we need to unhide and hide it back to lookup the BAR. + * Unhide the P2SB device here, if needed. + */ pci_bus_read_config_dword(bus, devfn_p2sb, P2SBC, &value); if (value & P2SBC_HIDE) pci_bus_write_config_dword(bus, devfn_p2sb, P2SBC, 0); - pdev_p2sb = pci_scan_single_device(bus, devfn_p2sb); - if (devfn) - ret = p2sb_scan_and_read(bus, devfn, mem); - else - ret = p2sb_read_bar0(pdev_p2sb, mem); - pci_stop_and_remove_bus_device(pdev_p2sb); + ret = p2sb_scan_and_cache(bus, devfn_p2sb); /* Hide the P2SB device, if it was hidden */ if (value & P2SBC_HIDE) @@ -133,12 +173,62 @@ int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) pci_unlock_rescan_remove(); - if (ret) - return ret; + return ret; +} + +/** + * p2sb_bar - Get Primary to Sideband (P2SB) bridge device BAR + * @bus: PCI bus to communicate with + * @devfn: PCI slot and function to communicate with + * @mem: memory resource to be filled in + * + * If @bus is NULL, the bus 0 in domain 0 will be used. + * If @devfn is 0, it will be replaced by devfn of the P2SB device. + * + * Caller must provide a valid pointer to @mem. + * + * Return: + * 0 on success or appropriate errno value on error. + */ +int p2sb_bar(struct pci_bus *bus, unsigned int devfn, struct resource *mem) +{ + struct p2sb_res_cache *cache; + int ret; - if (mem->flags == 0) + bus = p2sb_get_bus(bus); + if (!bus) return -ENODEV; + if (!devfn) { + ret = p2sb_get_devfn(&devfn); + if (ret) + return ret; + } + + cache = &p2sb_resources[PCI_FUNC(devfn)]; + if (cache->bus_dev_id != bus->dev.id) + return -ENODEV; + + if (!p2sb_valid_resource(&cache->res)) + return -ENOENT; + + memcpy(mem, &cache->res, sizeof(*mem)); return 0; } EXPORT_SYMBOL_GPL(p2sb_bar); + +static int __init p2sb_fs_init(void) +{ + p2sb_cache_resources(); + return 0; +} + +/* + * pci_rescan_remove_lock to avoid access to unhidden P2SB devices can + * not be locked in sysfs pci bus rescan path because of deadlock. To + * avoid the deadlock, access to P2SB devices with the lock at an early + * step in kernel initialization and cache required resources. This + * should happen after subsys_initcall which initializes PCI subsystem + * and before device_initcall which requires P2SB resources. + */ +fs_initcall(p2sb_fs_init); diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 2ccc849a5bda50..f0677ea0ec24e7 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -113,7 +113,14 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, * determined by the parent directory. */ if (dentry->d_inode->i_mode & S_IFDIR) { - update_attr(&ei->attr, iattr); + /* + * The events directory dentry is never freed, unless its + * part of an instance that is deleted. It's attr is the + * default for its child files and directories. + * Do not update it. It's not used for its own mode or ownership + */ + if (!ei->is_events) + update_attr(&ei->attr, iattr); } else { name = dentry->d_name.name; @@ -148,28 +155,93 @@ static const struct file_operations eventfs_file_operations = { .release = eventfs_release, }; +/* Return the evenfs_inode of the "events" directory */ +static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) +{ + struct eventfs_inode *ei; + + mutex_lock(&eventfs_mutex); + do { + /* The parent always has an ei, except for events itself */ + ei = dentry->d_parent->d_fsdata; + + /* + * If the ei is being freed, the ownership of the children + * doesn't matter. + */ + if (ei->is_freed) { + ei = NULL; + break; + } + + dentry = ei->dentry; + } while (!ei->is_events); + mutex_unlock(&eventfs_mutex); + + return ei; +} + static void update_inode_attr(struct dentry *dentry, struct inode *inode, struct eventfs_attr *attr, umode_t mode) { - if (!attr) { - inode->i_mode = mode; + struct eventfs_inode *events_ei = eventfs_find_events(dentry); + + if (!events_ei) + return; + + inode->i_mode = mode; + inode->i_uid = events_ei->attr.uid; + inode->i_gid = events_ei->attr.gid; + + if (!attr) return; - } if (attr->mode & EVENTFS_SAVE_MODE) inode->i_mode = attr->mode & EVENTFS_MODE_MASK; - else - inode->i_mode = mode; if (attr->mode & EVENTFS_SAVE_UID) inode->i_uid = attr->uid; - else - inode->i_uid = d_inode(dentry->d_parent)->i_uid; if (attr->mode & EVENTFS_SAVE_GID) inode->i_gid = attr->gid; - else - inode->i_gid = d_inode(dentry->d_parent)->i_gid; +} + +static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level) +{ + struct eventfs_inode *ei_child; + + /* at most we have events/system/event */ + if (WARN_ON_ONCE(level > 3)) + return; + + ei->attr.gid = gid; + + if (ei->entry_attrs) { + for (int i = 0; i < ei->nr_entries; i++) { + ei->entry_attrs[i].gid = gid; + } + } + + /* + * Only eventfs_inode with dentries are updated, make sure + * all eventfs_inodes are updated. If one of the children + * do not have a dentry, this function must traverse it. + */ + list_for_each_entry_srcu(ei_child, &ei->children, list, + srcu_read_lock_held(&eventfs_srcu)) { + if (!ei_child->dentry) + update_gid(ei_child, gid, level + 1); + } +} + +void eventfs_update_gid(struct dentry *dentry, kgid_t gid) +{ + struct eventfs_inode *ei = dentry->d_fsdata; + int idx; + + idx = srcu_read_lock(&eventfs_srcu); + update_gid(ei, gid, 0); + srcu_read_unlock(&eventfs_srcu, idx); } /** @@ -860,6 +932,8 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry struct eventfs_inode *ei; struct tracefs_inode *ti; struct inode *inode; + kuid_t uid; + kgid_t gid; if (security_locked_down(LOCKDOWN_TRACEFS)) return NULL; @@ -884,11 +958,20 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry ei->dentry = dentry; ei->entries = entries; ei->nr_entries = size; + ei->is_events = 1; ei->data = data; ei->name = kstrdup_const(name, GFP_KERNEL); if (!ei->name) goto fail; + /* Save the ownership of this directory */ + uid = d_inode(dentry->d_parent)->i_uid; + gid = d_inode(dentry->d_parent)->i_gid; + + /* This is used as the default ownership of the files and directories */ + ei->attr.uid = uid; + ei->attr.gid = gid; + INIT_LIST_HEAD(&ei->children); INIT_LIST_HEAD(&ei->list); @@ -897,6 +980,8 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry ti->private = ei; inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + inode->i_uid = uid; + inode->i_gid = gid; inode->i_op = &eventfs_root_dir_inode_operations; inode->i_fop = &eventfs_file_operations; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index ae648deed019cc..62524b20964ec9 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -210,6 +210,7 @@ static void set_gid(struct dentry *parent, kgid_t gid) next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { + struct tracefs_inode *ti; struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; @@ -218,6 +219,11 @@ static void set_gid(struct dentry *parent, kgid_t gid) change_gid(dentry, gid); + /* If this is the events directory, update that too */ + ti = get_tracefs(dentry->d_inode); + if (ti && (ti->flags & TRACEFS_EVENT_INODE)) + eventfs_update_gid(dentry, gid); + if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&this_parent->d_lock); spin_release(&dentry->d_lock.dep_map, _RET_IP_); diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index ccee18ca66c786..899e447778ac55 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -62,6 +62,7 @@ struct eventfs_inode { struct rcu_head rcu; }; unsigned int is_freed:1; + unsigned int is_events:1; unsigned int nr_entries:31; }; @@ -77,6 +78,7 @@ struct inode *tracefs_get_inode(struct super_block *sb); struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); struct dentry *eventfs_failed_creating(struct dentry *dentry); struct dentry *eventfs_end_creating(struct dentry *dentry); +void eventfs_update_gid(struct dentry *dentry, kgid_t gid); void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 51fa7ffdee83b4..88e9dd4b71fba3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -538,7 +538,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_HW_WC 18 /* Write back caching supported */ +#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index 5581dbd3bd3407..ea8fb31379e3cb 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -6,11 +6,6 @@ * An MCS like lock especially tailored for optimistic spinning for sleeping * lock implementations (mutex, rwsem, etc). */ -struct optimistic_spin_node { - struct optimistic_spin_node *next, *prev; - int locked; /* 1 if lock acquired */ - int cpu; /* encoded CPU # + 1 value */ -}; struct optimistic_spin_queue { /* diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index d5610ad52b92b5..75a6f6133866d8 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -11,6 +11,13 @@ * called from interrupt context and we have preemption disabled while * spinning. */ + +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; + int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # + 1 value */ +}; + static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); /* @@ -37,32 +44,28 @@ static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) /* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. + * + * If osq_lock() is being cancelled there must be a previous node + * and 'old_cpu' is its CPU #. + * For osq_unlock() there is never a previous node and old_cpu is + * set to OSQ_UNLOCKED_VAL. */ static inline struct optimistic_spin_node * osq_wait_next(struct optimistic_spin_queue *lock, struct optimistic_spin_node *node, - struct optimistic_spin_node *prev) + int old_cpu) { - struct optimistic_spin_node *next = NULL; int curr = encode_cpu(smp_processor_id()); - int old; - - /* - * If there is a prev node in queue, then the 'old' value will be - * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if - * we're currently last in queue, then the queue will then become empty. - */ - old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; for (;;) { if (atomic_read(&lock->tail) == curr && - atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) { + atomic_cmpxchg_acquire(&lock->tail, curr, old_cpu) == curr) { /* * We were the last queued, we moved @lock back. @prev * will now observe @lock and will complete its * unlock()/unqueue(). */ - break; + return NULL; } /* @@ -76,15 +79,15 @@ osq_wait_next(struct optimistic_spin_queue *lock, * wait for a new @node->next from its Step-C. */ if (node->next) { + struct optimistic_spin_node *next; + next = xchg(&node->next, NULL); if (next) - break; + return next; } cpu_relax(); } - - return next; } bool osq_lock(struct optimistic_spin_queue *lock) @@ -186,7 +189,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) * back to @prev. */ - next = osq_wait_next(lock, node, prev); + next = osq_wait_next(lock, node, prev->cpu); if (!next) return false; @@ -226,7 +229,7 @@ void osq_unlock(struct optimistic_spin_queue *lock) return; } - next = osq_wait_next(lock, node, NULL); + next = osq_wait_next(lock, node, OSQ_UNLOCKED_VAL); if (next) WRITE_ONCE(next->locked, 1); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8de8bec5f36640..b01ae7d3602181 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1183,18 +1183,19 @@ static void __add_hash_entry(struct ftrace_hash *hash, hash->count++; } -static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) +static struct ftrace_func_entry * +add_hash_entry(struct ftrace_hash *hash, unsigned long ip) { struct ftrace_func_entry *entry; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) - return -ENOMEM; + return NULL; entry->ip = ip; __add_hash_entry(hash, entry); - return 0; + return entry; } static void @@ -1349,7 +1350,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) struct ftrace_func_entry *entry; struct ftrace_hash *new_hash; int size; - int ret; int i; new_hash = alloc_ftrace_hash(size_bits); @@ -1366,8 +1366,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash) size = 1 << hash->size_bits; for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { - ret = add_hash_entry(new_hash, entry->ip); - if (ret < 0) + if (add_hash_entry(new_hash, entry->ip) == NULL) goto free_hash; } } @@ -2536,7 +2535,7 @@ ftrace_find_unique_ops(struct dyn_ftrace *rec) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* Protected by rcu_tasks for reading, and direct_mutex for writing */ -static struct ftrace_hash *direct_functions = EMPTY_HASH; +static struct ftrace_hash __rcu *direct_functions = EMPTY_HASH; static DEFINE_MUTEX(direct_mutex); int ftrace_direct_func_count; @@ -2555,39 +2554,6 @@ unsigned long ftrace_find_rec_direct(unsigned long ip) return entry->direct; } -static struct ftrace_func_entry* -ftrace_add_rec_direct(unsigned long ip, unsigned long addr, - struct ftrace_hash **free_hash) -{ - struct ftrace_func_entry *entry; - - if (ftrace_hash_empty(direct_functions) || - direct_functions->count > 2 * (1 << direct_functions->size_bits)) { - struct ftrace_hash *new_hash; - int size = ftrace_hash_empty(direct_functions) ? 0 : - direct_functions->count + 1; - - if (size < 32) - size = 32; - - new_hash = dup_hash(direct_functions, size); - if (!new_hash) - return NULL; - - *free_hash = direct_functions; - direct_functions = new_hash; - } - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return NULL; - - entry->ip = ip; - entry->direct = addr; - __add_hash_entry(direct_functions, entry); - return entry; -} - static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { @@ -4223,8 +4189,8 @@ enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int clear_filter) /* Do nothing if it exists */ if (entry) return 0; - - ret = add_hash_entry(hash, rec->ip); + if (add_hash_entry(hash, rec->ip) == NULL) + ret = -ENOMEM; } return ret; } @@ -5266,7 +5232,8 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return 0; } - return add_hash_entry(hash, ip); + entry = add_hash_entry(hash, ip); + return entry ? 0 : -ENOMEM; } static int @@ -5410,7 +5377,7 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long */ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { - struct ftrace_hash *hash, *free_hash = NULL; + struct ftrace_hash *hash, *new_hash = NULL, *free_hash = NULL; struct ftrace_func_entry *entry, *new; int err = -EBUSY, size, i; @@ -5436,17 +5403,44 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) } } - /* ... and insert them to direct_functions hash. */ err = -ENOMEM; + + /* Make a copy hash to place the new and the old entries in */ + size = hash->count + direct_functions->count; + if (size > 32) + size = 32; + new_hash = alloc_ftrace_hash(fls(size)); + if (!new_hash) + goto out_unlock; + + /* Now copy over the existing direct entries */ + size = 1 << direct_functions->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &direct_functions->buckets[i], hlist) { + new = add_hash_entry(new_hash, entry->ip); + if (!new) + goto out_unlock; + new->direct = entry->direct; + } + } + + /* ... and add the new entries */ + size = 1 << hash->size_bits; for (i = 0; i < size; i++) { hlist_for_each_entry(entry, &hash->buckets[i], hlist) { - new = ftrace_add_rec_direct(entry->ip, addr, &free_hash); + new = add_hash_entry(new_hash, entry->ip); if (!new) - goto out_remove; + goto out_unlock; + /* Update both the copy and the hash entry */ + new->direct = addr; entry->direct = addr; } } + free_hash = direct_functions; + rcu_assign_pointer(direct_functions, new_hash); + new_hash = NULL; + ops->func = call_direct_funcs; ops->flags = MULTI_FLAGS; ops->trampoline = FTRACE_REGS_ADDR; @@ -5454,17 +5448,17 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) err = register_ftrace_function_nolock(ops); - out_remove: - if (err) - remove_direct_functions_hash(hash, addr); - out_unlock: mutex_unlock(&direct_mutex); - if (free_hash) { + if (free_hash && free_hash != EMPTY_HASH) { synchronize_rcu_tasks(); free_ftrace_hash(free_hash); } + + if (new_hash) + free_ftrace_hash(new_hash); + return err; } EXPORT_SYMBOL_GPL(register_ftrace_direct); @@ -6309,7 +6303,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer) if (entry) continue; - if (add_hash_entry(hash, rec->ip) < 0) + if (add_hash_entry(hash, rec->ip) == NULL) goto out; } else { if (entry) { diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 83eab547f1d1f2..9286f88fcd32ac 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -881,9 +881,14 @@ static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int f if (!nr_pages || !full) return true; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu); + /* + * Add one as dirty will never equal nr_pages, as the sub-buffer + * that the writer is on is not counted as dirty. + * This is needed if "buffer_percent" is set to 100. + */ + dirty = ring_buffer_nr_dirty_pages(buffer, cpu) + 1; - return (dirty * 100) > (full * nr_pages); + return (dirty * 100) >= (full * nr_pages); } /* @@ -944,7 +949,8 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu) /* make sure the waiters see the new index */ smp_wmb(); - rb_wake_up_waiters(&rbwork->work); + /* This can be called in any context */ + irq_work_queue(&rbwork->work); } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 199df497db07eb..a0defe156b5710 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1894,6 +1894,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); + + /* Any waiters on the old snapshot buffer need to wake up */ + ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); } /** @@ -1945,12 +1948,23 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) static int wait_on_pipe(struct trace_iterator *iter, int full) { + int ret; + /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, - full); + ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full); + +#ifdef CONFIG_TRACER_MAX_TRACE + /* + * Make sure this is still the snapshot buffer, as if a snapshot were + * to happen, this would now be the main buffer. + */ + if (iter->snapshot) + iter->array_buffer = &iter->tr->max_buffer; +#endif + return ret; } #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -8517,7 +8531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, wait_index = READ_ONCE(iter->wait_index); - ret = wait_on_pipe(iter, iter->tr->buffer_percent); + ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); if (ret) goto out;