diff --git a/.gitlab-ci-private.yml b/.gitlab-ci-private.yml index 662b4365736fa3..8f1301cc579776 100644 --- a/.gitlab-ci-private.yml +++ b/.gitlab-ci-private.yml @@ -18,11 +18,12 @@ variables: title: ${CI_COMMIT_TITLE} kernel_type: internal make_target: rpm - builder_image: quay.io/cki/builder-rhel8 - build_kabi_whitelist: 'true' + builder_image: quay.io/cki/builder-rhel8.4 + build_kabi_stablelist: 'true' tree_yaml_name: rhel publish_elsewhere: 'true' disttag_override: '.el8_4' + skip_results: 'true' realtime_check: variables: diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3955aacc8e22d1..f1c56ce85870b4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -17,8 +17,8 @@ variables: title: ${CI_COMMIT_TITLE} kernel_type: internal make_target: rpm - builder_image: quay.io/cki/builder-rhel8 - build_kabi_whitelist: 'true' + builder_image: quay.io/cki/builder-rhel8.4 + build_kabi_stablelist: 'true' tree_yaml_name: rhel publish_elsewhere: 'true' disttag_override: '.el8_4' @@ -31,6 +31,7 @@ realtime_check: architectures: 'x86_64' package_name: kernel-rt skip_test: 'true' + skip_results: 'true' trigger: project: redhat/red-hat-ci-tools/kernel/cki-internal-pipelines/cki-internal-contributors branch: rhel8 diff --git a/Makefile.rhelver b/Makefile.rhelver index 0360d0f3d39cda..3df3812ebdfdeb 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 4 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 305.3.1 +RHEL_RELEASE = 305.7.1 # # Early y+1 numbering diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 27ceb6b4928757..8a81fb9cde1594 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -7,8 +7,6 @@ #include #include -extern void kvmclock_init(void); - #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else @@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, } #ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait_schedule(u32 token); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_apf_flags(void); -void kvm_disable_steal_time(void); bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -143,11 +142,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void) return 0; } -static inline void kvm_disable_steal_time(void) -{ - return; -} - static inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) { return false; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index afff6cce6ca02d..3e50aeec34724e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -49,6 +50,7 @@ #include #include #include +#include #include DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -354,7 +356,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); - pr_info("KVM setup async PF for cpu %d\n", smp_processor_id()); + pr_info("setup async PF for cpu %d\n", smp_processor_id()); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { @@ -380,34 +382,17 @@ static void kvm_pv_disable_apf(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); __this_cpu_write(apf_reason.enabled, 0); - pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id()); + pr_info("disable async PF for cpu %d\n", smp_processor_id()); } -static void kvm_pv_guest_cpu_reboot(void *unused) +static void kvm_disable_steal_time(void) { - /* - * We disable PV EOI before we load a new kernel by kexec, - * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. - * New kernel can re-enable when it boots. - */ - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - kvm_disable_steal_time(); -} + if (!has_steal_clock) + return; -static int kvm_pv_reboot_notify(struct notifier_block *nb, - unsigned long code, void *unused) -{ - if (code == SYS_RESTART) - on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); - return NOTIFY_DONE; + wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } -static struct notifier_block kvm_pv_reboot_nb = { - .notifier_call = kvm_pv_reboot_notify, -}; - static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -425,14 +410,6 @@ static u64 kvm_steal_clock(int cpu) return steal; } -void kvm_disable_steal_time(void) -{ - if (!has_steal_clock) - return; - - wrmsr(MSR_KVM_STEAL_TIME, 0, 0); -} - static inline void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); @@ -469,6 +446,27 @@ static bool pv_tlb_flush_supported(void) static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); +static void kvm_guest_cpu_offline(bool shutdown) +{ + kvm_disable_steal_time(); + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) + wrmsrl(MSR_KVM_PV_EOI_EN, 0); + kvm_pv_disable_apf(); + if (!shutdown) + apf_task_wake_all(); + kvmclock_disable(); +} + +static int kvm_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + kvm_guest_cpu_init(); + local_irq_restore(flags); + return 0; +} + #ifdef CONFIG_SMP static bool pv_ipi_supported(void) @@ -596,31 +594,34 @@ static void __init kvm_smp_prepare_boot_cpu(void) kvm_spinlock_init(); } -static void kvm_guest_cpu_offline(void) +static int kvm_cpu_down_prepare(unsigned int cpu) { - kvm_disable_steal_time(); - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - apf_task_wake_all(); + unsigned long flags; + + local_irq_save(flags); + kvm_guest_cpu_offline(false); + local_irq_restore(flags); + return 0; } -static int kvm_cpu_online(unsigned int cpu) +#endif + +static int kvm_suspend(void) { - local_irq_disable(); - kvm_guest_cpu_init(); - local_irq_enable(); + kvm_guest_cpu_offline(false); + return 0; } -static int kvm_cpu_down_prepare(unsigned int cpu) +static void kvm_resume(void) { - local_irq_disable(); - kvm_guest_cpu_offline(); - local_irq_enable(); - return 0; + kvm_cpu_online(raw_smp_processor_id()); } -#endif + +static struct syscore_ops kvm_syscore_ops = { + .suspend = kvm_suspend, + .resume = kvm_resume, +}; static void kvm_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) @@ -648,6 +649,37 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask, native_flush_tlb_others(flushmask, info); } +static void kvm_pv_guest_cpu_reboot(void *unused) +{ + kvm_guest_cpu_offline(true); +} + +static int kvm_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_reboot_nb = { + .notifier_call = kvm_pv_reboot_notify, +}; + +/* + * After a PV feature is registered, the host will keep writing to the + * registered memory location. If the guest happens to shutdown, this memory + * won't be valid. In cases like kexec, in which you install a new kernel, this + * means a random memory location will be kept being written. + */ +#ifdef CONFIG_KEXEC_CORE +static void kvm_crash_shutdown(struct pt_regs *regs) +{ + kvm_guest_cpu_offline(true); + native_machine_crash_shutdown(regs); +} +#endif + static void __init kvm_guest_init(void) { int i; @@ -690,6 +722,12 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif +#ifdef CONFIG_KEXEC_CORE + machine_ops.crash_shutdown = kvm_crash_shutdown; +#endif + + register_syscore_ops(&kvm_syscore_ops); + /* * Hard lockup detection is enabled by default. Disable it, as guests * can get false positives too easily, for example if the host is diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 3848a9c0cd0e25..2ba0f183f54f90 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include static int kvmclock __initdata = 1; @@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void) } #endif -/* - * After the clock is registered, the host will keep writing to the - * registered memory location. If the guest happens to shutdown, this memory - * won't be valid. In cases like kexec, in which you install a new kernel, this - * means a random memory location will be kept being written. So before any - * kind of shutdown from our side, we unregister the clock by writing anything - * that does not have the 'enable' bit set in the msr - */ -#ifdef CONFIG_KEXEC_CORE -static void kvm_crash_shutdown(struct pt_regs *regs) -{ - native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_crash_shutdown(regs); -} -#endif - -static void kvm_shutdown(void) +void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_shutdown(); } static void __init kvmclock_init_mem(void) @@ -352,10 +332,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; - machine_ops.shutdown = kvm_shutdown; -#ifdef CONFIG_KEXEC_CORE - machine_ops.crash_shutdown = kvm_crash_shutdown; -#endif kvm_get_preset_lpj(); /* diff --git a/certs/.gitignore b/certs/.gitignore index f51aea4a71ec81..6ce81161148efd 100644 --- a/certs/.gitignore +++ b/certs/.gitignore @@ -2,3 +2,4 @@ # Generated files # x509_certificate_list +x509_revocation_list diff --git a/certs/Kconfig b/certs/Kconfig index c94e93d8bccf03..ab88d2a7f3c7fb 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -83,4 +83,21 @@ config SYSTEM_BLACKLIST_HASH_LIST wrapper to incorporate the list into the kernel. Each should be a string of hex digits. +config SYSTEM_REVOCATION_LIST + bool "Provide system-wide ring of revocation certificates" + depends on SYSTEM_BLACKLIST_KEYRING + depends on PKCS7_MESSAGE_PARSER=y + help + If set, this allows revocation certificates to be stored in the + blacklist keyring and implements a hook whereby a PKCS#7 message can + be checked to see if it matches such a certificate. + +config SYSTEM_REVOCATION_KEYS + string "X.509 certificates to be preloaded into the system blacklist keyring" + depends on SYSTEM_REVOCATION_LIST + help + If set, this option should be the filename of a PEM-formatted file + containing X.509 certificates to be included in the default blacklist + keyring. + endmenu diff --git a/certs/Makefile b/certs/Makefile index 5d0999b9e21b14..c10a97d6046817 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -3,8 +3,9 @@ # Makefile for the linux kernel signature checking certificates. # -obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o -obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o +obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o +obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o +obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o else @@ -29,7 +30,7 @@ $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREF $(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS)) endif # CONFIG_SYSTEM_TRUSTED_KEYRING -clean-files := x509_certificate_list .x509.list +clean-files := x509_certificate_list .x509.list x509_revocation_list ifeq ($(CONFIG_MODULE_SIG),y) ############################################################################### @@ -104,3 +105,17 @@ targets += signing_key.x509 $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE $(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY)) endif # CONFIG_MODULE_SIG + +ifeq ($(CONFIG_SYSTEM_REVOCATION_LIST),y) + +$(eval $(call config_filename,SYSTEM_REVOCATION_KEYS)) + +$(obj)/revocation_certificates.o: $(obj)/x509_revocation_list + +quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2)) + cmd_extract_certs = scripts/extract-cert $(2) $@ + +targets += x509_revocation_list +$(obj)/x509_revocation_list: scripts/extract-cert $(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(SYSTEM_REVOCATION_KEYS_FILENAME) FORCE + $(call if_changed,extract_certs,$(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_REVOCATION_KEYS)) +endif diff --git a/certs/blacklist.c b/certs/blacklist.c index b82c77748ae22f..b1aff7c028a298 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -20,9 +20,15 @@ #include #include #include "blacklist.h" +#include "common.h" static struct key *blacklist_keyring; +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern __initconst const u8 revocation_certificate_list[]; +extern __initconst const unsigned long revocation_certificate_list_size; +#endif + /* * The description must be a type prefix, a colon and then an even number of * hex digits. The hash is kept in the description. @@ -148,6 +154,49 @@ int is_binary_blacklisted(const u8 *hash, size_t hash_len) } EXPORT_SYMBOL_GPL(is_binary_blacklisted); +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/** + * add_key_to_revocation_list - Add a revocation certificate to the blacklist + * @data: The data blob containing the certificate + * @size: The size of data blob + */ +int add_key_to_revocation_list(const char *data, size_t size) +{ + key_ref_t key; + + key = key_create_or_update(make_key_ref(blacklist_keyring, true), + "asymmetric", + NULL, + data, + size, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW), + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN); + + if (IS_ERR(key)) { + pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key)); + return PTR_ERR(key); + } + + return 0; +} + +/** + * is_key_on_revocation_list - Determine if the key for a PKCS#7 message is revoked + * @pkcs7: The PKCS#7 message to check + */ +int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + int ret; + + ret = pkcs7_validate_trust(pkcs7, blacklist_keyring); + + if (ret == 0) + return -EKEYREJECTED; + + return -ENOKEY; +} +#endif + /* * Initialise the blacklist */ @@ -181,3 +230,18 @@ static int __init blacklist_init(void) * Must be initialised before we try and load the keys into the keyring. */ device_initcall(blacklist_init); + +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/* + * Load the compiled-in list of revocation X.509 certificates. + */ +static __init int load_revocation_certificate_list(void) +{ + if (revocation_certificate_list_size) + pr_notice("Loading compiled-in revocation X.509 certificates\n"); + + return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size, + blacklist_keyring); +} +late_initcall(load_revocation_certificate_list); +#endif diff --git a/certs/blacklist.h b/certs/blacklist.h index 1efd6fa0dc608c..51b320cf85749e 100644 --- a/certs/blacklist.h +++ b/certs/blacklist.h @@ -1,3 +1,5 @@ #include +#include +#include extern const char __initconst *const blacklist_hashes[]; diff --git a/certs/common.c b/certs/common.c new file mode 100644 index 00000000000000..16a220887a53e8 --- /dev/null +++ b/certs/common.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include "common.h" + +int load_certificate_list(const u8 cert_list[], + const unsigned long list_size, + const struct key *keyring) +{ + key_ref_t key; + const u8 *p, *end; + size_t plen; + + p = cert_list; + end = p + list_size; + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. + */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(keyring, 1), + "asymmetric", + NULL, + p, + plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return 0; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); + return 0; +} diff --git a/certs/common.h b/certs/common.h new file mode 100644 index 00000000000000..abdb5795936b76 --- /dev/null +++ b/certs/common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _CERT_COMMON_H +#define _CERT_COMMON_H + +int load_certificate_list(const u8 cert_list[], const unsigned long list_size, + const struct key *keyring); + +#endif diff --git a/certs/revocation_certificates.S b/certs/revocation_certificates.S new file mode 100644 index 00000000000000..f21aae8a8f0ef7 --- /dev/null +++ b/certs/revocation_certificates.S @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + + __INITRODATA + + .align 8 + .globl revocation_certificate_list +revocation_certificate_list: +__revocation_list_start: + .incbin "certs/x509_revocation_list" +__revocation_list_end: + + .align 8 + .globl revocation_certificate_list_size +revocation_certificate_list_size: +#ifdef CONFIG_64BIT + .quad __revocation_list_end - __revocation_list_start +#else + .long __revocation_list_end - __revocation_list_start +#endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 4aaae48fa88ed2..c95357e28745f4 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -19,6 +19,7 @@ #include #include #include +#include "common.h" static struct key *builtin_trusted_keys; #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING @@ -140,54 +141,10 @@ device_initcall(system_trusted_keyring_init); */ static __init int load_system_certificate_list(void) { - key_ref_t key; - const u8 *p, *end; - size_t plen; - pr_notice("Loading compiled-in X.509 certificates\n"); - p = system_certificate_list; - end = p + system_certificate_list_size; - while (p < end) { - /* Each cert begins with an ASN.1 SEQUENCE tag and must be more - * than 256 bytes in size. - */ - if (end - p < 4) - goto dodgy_cert; - if (p[0] != 0x30 && - p[1] != 0x82) - goto dodgy_cert; - plen = (p[2] << 8) | p[3]; - plen += 4; - if (plen > end - p) - goto dodgy_cert; - - key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1), - "asymmetric", - NULL, - p, - plen, - ((KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA | - KEY_ALLOC_BUILT_IN | - KEY_ALLOC_BYPASS_RESTRICTION); - if (IS_ERR(key)) { - pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", - PTR_ERR(key)); - } else { - pr_notice("Loaded X.509 cert '%s'\n", - key_ref_to_ptr(key)->description); - key_ref_put(key); - } - p += plen; - } - - return 0; - -dodgy_cert: - pr_err("Problem parsing in-kernel X.509 certificate list\n"); - return 0; + return load_certificate_list(system_certificate_list, system_certificate_list_size, + builtin_trusted_keys); } late_initcall(load_system_certificate_list); @@ -245,6 +202,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len, pr_devel("PKCS#7 platform keyring is not available\n"); goto error; } + + ret = is_key_on_revocation_list(pkcs7); + if (ret != -ENOKEY) { + pr_devel("PKCS#7 platform key is on revocation list\n"); + goto error; + } } ret = pkcs7_validate_trust(pkcs7, trusted_keys); if (ret < 0) { diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index c1ebc74923bb1f..23cee7d8710244 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -523,7 +523,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) region.bdev = wc->ssd_dev->bdev; region.sector = 0; - region.count = PAGE_SIZE; + region.count = PAGE_SIZE >> SECTOR_SHIFT; if (unlikely(region.sector + region.count > wc->metadata_sectors)) region.count = wc->metadata_sectors - region.sector; diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 55770c7bc23786..5763b5eb890890 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3170,6 +3170,11 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64) ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; + + if (VMXNET3_VERSION_GE_4(adapter)) { + netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM); + } } diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index d2e226073d3451..930cdb58e51945 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -101,7 +101,7 @@ static const char *fnic_fcpio_status_to_str(unsigned int status) return fcpio_status_str[status]; } -static void fnic_cleanup_io(struct fnic *fnic, int exclude_id); +static void fnic_cleanup_io(struct fnic *fnic); static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic, struct scsi_cmnd *sc) @@ -637,7 +637,7 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic, atomic64_inc(&reset_stats->fw_reset_completions); /* Clean up all outstanding io requests */ - fnic_cleanup_io(fnic, SCSI_NO_TAG); + fnic_cleanup_io(fnic); atomic64_set(&fnic->fnic_stats.fw_stats.active_fw_reqs, 0); atomic64_set(&fnic->fnic_stats.io_stats.active_ios, 0); @@ -1360,93 +1360,90 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do) return wq_work_done; } -static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) +static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data, + bool reserved) { - int i; + struct fnic *fnic = data; struct fnic_io_req *io_req; unsigned long flags = 0; - struct scsi_cmnd *sc; spinlock_t *io_lock; unsigned long start_time = 0; struct fnic_stats *fnic_stats = &fnic->fnic_stats; - for (i = 0; i < fnic->fnic_max_tag_id; i++) { - if (i == exclude_id) - continue; - - io_lock = fnic_io_lock_tag(fnic, i); - spin_lock_irqsave(io_lock, flags); - sc = scsi_host_find_tag(fnic->lport->host, i); - if (!sc) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - - io_req = (struct fnic_io_req *)CMD_SP(sc); - if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && - !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) { - /* - * We will be here only when FW completes reset - * without sending completions for outstanding ios. - */ - CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE; - if (io_req && io_req->dr_done) - complete(io_req->dr_done); - else if (io_req && io_req->abts_done) - complete(io_req->abts_done); - spin_unlock_irqrestore(io_lock, flags); - continue; - } else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - if (!io_req) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - - CMD_SP(sc) = NULL; - - spin_unlock_irqrestore(io_lock, flags); + io_lock = fnic_io_lock_tag(fnic, sc->request->tag); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) { /* - * If there is a scsi_cmnd associated with this io_req, then - * free the corresponding state + * We will be here only when FW completes reset + * without sending completions for outstanding ios. */ - start_time = io_req->start_time; - fnic_release_ioreq_buf(fnic, io_req, sc); - mempool_free(io_req, fnic->io_req_pool); + CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE; + if (io_req && io_req->dr_done) + complete(io_req->dr_done); + else if (io_req && io_req->abts_done) + complete(io_req->abts_done); + spin_unlock_irqrestore(io_lock, flags); + return true; + } else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + spin_unlock_irqrestore(io_lock, flags); + return true; + } + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + goto cleanup_scsi_cmd; + } - sc->result = DID_TRANSPORT_DISRUPTED << 16; - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "%s: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n", - __func__, sc->request->tag, sc, - (jiffies - start_time)); + CMD_SP(sc) = NULL; - if (atomic64_read(&fnic->io_cmpl_skip)) - atomic64_dec(&fnic->io_cmpl_skip); - else - atomic64_inc(&fnic_stats->io_stats.io_completions); + spin_unlock_irqrestore(io_lock, flags); - /* Complete the command to SCSI */ - if (sc->scsi_done) { - if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) - shost_printk(KERN_ERR, fnic->lport->host, - "Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n", - sc->request->tag, sc); + /* + * If there is a scsi_cmnd associated with this io_req, then + * free the corresponding state + */ + start_time = io_req->start_time; + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); - FNIC_TRACE(fnic_cleanup_io, - sc->device->host->host_no, i, sc, - jiffies_to_msecs(jiffies - start_time), - 0, ((u64)sc->cmnd[0] << 32 | - (u64)sc->cmnd[2] << 24 | - (u64)sc->cmnd[3] << 16 | - (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), - (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); +cleanup_scsi_cmd: + sc->result = DID_TRANSPORT_DISRUPTED << 16; + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_cleanup_io: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n", + sc->request->tag, sc, (jiffies - start_time)); - sc->scsi_done(sc); - } + if (atomic64_read(&fnic->io_cmpl_skip)) + atomic64_dec(&fnic->io_cmpl_skip); + else + atomic64_inc(&fnic_stats->io_stats.io_completions); + + /* Complete the command to SCSI */ + if (sc->scsi_done) { + if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) + shost_printk(KERN_ERR, fnic->lport->host, + "Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n", + sc->request->tag, sc); + + FNIC_TRACE(fnic_cleanup_io, + sc->device->host->host_no, sc->request->tag, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | + (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + + sc->scsi_done(sc); } + return true; +} + +static void fnic_cleanup_io(struct fnic *fnic) +{ + scsi_host_busy_iter(fnic->lport->host, + fnic_cleanup_io_iter, fnic); } void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, @@ -1557,143 +1554,141 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, return 0; } -static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) +struct fnic_rport_abort_io_iter_data { + struct fnic *fnic; + u32 port_id; + int term_cnt; +}; + +static bool fnic_rport_abort_io_iter(struct scsi_cmnd *sc, void *data, + bool reserved) { - int tag; - int abt_tag; - int term_cnt = 0; + struct fnic_rport_abort_io_iter_data *iter_data = data; + struct fnic *fnic = iter_data->fnic; + int abt_tag = sc->request->tag; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; - struct scsi_cmnd *sc; struct reset_stats *reset_stats = &fnic->fnic_stats.reset_stats; struct terminate_stats *term_stats = &fnic->fnic_stats.term_stats; struct scsi_lun fc_lun; enum fnic_ioreq_state old_ioreq_state; - FNIC_SCSI_DBG(KERN_DEBUG, - fnic->lport->host, - "fnic_rport_exch_reset called portid 0x%06x\n", - port_id); - - if (fnic->in_remove) - return; - - for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) { - abt_tag = tag; - io_lock = fnic_io_lock_tag(fnic, tag); - spin_lock_irqsave(io_lock, flags); - sc = scsi_host_find_tag(fnic->lport->host, tag); - if (!sc) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } + io_lock = fnic_io_lock_tag(fnic, abt_tag); + spin_lock_irqsave(io_lock, flags); - io_req = (struct fnic_io_req *)CMD_SP(sc); + io_req = (struct fnic_io_req *)CMD_SP(sc); - if (!io_req || io_req->port_id != port_id) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } + if (!io_req || io_req->port_id != iter_data->port_id) { + spin_unlock_irqrestore(io_lock, flags); + return true; + } - if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && - (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_rport_exch_reset dev rst not pending sc 0x%p\n", sc); - spin_unlock_irqrestore(io_lock, flags); - continue; - } + spin_unlock_irqrestore(io_lock, flags); + return true; + } - /* - * Found IO that is still pending with firmware and - * belongs to rport that went away - */ - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - if (io_req->abts_done) { - shost_printk(KERN_ERR, fnic->lport->host, + /* + * Found IO that is still pending with firmware and + * belongs to rport that went away + */ + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + return true; + } + if (io_req->abts_done) { + shost_printk(KERN_ERR, fnic->lport->host, "fnic_rport_exch_reset: io_req->abts_done is set " "state is %s\n", fnic_ioreq_state_to_str(CMD_STATE(sc))); - } + } - if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) { - shost_printk(KERN_ERR, fnic->lport->host, - "rport_exch_reset " - "IO not yet issued %p tag 0x%x flags " - "%x state %d\n", - sc, tag, CMD_FLAGS(sc), CMD_STATE(sc)); - } - old_ioreq_state = CMD_STATE(sc); - CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; - CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; - if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { - atomic64_inc(&reset_stats->device_reset_terminates); - abt_tag = (tag | FNIC_TAG_DEV_RST); - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "fnic_rport_exch_reset dev rst sc 0x%p\n", - sc); - } + if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) { + shost_printk(KERN_ERR, fnic->lport->host, + "rport_exch_reset " + "IO not yet issued %p tag 0x%x flags " + "%x state %d\n", + sc, abt_tag, CMD_FLAGS(sc), CMD_STATE(sc)); + } + old_ioreq_state = CMD_STATE(sc); + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + atomic64_inc(&reset_stats->device_reset_terminates); + abt_tag |= FNIC_TAG_DEV_RST; + } + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_rport_exch_reset dev rst sc 0x%p\n", sc); + BUG_ON(io_req->abts_done); - BUG_ON(io_req->abts_done); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_rport_reset_exch: Issuing abts\n"); - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "fnic_rport_reset_exch: Issuing abts\n"); + spin_unlock_irqrestore(io_lock, flags); + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); + + if (fnic_queue_abort_io_req(fnic, abt_tag, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + /* + * Revert the cmd state back to old state, if + * it hasn't changed in between. This cmd will get + * aborted later by scsi_eh, or cleaned up during + * lun reset + */ + spin_lock_irqsave(io_lock, flags); + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) + CMD_STATE(sc) = old_ioreq_state; spin_unlock_irqrestore(io_lock, flags); + } else { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + else + CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; + spin_unlock_irqrestore(io_lock, flags); + atomic64_inc(&term_stats->terminates); + iter_data->term_cnt++; + } + return true; +} - /* Now queue the abort command to firmware */ - int_to_scsilun(sc->device->lun, &fc_lun); +static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) +{ + struct terminate_stats *term_stats = &fnic->fnic_stats.term_stats; + struct fnic_rport_abort_io_iter_data iter_data = { + .fnic = fnic, + .port_id = port_id, + .term_cnt = 0, + }; - if (fnic_queue_abort_io_req(fnic, abt_tag, - FCPIO_ITMF_ABT_TASK_TERM, - fc_lun.scsi_lun, io_req)) { - /* - * Revert the cmd state back to old state, if - * it hasn't changed in between. This cmd will get - * aborted later by scsi_eh, or cleaned up during - * lun reset - */ - spin_lock_irqsave(io_lock, flags); - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) - CMD_STATE(sc) = old_ioreq_state; - spin_unlock_irqrestore(io_lock, flags); - } else { - spin_lock_irqsave(io_lock, flags); - if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) - CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; - else - CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; - spin_unlock_irqrestore(io_lock, flags); - atomic64_inc(&term_stats->terminates); - term_cnt++; - } - } - if (term_cnt > atomic64_read(&term_stats->max_terminates)) - atomic64_set(&term_stats->max_terminates, term_cnt); + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "fnic_rport_exch_reset called portid 0x%06x\n", + port_id); + + if (fnic->in_remove) + return; + + scsi_host_busy_iter(fnic->lport->host, fnic_rport_abort_io_iter, + &iter_data); + if (iter_data.term_cnt > atomic64_read(&term_stats->max_terminates)) + atomic64_set(&term_stats->max_terminates, iter_data.term_cnt); } void fnic_terminate_rport_io(struct fc_rport *rport) { - int tag; - int abt_tag; - int term_cnt = 0; - struct fnic_io_req *io_req; - spinlock_t *io_lock; - unsigned long flags; - struct scsi_cmnd *sc; - struct scsi_lun fc_lun; struct fc_rport_libfc_priv *rdata; struct fc_lport *lport; struct fnic *fnic; - struct fc_rport *cmd_rport; - struct reset_stats *reset_stats; - struct terminate_stats *term_stats; - enum fnic_ioreq_state old_ioreq_state; if (!rport) { printk(KERN_ERR "fnic_terminate_rport_io: rport is NULL\n"); @@ -1721,108 +1716,7 @@ void fnic_terminate_rport_io(struct fc_rport *rport) if (fnic->in_remove) return; - reset_stats = &fnic->fnic_stats.reset_stats; - term_stats = &fnic->fnic_stats.term_stats; - - for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) { - abt_tag = tag; - io_lock = fnic_io_lock_tag(fnic, tag); - spin_lock_irqsave(io_lock, flags); - sc = scsi_host_find_tag(fnic->lport->host, tag); - if (!sc) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - - io_req = (struct fnic_io_req *)CMD_SP(sc); - if (!io_req) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - - cmd_rport = starget_to_rport(scsi_target(sc->device)); - if (rport != cmd_rport) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - - if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && - (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "fnic_terminate_rport_io dev rst not pending sc 0x%p\n", - sc); - spin_unlock_irqrestore(io_lock, flags); - continue; - } - /* - * Found IO that is still pending with firmware and - * belongs to rport that went away - */ - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - if (io_req->abts_done) { - shost_printk(KERN_ERR, fnic->lport->host, - "fnic_terminate_rport_io: io_req->abts_done is set " - "state is %s\n", - fnic_ioreq_state_to_str(CMD_STATE(sc))); - } - if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) { - FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, - "fnic_terminate_rport_io " - "IO not yet issued %p tag 0x%x flags " - "%x state %d\n", - sc, tag, CMD_FLAGS(sc), CMD_STATE(sc)); - } - old_ioreq_state = CMD_STATE(sc); - CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; - CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; - if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { - atomic64_inc(&reset_stats->device_reset_terminates); - abt_tag = (tag | FNIC_TAG_DEV_RST); - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "fnic_terminate_rport_io dev rst sc 0x%p\n", sc); - } - - BUG_ON(io_req->abts_done); - - FNIC_SCSI_DBG(KERN_DEBUG, - fnic->lport->host, - "fnic_terminate_rport_io: Issuing abts\n"); - - spin_unlock_irqrestore(io_lock, flags); - - /* Now queue the abort command to firmware */ - int_to_scsilun(sc->device->lun, &fc_lun); - - if (fnic_queue_abort_io_req(fnic, abt_tag, - FCPIO_ITMF_ABT_TASK_TERM, - fc_lun.scsi_lun, io_req)) { - /* - * Revert the cmd state back to old state, if - * it hasn't changed in between. This cmd will get - * aborted later by scsi_eh, or cleaned up during - * lun reset - */ - spin_lock_irqsave(io_lock, flags); - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) - CMD_STATE(sc) = old_ioreq_state; - spin_unlock_irqrestore(io_lock, flags); - } else { - spin_lock_irqsave(io_lock, flags); - if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) - CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; - else - CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; - spin_unlock_irqrestore(io_lock, flags); - atomic64_inc(&term_stats->terminates); - term_cnt++; - } - } - if (term_cnt > atomic64_read(&term_stats->max_terminates)) - atomic64_set(&term_stats->max_terminates, term_cnt); - + fnic_rport_exch_reset(fnic, rport->port_id); } /* @@ -2117,165 +2011,183 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic, return ret; } -/* - * Clean up any pending aborts on the lun - * For each outstanding IO on this lun, whose abort is not completed by fw, - * issue a local abort. Wait for abort to complete. Return 0 if all commands - * successfully aborted, 1 otherwise - */ -static int fnic_clean_pending_aborts(struct fnic *fnic, - struct scsi_cmnd *lr_sc, - bool new_sc) +struct fnic_pending_aborts_iter_data { + struct fnic *fnic; + struct scsi_cmnd *lr_sc; + struct scsi_device *lun_dev; + int ret; +}; +static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, + void *data, bool reserved) { - int tag, abt_tag; + struct fnic_pending_aborts_iter_data *iter_data = data; + struct fnic *fnic = iter_data->fnic; + struct scsi_device *lun_dev = iter_data->lun_dev; + int abt_tag = sc->request->tag; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; - int ret = 0; - struct scsi_cmnd *sc; struct scsi_lun fc_lun; - struct scsi_device *lun_dev = lr_sc->device; DECLARE_COMPLETION_ONSTACK(tm_done); enum fnic_ioreq_state old_ioreq_state; - for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) { - io_lock = fnic_io_lock_tag(fnic, tag); - spin_lock_irqsave(io_lock, flags); - sc = scsi_host_find_tag(fnic->lport->host, tag); - /* - * ignore this lun reset cmd if issued using new SC - * or cmds that do not belong to this lun - */ - if (!sc || ((sc == lr_sc) && new_sc) || sc->device != lun_dev) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - - io_req = (struct fnic_io_req *)CMD_SP(sc); + if (sc == iter_data->lr_sc || sc->device != lun_dev) + return true; + if (reserved) + return true; - if (!io_req || sc->device != lun_dev) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - - /* - * Found IO that is still pending with firmware and - * belongs to the LUN that we are resetting - */ - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "Found IO in %s on lun\n", - fnic_ioreq_state_to_str(CMD_STATE(sc))); - - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } - if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && - (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { - FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, - "%s dev rst not pending sc 0x%p\n", __func__, - sc); - spin_unlock_irqrestore(io_lock, flags); - continue; - } + io_lock = fnic_io_lock_tag(fnic, abt_tag); + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + return true; + } - if (io_req->abts_done) - shost_printk(KERN_ERR, fnic->lport->host, - "%s: io_req->abts_done is set state is %s\n", - __func__, fnic_ioreq_state_to_str(CMD_STATE(sc))); - old_ioreq_state = CMD_STATE(sc); - /* - * Any pending IO issued prior to reset is expected to be - * in abts pending state, if not we need to set - * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending. - * When IO is completed, the IO will be handed over and - * handled in this function. - */ - CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + /* + * Found IO that is still pending with firmware and + * belongs to the LUN that we are resetting + */ + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Found IO in %s on lun\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); - BUG_ON(io_req->abts_done); + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + return true; + } + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "%s dev rst not pending sc 0x%p\n", __func__, + sc); + spin_unlock_irqrestore(io_lock, flags); + return true; + } - abt_tag = tag; - if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { - abt_tag |= FNIC_TAG_DEV_RST; - FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, - "%s: dev rst sc 0x%p\n", __func__, sc); - } + if (io_req->abts_done) + shost_printk(KERN_ERR, fnic->lport->host, + "%s: io_req->abts_done is set state is %s\n", + __func__, fnic_ioreq_state_to_str(CMD_STATE(sc))); + old_ioreq_state = CMD_STATE(sc); + /* + * Any pending IO issued prior to reset is expected to be + * in abts pending state, if not we need to set + * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending. + * When IO is completed, the IO will be handed over and + * handled in this function. + */ + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; - CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; - io_req->abts_done = &tm_done; - spin_unlock_irqrestore(io_lock, flags); + BUG_ON(io_req->abts_done); - /* Now queue the abort command to firmware */ - int_to_scsilun(sc->device->lun, &fc_lun); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + abt_tag |= FNIC_TAG_DEV_RST; + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "%s: dev rst sc 0x%p\n", __func__, sc); + } - if (fnic_queue_abort_io_req(fnic, abt_tag, - FCPIO_ITMF_ABT_TASK_TERM, - fc_lun.scsi_lun, io_req)) { - spin_lock_irqsave(io_lock, flags); - io_req = (struct fnic_io_req *)CMD_SP(sc); - if (io_req) - io_req->abts_done = NULL; - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) - CMD_STATE(sc) = old_ioreq_state; - spin_unlock_irqrestore(io_lock, flags); - ret = 1; - goto clean_pending_aborts_end; - } else { - spin_lock_irqsave(io_lock, flags); - if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) - CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; - spin_unlock_irqrestore(io_lock, flags); - } - CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; + CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + io_req->abts_done = &tm_done; + spin_unlock_irqrestore(io_lock, flags); - wait_for_completion_timeout(&tm_done, - msecs_to_jiffies - (fnic->config.ed_tov)); + /* Now queue the abort command to firmware */ + int_to_scsilun(sc->device->lun, &fc_lun); - /* Recheck cmd state to check if it is now aborted */ + if (fnic_queue_abort_io_req(fnic, abt_tag, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); - if (!io_req) { - spin_unlock_irqrestore(io_lock, flags); - CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL; - continue; - } + if (io_req) + io_req->abts_done = NULL; + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) + CMD_STATE(sc) = old_ioreq_state; + spin_unlock_irqrestore(io_lock, flags); + iter_data->ret = FAILED; + return false; + } else { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + spin_unlock_irqrestore(io_lock, flags); + } + CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; - io_req->abts_done = NULL; + wait_for_completion_timeout(&tm_done, msecs_to_jiffies + (fnic->config.ed_tov)); - /* if abort is still pending with fw, fail */ - if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) { - spin_unlock_irqrestore(io_lock, flags); - CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; - ret = 1; - goto clean_pending_aborts_end; - } - CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; + /* Recheck cmd state to check if it is now aborted */ + spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL; + return true; + } - /* original sc used for lr is handled by dev reset code */ - if (sc != lr_sc) - CMD_SP(sc) = NULL; + io_req->abts_done = NULL; + + /* if abort is still pending with fw, fail */ + if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; + iter_data->ret = FAILED; + return false; + } + CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; - /* original sc used for lr is handled by dev reset code */ - if (sc != lr_sc) { - fnic_release_ioreq_buf(fnic, io_req, sc); - mempool_free(io_req, fnic->io_req_pool); - } + /* original sc used for lr is handled by dev reset code */ + if (sc != iter_data->lr_sc) + CMD_SP(sc) = NULL; + spin_unlock_irqrestore(io_lock, flags); - /* - * Any IO is returned during reset, it needs to call scsi_done - * to return the scsi_cmnd to upper layer. - */ - if (sc->scsi_done) { - /* Set result to let upper SCSI layer retry */ - sc->result = DID_RESET << 16; - sc->scsi_done(sc); - } + /* original sc used for lr is handled by dev reset code */ + if (sc != iter_data->lr_sc) { + fnic_release_ioreq_buf(fnic, io_req, sc); + mempool_free(io_req, fnic->io_req_pool); } + /* + * Any IO is returned during reset, it needs to call scsi_done + * to return the scsi_cmnd to upper layer. + */ + if (sc->scsi_done) { + /* Set result to let upper SCSI layer retry */ + sc->result = DID_RESET << 16; + sc->scsi_done(sc); + } + return true; +} + +/* + * Clean up any pending aborts on the lun + * For each outstanding IO on this lun, whose abort is not completed by fw, + * issue a local abort. Wait for abort to complete. Return 0 if all commands + * successfully aborted, 1 otherwise + */ +static int fnic_clean_pending_aborts(struct fnic *fnic, + struct scsi_cmnd *lr_sc, + bool new_sc) + +{ + int ret = SUCCESS; + struct fnic_pending_aborts_iter_data iter_data = { + .fnic = fnic, + .lun_dev = lr_sc->device, + .ret = SUCCESS, + }; + + if (new_sc) + iter_data.lr_sc = lr_sc; + + scsi_host_busy_iter(fnic->lport->host, + fnic_pending_aborts_iter, &iter_data); + if (iter_data.ret == FAILED) { + ret = iter_data.ret; + goto clean_pending_aborts_end; + } schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov)); /* walk again to check, if IOs are still pending in fw */ @@ -2774,58 +2686,72 @@ void fnic_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did) } -/* - * fnic_is_abts_pending() is a helper function that - * walks through tag map to check if there is any IOs pending,if there is one, - * then it returns 1 (true), otherwise 0 (false) - * if @lr_sc is non NULL, then it checks IOs specific to particular LUN, - * otherwise, it checks for all IOs. - */ -int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc) +static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data, + bool reserved) { - int tag; + struct fnic_pending_aborts_iter_data *iter_data = data; + struct fnic *fnic = iter_data->fnic; + int cmd_state; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; - int ret = 0; - struct scsi_cmnd *sc; - struct scsi_device *lun_dev = NULL; - if (lr_sc) - lun_dev = lr_sc->device; + /* + * ignore this lun reset cmd or cmds that do not belong to + * this lun + */ + if (iter_data->lr_sc && sc == iter_data->lr_sc) + return true; + if (iter_data->lun_dev && sc->device != iter_data->lun_dev) + return true; - /* walk again to check, if IOs are still pending in fw */ - for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) { - sc = scsi_host_find_tag(fnic->lport->host, tag); - /* - * ignore this lun reset cmd or cmds that do not belong to - * this lun - */ - if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc))) - continue; + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); + io_req = (struct fnic_io_req *)CMD_SP(sc); + if (!io_req) { + spin_unlock_irqrestore(io_lock, flags); + return true; + } - io_req = (struct fnic_io_req *)CMD_SP(sc); + /* + * Found IO that is still pending with firmware and + * belongs to the LUN that we are resetting + */ + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "Found IO in %s on lun\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + cmd_state = CMD_STATE(sc); + spin_unlock_irqrestore(io_lock, flags); + if (cmd_state == FNIC_IOREQ_ABTS_PENDING) + iter_data->ret = 1; - if (!io_req || sc->device != lun_dev) { - spin_unlock_irqrestore(io_lock, flags); - continue; - } + return iter_data->ret ? false : true; +} - /* - * Found IO that is still pending with firmware and - * belongs to the LUN that we are resetting - */ - FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, - "Found IO in %s on lun\n", - fnic_ioreq_state_to_str(CMD_STATE(sc))); +/* + * fnic_is_abts_pending() is a helper function that + * walks through tag map to check if there is any IOs pending,if there is one, + * then it returns 1 (true), otherwise 0 (false) + * if @lr_sc is non NULL, then it checks IOs specific to particular LUN, + * otherwise, it checks for all IOs. + */ +int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc) +{ + struct fnic_pending_aborts_iter_data iter_data = { + .fnic = fnic, + .lun_dev = NULL, + .ret = 0, + }; - if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) - ret = 1; - spin_unlock_irqrestore(io_lock, flags); + if (lr_sc) { + iter_data.lun_dev = lr_sc->device; + iter_data.lr_sc = lr_sc; } - return ret; + /* walk again to check, if IOs are still pending in fw */ + scsi_host_busy_iter(fnic->lport->host, + fnic_abts_pending_iter, &iter_data); + + return iter_data.ret; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 2056e5ecf24fe1..7f45c3c2c84532 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -522,8 +522,19 @@ static int freeze_go_sync(struct gfs2_glock *gl) int error = 0; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + /* + * We need to check gl_state == LM_ST_SHARED here and not gl_req == + * LM_ST_EXCLUSIVE. That's because when any node does a freeze, + * all the nodes should have the freeze glock in SH mode and they all + * call do_xmote: One for EX and the others for UN. They ALL must + * freeze locally, and they ALL must queue freeze work. The freeze_work + * calls freeze_func, which tries to reacquire the freeze glock in SH, + * effectively waiting for the thaw on the node who holds it in EX. + * Once thawed, the work func acquires the freeze glock in + * SH and everybody goes back to thawed. + */ if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) && - test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) { atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); error = freeze_super(sdp->sd_vfs); if (error) { @@ -536,8 +547,11 @@ static int freeze_go_sync(struct gfs2_glock *gl) gfs2_assert_withdraw(sdp, 0); } queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); - gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | - GFS2_LFC_FREEZE_GO_SYNC); + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | + GFS2_LFC_FREEZE_GO_SYNC); + else /* read-only mounts */ + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); } return 0; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index de2515fe5ac9d3..1aa20417b6a48e 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1080,6 +1080,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) int silent = fc->sb_flags & SB_SILENT; struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; + struct gfs2_holder freeze_gh; int error; sdp = init_sbd(sb); @@ -1203,14 +1204,18 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_per_node; } - if (!sb_rdonly(sb)) { + error = gfs2_freeze_lock(sdp, &freeze_gh, 0); + if (error) + goto fail_per_node; + + if (!sb_rdonly(sb)) error = gfs2_make_fs_rw(sdp); - if (error) { - fs_err(sdp, "can't make FS RW: %d\n", error); - goto fail_per_node; - } - } + gfs2_freeze_unlock(&freeze_gh); + if (error) { + fs_err(sdp, "can't make FS RW: %d\n", error); + goto fail_per_node; + } gfs2_glock_dq_uninit(&mount_gh); gfs2_online_uevent(sdp); return 0; @@ -1511,6 +1516,12 @@ static int gfs2_reconfigure(struct fs_context *fc) fc->sb_flags |= SB_RDONLY; if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) { + struct gfs2_holder freeze_gh; + + error = gfs2_freeze_lock(sdp, &freeze_gh, 0); + if (error) + return -EINVAL; + if (fc->sb_flags & SB_RDONLY) { error = gfs2_make_fs_ro(sdp); if (error) @@ -1520,6 +1531,7 @@ static int gfs2_reconfigure(struct fs_context *fc) if (error) errorfc(fc, "unable to remount read-write"); } + gfs2_freeze_unlock(&freeze_gh); } sdp->sd_args = *newargs; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 30659dcb295183..77e2ca8e918b0a 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -473,9 +473,7 @@ void gfs2_recover_func(struct work_struct *work) /* Acquire a shared hold on the freeze lock */ - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | LM_FLAG_PRIORITY, - &thaw_gh); + error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY); if (error) goto fail_gunlock_ji; @@ -525,7 +523,7 @@ void gfs2_recover_func(struct work_struct *work) clean_journal(jd, &head); up_write(&sdp->sd_log_flush_lock); - gfs2_glock_dq_uninit(&thaw_gh); + gfs2_freeze_unlock(&thaw_gh); t_rep = ktime_get(); fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, " "jhead:%lldms, tlck:%lldms, replay:%lldms]\n", @@ -547,7 +545,7 @@ void gfs2_recover_func(struct work_struct *work) goto done; fail_gunlock_thaw: - gfs2_glock_dq_uninit(&thaw_gh); + gfs2_freeze_unlock(&thaw_gh); fail_gunlock_ji: if (jlocked) { gfs2_glock_dq_uninit(&ji_gh); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 3c1e38302eacb7..3f8470d617f9d2 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -168,7 +168,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_holder freeze_gh; struct gfs2_log_header_host head; int error; @@ -176,11 +175,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) if (error) return error; - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, - &freeze_gh); - if (error) - goto fail_threads; - j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); if (gfs2_withdrawn(sdp)) { error = -EIO; @@ -207,14 +201,9 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_glock_dq_uninit(&freeze_gh); - return 0; fail: - freeze_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&freeze_gh); -fail_threads: if (sdp->sd_quotad_process) kthread_stop(sdp->sd_quotad_process); sdp->sd_quotad_process = NULL; @@ -414,8 +403,7 @@ struct lfcc { * Returns: errno */ -static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, - struct gfs2_holder *freeze_gh) +static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) { struct gfs2_inode *ip; struct gfs2_jdesc *jd; @@ -440,7 +428,9 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, } error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, - GL_NOCACHE, freeze_gh); + LM_FLAG_NOEXP, &sdp->sd_freeze_gh); + if (error) + goto out; list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { error = gfs2_jdesc_check(jd); @@ -456,7 +446,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, } if (error) - gfs2_glock_dq_uninit(freeze_gh); + gfs2_freeze_unlock(&sdp->sd_freeze_gh); out: while (!list_empty(&list)) { @@ -612,28 +602,9 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { - struct gfs2_holder freeze_gh; int error = 0; int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_holder_mark_uninitialized(&freeze_gh); - if (sdp->sd_freeze_gl && - !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { - if (!log_write_allowed) { - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, - LM_ST_SHARED, GL_NOCACHE | - LM_FLAG_TRY, &freeze_gh); - if (error == GLR_TRYFAILED) - error = 0; - } else { - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, - LM_ST_SHARED, GL_NOCACHE, - &freeze_gh); - if (error && !gfs2_withdrawn(sdp)) - return error; - } - } - gfs2_flush_delete_work(sdp); if (!log_write_allowed && current == sdp->sd_quotad_process) fs_warn(sdp, "The quotad daemon is withdrawing.\n"); @@ -662,9 +633,6 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) atomic_read(&sdp->sd_reserving_log) == 0, HZ * 5); } - if (gfs2_holder_initialized(&freeze_gh)) - gfs2_glock_dq_uninit(&freeze_gh); - gfs2_quota_cleanup(sdp); if (!log_write_allowed) @@ -772,10 +740,8 @@ void gfs2_freeze_func(struct work_struct *work) struct super_block *sb = sdp->sd_vfs; atomic_inc(&sb->s_active); - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, - &freeze_gh); + error = gfs2_freeze_lock(sdp, &freeze_gh, 0); if (error) { - fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error); gfs2_assert_withdraw(sdp, 0); } else { atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); @@ -785,9 +751,7 @@ void gfs2_freeze_func(struct work_struct *work) error); gfs2_assert_withdraw(sdp, 0); } - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) - freeze_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&freeze_gh); + gfs2_freeze_unlock(&freeze_gh); } deactivate_super(sb); clear_bit(SDF_FS_FROZEN, &sdp->sd_flags); @@ -804,11 +768,13 @@ void gfs2_freeze_func(struct work_struct *work) static int gfs2_freeze(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; - int error = 0; + int error; mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) + if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) { + error = -EBUSY; goto out; + } for (;;) { if (gfs2_withdrawn(sdp)) { @@ -816,15 +782,19 @@ static int gfs2_freeze(struct super_block *sb) goto out; } - error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh); + error = gfs2_lock_fs_check_clean(sdp); if (!error) break; if (error == -EBUSY) fs_err(sdp, "waiting for recovery before freeze\n"); - else + else if (error == -EIO) { + fs_err(sdp, "Fatal IO error: cannot freeze gfs2 due " + "to recovery error.\n"); + goto out; + } else { fs_err(sdp, "error freezing FS: %d\n", error); - + } fs_err(sdp, "retrying...\n"); msleep(1000); } @@ -845,13 +815,13 @@ static int gfs2_unfreeze(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || + if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || !gfs2_holder_initialized(&sdp->sd_freeze_gh)) { mutex_unlock(&sdp->sd_freeze_mutex); - return 0; + return -EINVAL; } - gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(&sdp->sd_freeze_gh); mutex_unlock(&sdp->sd_freeze_mutex); return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE); } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 0c5b43d3ecbf20..8cdd2155667292 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -93,12 +93,38 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, return error; } +/** + * gfs2_freeze_lock - hold the freeze glock + * @sdp: the superblock + * @freeze_gh: pointer to the requested holder + * @caller_flags: any additional flags needed by the caller + */ +int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, + int caller_flags) +{ + int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags; + int error; + + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, + freeze_gh); + if (error && error != GLR_TRYFAILED) + fs_err(sdp, "can't lock the freeze lock: %d\n", error); + return error; +} + +void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh) +{ + if (gfs2_holder_initialized(freeze_gh)) + gfs2_glock_dq_uninit(freeze_gh); +} + static void signal_our_withdraw(struct gfs2_sbd *sdp) { struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl; struct inode *inode = sdp->sd_jdesc->jd_inode; struct gfs2_inode *ip = GFS2_I(inode); u64 no_formal_ino = ip->i_no_formal_ino; + int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); int ret = 0; int tries; @@ -119,8 +145,21 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) * therefore we need to clear SDF_JOURNAL_LIVE manually. */ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - if (sdp->sd_vfs && !sb_rdonly(sdp->sd_vfs)) - ret = gfs2_make_fs_ro(sdp); + if (!sb_rdonly(sdp->sd_vfs)) { + struct gfs2_holder freeze_gh; + + gfs2_holder_mark_uninitialized(&freeze_gh); + if (sdp->sd_freeze_gl && + !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { + ret = gfs2_freeze_lock(sdp, &freeze_gh, + log_write_allowed ? 0 : LM_FLAG_TRY); + if (ret == GLR_TRYFAILED) + ret = 0; + } + if (!ret) + ret = gfs2_make_fs_ro(sdp); + gfs2_freeze_unlock(&freeze_gh); + } if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ if (!ret) @@ -139,7 +178,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) gfs2_glock_dq(&sdp->sd_jinode_gh); if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { /* Make sure gfs2_unfreeze works if partially-frozen */ - flush_workqueue(gfs2_freeze_wq); + flush_work(&sdp->sd_freeze_work); atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); thaw_super(sdp->sd_vfs); } else { diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index ea01c860102c02..ee1b3da8056032 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -152,6 +152,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, bool verbose); +extern int gfs2_freeze_lock(struct gfs2_sbd *sdp, + struct gfs2_holder *freeze_gh, int caller_flags); +extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index 0a4d5f79c69933..f0b953b8904cbe 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -35,6 +35,7 @@ extern int restrict_link_by_builtin_and_secondary_trusted( #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted #endif +extern struct pkcs7_message *pkcs7; #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING extern int mark_hash_blacklisted(const char *hash); extern int is_hash_blacklisted(const u8 *hash, size_t hash_len, @@ -53,6 +54,20 @@ static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len) } #endif +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern int add_key_to_revocation_list(const char *data, size_t size); +extern int is_key_on_revocation_list(struct pkcs7_message *pkcs7); +#else +static inline int add_key_to_revocation_list(const char *data, size_t size) +{ + return 0; +} +static inline int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + return -ENOKEY; +} +#endif + #ifdef CONFIG_IMA_BLACKLIST_KEYRING extern struct key *ima_blacklist_keyring; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 49a6cc03c2f160..41ae9a59462434 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -293,6 +293,7 @@ struct nf_bridge_info { struct tc_skb_ext { __u32 chain; __u16 mru; + bool post_ct; }; #endif diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 12e9c34a2b6547..962adc3acdf6d4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -596,6 +596,7 @@ struct hci_chan { struct sk_buff_head data_q; unsigned int sent; __u8 state; + bool amp; }; struct hci_conn_params { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index d4d46123635194..b608be532964fb 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -709,6 +709,17 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, cls_common->extack = extack; } +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb) +{ + struct tc_skb_ext *tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + + if (tc_skb_ext) + memset(tc_skb_ext, 0, sizeof(*tc_skb_ext)); + return tc_skb_ext; +} +#endif + enum tc_matchall_command { TC_CLSMATCHALL_REPLACE, TC_CLSMATCHALL_DESTROY, diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 6d5beac29bc116..aa8b396133b498 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -83,7 +83,6 @@ enum sctp_verb { SCTP_CMD_ASSOC_FAILED, /* Handle association failure. */ SCTP_CMD_DISCARD_PACKET, /* Discard the whole packet. */ SCTP_CMD_GEN_SHUTDOWN, /* Generate a SHUTDOWN chunk. */ - SCTP_CMD_UPDATE_ASSOC, /* Update association information. */ SCTP_CMD_PURGE_OUTQUEUE, /* Purge all data waiting to be sent. */ SCTP_CMD_SETUP_T2, /* Hi-level, setup T2-shutdown parms. */ SCTP_CMD_RTO_PENDING, /* Set transport's rto_pending. */ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8a7af33b0a3e4d..175affc631d637 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4929,6 +4929,7 @@ static void hci_loglink_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) return; hchan->handle = le16_to_cpu(ev->handle); + hchan->amp = true; BT_DBG("hcon %p mgr %p hchan %p", hcon, hcon->amp_mgr, hchan); @@ -4961,7 +4962,7 @@ static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev, hci_dev_lock(hdev); hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle)); - if (!hchan) + if (!hchan || !hchan->amp) goto unlock; amp_destroy_logical_link(hchan, ev->reason); diff --git a/net/core/dev.c b/net/core/dev.c index c4124082029649..cb9d4e2765cd63 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5988,11 +5988,18 @@ EXPORT_SYMBOL(napi_schedule_prep); * __napi_schedule_irqoff - schedule for receive * @n: entry to schedule * - * Variant of __napi_schedule() assuming hard irqs are masked + * Variant of __napi_schedule() assuming hard irqs are masked. + * + * On PREEMPT_RT enabled kernels this maps to __napi_schedule() + * because the interrupt disabled assumption might not be true + * due to force-threaded interrupts and spinlock substitution. */ void __napi_schedule_irqoff(struct napi_struct *n) { - ____napi_schedule(this_cpu_ptr(&softnet_data), n); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + ____napi_schedule(this_cpu_ptr(&softnet_data), n); + else + __napi_schedule(n); } EXPORT_SYMBOL(__napi_schedule_irqoff); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index fcbb487b51f205..7d1575d08e3cb0 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1052,6 +1052,9 @@ bool __skb_flow_dissect(const struct net *net, key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } + __skb_flow_dissect_ipv4(skb, flow_dissector, + target_container, data, iph); + if (ip_is_fragment(iph)) { key_control->flags |= FLOW_DIS_IS_FRAGMENT; @@ -1068,9 +1071,6 @@ bool __skb_flow_dissect(const struct net *net, } } - __skb_flow_dissect_ipv4(skb, flow_dissector, - target_container, data, iph); - break; } case htons(ETH_P_IPV6): { diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index c15293e6e1d390..8459f12b8b8544 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -835,17 +835,17 @@ static void ovs_fragment(struct net *net, struct vport *vport, } if (key->eth.type == htons(ETH_P_IP)) { - struct dst_entry ovs_dst; + struct rtable ovs_rt = { 0 }; unsigned long orig_dst; prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); - dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); - ovs_dst.dev = vport->dev; + ovs_rt.dst.dev = vport->dev; orig_dst = skb->_skb_refdst; - skb_dst_set_noref(skb, &ovs_dst); + skb_dst_set_noref(skb, &ovs_rt.dst); IPCB(skb)->frag_max_size = mru; ip_do_fragment(net, skb->sk, skb, ovs_vport_output); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a0686a88634c22..389b681ad14c2a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -279,9 +279,11 @@ static void ovs_ct_update_key(const struct sk_buff *skb, /* This is called to initialize CT key fields possibly coming in from the local * stack. */ -void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) +void ovs_ct_fill_key(const struct sk_buff *skb, + struct sw_flow_key *key, + bool post_ct) { - ovs_ct_update_key(skb, NULL, key, false, false); + ovs_ct_update_key(skb, NULL, key, post_ct, false); } int ovs_ct_put_key(const struct sw_flow_key *swkey, @@ -1332,7 +1334,7 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key) if (skb_nfct(skb)) { nf_conntrack_put(skb_nfct(skb)); nf_ct_set(skb, NULL, IP_CT_UNTRACKED); - ovs_ct_fill_key(skb, key); + ovs_ct_fill_key(skb, key, false); } return 0; diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index 900dadd70974a9..a87053c644b237 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -33,7 +33,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, const struct ovs_conntrack_info *); int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key); -void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); +void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key, + bool post_ct); int ovs_ct_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, struct sk_buff *skb); void ovs_ct_free_action(const struct nlattr *a); @@ -82,7 +83,8 @@ static inline int ovs_ct_clear(struct sk_buff *skb, } static inline void ovs_ct_fill_key(const struct sk_buff *skb, - struct sw_flow_key *key) + struct sw_flow_key *key, + bool post_ct) { key->ct_state = 0; key->ct_zone = 0; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 3f2ee2279b395f..e6a2833ca3820c 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -866,6 +866,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) struct tc_skb_ext *tc_ext; #endif + bool post_ct = false; int res, err; /* Extract metadata from packet. */ @@ -904,6 +905,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, tc_ext = skb_ext_find(skb, TC_SKB_EXT); key->recirc_id = tc_ext ? tc_ext->chain : 0; OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0; + post_ct = tc_ext ? tc_ext->post_ct : false; } else { key->recirc_id = 0; } @@ -913,7 +915,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, err = key_extract(skb, key); if (!err) - ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */ + ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */ return err; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f6e0cd1d902d46..89f6ad521ad005 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -733,7 +733,8 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, #endif } - *qdisc_skb_cb(skb) = cb; + if (err != -EINPROGRESS) + *qdisc_skb_cb(skb) = cb; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -968,7 +969,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); if (err == -EINPROGRESS) { retval = TC_ACT_STOLEN; - goto out; + goto out_clear; } if (err) goto drop; @@ -1031,7 +1032,6 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, out_push: skb_push_rcsum(skb, nh_ofs); -out: qdisc_skb_cb(skb)->post_ct = true; out_clear: tcf_action_update_bstats(&c->common, skb); @@ -1205,9 +1205,6 @@ static int tcf_ct_fill_params(struct net *net, sizeof(p->zone)); } - if (p->zone == NF_CT_DEFAULT_ZONE_ID) - return 0; - nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0); tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL); if (!tmpl) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 18ea3a8a12ca41..6c6fbb31992e16 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -651,7 +651,7 @@ static void tc_block_indr_cleanup(struct flow_block_cb *block_cb) struct net_device *dev = block_cb->indr.dev; struct Qdisc *sch = block_cb->indr.sch; struct netlink_ext_ack extack = {}; - struct flow_block_offload bo; + struct flow_block_offload bo = {}; tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND, block_cb->indr.binder_type, @@ -1537,7 +1537,7 @@ static inline int __tcf_classify(struct sk_buff *skb, u32 *last_executed_chain) { #ifdef CONFIG_NET_CLS_ACT - const int max_reclassify_loop = 4; + const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; int limit = 0; @@ -1630,11 +1630,12 @@ int tcf_classify_ingress(struct sk_buff *skb, /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { - ext = skb_ext_add(skb, TC_SKB_EXT); + ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; ext->mru = qdisc_skb_cb(skb)->mru; + ext->post_ct = qdisc_skb_cb(skb)->post_ct; } return ret; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7f1a951ad272a8..955cc43b515ad6 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1421,6 +1421,21 @@ static int fl_validate_ct_state(u16 state, struct nlattr *tb, return -EINVAL; } + if (state & TCA_FLOWER_KEY_CT_FLAGS_INVALID && + state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "when inv is set, only trk may be set"); + return -EINVAL; + } + + if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW && + state & TCA_FLOWER_KEY_CT_FLAGS_REPLY) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "new and rpl are mutually exclusive"); + return -EINVAL; + } + return 0; } @@ -1440,7 +1455,7 @@ static int fl_set_key_ct(struct nlattr **tb, &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK, sizeof(key->ct_state)); - err = fl_validate_ct_state(mask->ct_state, + err = fl_validate_ct_state(key->ct_state & mask->ct_state, tb[TCA_FLOWER_KEY_CT_STATE_MASK], extack); if (err) diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index e1e77d3fb6c021..8c06381391d6fe 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -90,16 +90,16 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, } if (skb_protocol(skb, true) == htons(ETH_P_IP)) { - struct dst_entry sch_frag_dst; + struct rtable sch_frag_rt = { 0 }; unsigned long orig_dst; sch_frag_prepare_frag(skb, xmit); - dst_init(&sch_frag_dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, DST_OBSOLETE_NONE, DST_NOCOUNT); - sch_frag_dst.dev = skb->dev; + sch_frag_rt.dst.dev = skb->dev; orig_dst = skb->_skb_refdst; - skb_dst_set_noref(skb, &sch_frag_dst); + skb_dst_set_noref(skb, &sch_frag_rt.dst); IPCB(skb)->frag_max_size = mru; ret = ip_do_fragment(net, skb->sk, skb, sch_frag_xmit); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 356a7093572d73..7d0887a4401745 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -873,11 +873,7 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, struct sctp_chunk *retval; __u32 ctsn; - if (chunk && chunk->asoc) - ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map); - else - ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); - + ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); shut.cum_tsn_ack = htonl(ctsn); retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index a0591dd130f351..ffb361eed23e8a 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -845,28 +845,6 @@ static void sctp_cmd_setup_t2(struct sctp_cmd_seq *cmds, asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; } -static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds, - struct sctp_association *asoc, - struct sctp_association *new) -{ - struct net *net = sock_net(asoc->base.sk); - struct sctp_chunk *abort; - - if (!sctp_assoc_update(asoc, new)) - return; - - abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr)); - if (abort) { - sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); - sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - } - sctp_add_cmd_sf(cmds, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); - sctp_add_cmd_sf(cmds, SCTP_CMD_ASSOC_FAILED, - SCTP_PERR(SCTP_ERROR_RSRC_LOW)); - SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); -} - /* Helper function to change the state of an association. */ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, @@ -1320,10 +1298,6 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, sctp_endpoint_add_asoc(ep, asoc); break; - case SCTP_CMD_UPDATE_ASSOC: - sctp_cmd_assoc_update(commands, asoc, cmd->obj.asoc); - break; - case SCTP_CMD_PURGE_OUTQUEUE: sctp_outq_teardown(&asoc->outqueue); break; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 45c81a072ab62d..e68fc57fc787b5 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1777,6 +1777,30 @@ enum sctp_disposition sctp_sf_do_5_2_3_initack( return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); } +static int sctp_sf_do_assoc_update(struct sctp_association *asoc, + struct sctp_association *new, + struct sctp_cmd_seq *cmds) +{ + struct net *net = asoc->base.net; + struct sctp_chunk *abort; + + if (!sctp_assoc_update(asoc, new)) + return 0; + + abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr)); + if (abort) { + sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); + sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + } + sctp_add_cmd_sf(cmds, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(cmds, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_RSRC_LOW)); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); + + return -ENOMEM; +} + /* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A') * * Section 5.2.4 @@ -1856,20 +1880,22 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL()); - repl = sctp_make_cookie_ack(new_asoc, chunk); + /* Update the content of current association. */ + if (sctp_sf_do_assoc_update((struct sctp_association *)asoc, new_asoc, commands)) + goto nomem; + + repl = sctp_make_cookie_ack(asoc, chunk); if (!repl) goto nomem; /* Report association restart to upper layer. */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, - new_asoc->c.sinit_num_ostreams, - new_asoc->c.sinit_max_instreams, + asoc->c.sinit_num_ostreams, + asoc->c.sinit_max_instreams, NULL, GFP_ATOMIC); if (!ev) goto nomem_ev; - /* Update the content of current association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); if ((sctp_state(asoc, SHUTDOWN_PENDING) || sctp_state(asoc, SHUTDOWN_SENT)) && @@ -1929,14 +1955,16 @@ static enum sctp_disposition sctp_sf_do_dupcook_b( if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) return SCTP_DISPOSITION_DISCARD; - /* Update the content of current association. */ - sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - repl = sctp_make_cookie_ack(new_asoc, chunk); + /* Update the content of current association. */ + if (sctp_sf_do_assoc_update((struct sctp_association *)asoc, new_asoc, commands)) + goto nomem; + + repl = sctp_make_cookie_ack(asoc, chunk); if (!repl) goto nomem; diff --git a/scripts/Makefile b/scripts/Makefile index 124c3828865eb0..77c371b9e295a3 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -19,6 +19,7 @@ hostprogs-$(CONFIG_ASN1) += asn1_compiler hostprogs-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert hostprogs-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert +hostprogs-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert HOSTCFLAGS_sortextable.o = -I$(srctree)/tools/include HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c index c5ba695c10e3a5..5604bd57c99077 100644 --- a/security/integrity/platform_certs/keyring_handler.c +++ b/security/integrity/platform_certs/keyring_handler.c @@ -55,6 +55,15 @@ static __init void uefi_blacklist_binary(const char *source, uefi_blacklist_hash(source, data, len, "bin:", 4); } +/* + * Add an X509 cert to the revocation list. + */ +static __init void uefi_revocation_list_x509(const char *source, + const void *data, size_t len) +{ + add_key_to_revocation_list(data, len); +} + /* * Return the appropriate handler for particular signature list types found in * the UEFI db and MokListRT tables. @@ -76,5 +85,7 @@ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) return uefi_blacklist_x509_tbs; if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) return uefi_blacklist_binary; + if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) + return uefi_revocation_list_x509; return 0; } diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index 974f07939f2943..080738a2108db0 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -132,8 +132,9 @@ static int __init load_moklist_certs(void) static int __init load_uefi_certs(void) { efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID; - void *db = NULL, *dbx = NULL; - unsigned long dbsize = 0, dbxsize = 0; + efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; + void *db = NULL, *dbx = NULL, *mokx = NULL; + unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0; efi_status_t status; int rc = 0; @@ -175,6 +176,21 @@ static int __init load_uefi_certs(void) kfree(dbx); } + mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status); + if (!mokx) { + if (status == EFI_NOT_FOUND) + pr_debug("mokx variable wasn't found\n"); + else + pr_info("Couldn't get mokx list\n"); + } else { + rc = parse_efi_signature_list("UEFI:MokListXRT", + mokx, mokxsize, + get_handler_for_dbx); + if (rc) + pr_err("Couldn't parse mokx signatures %d\n", rc); + kfree(mokx); + } + /* Load the MokListRT certs */ rc = load_moklist_certs(); diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 058c746ee3006e..b11d8e6b5bc145 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -3,7 +3,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \ - match_ip_tos_test match_indev_test" + match_ip_tos_test match_indev_test match_ip_ttl_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -310,6 +310,42 @@ match_ip_tos_test() log_test "ip_tos match ($tcflags)" } +match_ip_ttl_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 ip_ttl 63 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=63" -q + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=63,mf,frag=256" -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on the wrong filter (no check on ttl)" + + tc_check_packets "dev $h2 ingress" 101 2 + check_err $? "Did not match on correct filter (ttl=63)" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=255" -q + + tc_check_packets "dev $h2 ingress" 101 3 + check_fail $? "Matched on a wrong filter (ttl=63)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (no check on ttl)" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "ip_ttl match ($tcflags)" +} + match_indev_test() { RET=0