diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 5a801c7438cc36..e14231af2acc48 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -22,6 +22,7 @@ workflow: !reference [.workflow] architectures: 'x86_64 ppc64le aarch64 s390x' debug_architectures: 'x86_64 ppc64le aarch64 s390x' native_tools: 'true' + disttag_override: '.el8_10' # Realtime specific configuration .realtime_check: @@ -32,6 +33,7 @@ workflow: !reference [.workflow] builder_image: quay.io/cki/builder-rhel8 name: kernel-rt-rhel8 pipeline_name_suffix: '' + disttag_override: '.el8_10' # Full RT pipeline for -rt branch .realtime_full: @@ -40,6 +42,7 @@ workflow: !reference [.workflow] variables: builder_image: quay.io/cki/builder-rhel8 name: kernel-rt-rhel8 + disttag_override: '.el8_10' RUN_ONLY_FOR_RT: 'true' # regular pipelines diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b8fca95fec9179..2a250e8964e670 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1585,6 +1585,12 @@ 0 -- machine default 1 -- force brightness inversion + ia32_emulation= [X86-64] + Format: + When true, allows loading 32-bit programs and executing 32-bit + syscalls, essentially overriding IA32_EMULATION_DEFAULT_DISABLED at + boot time. When false, unconditionally disables IA32 emulation. + icn= [HW,ISDN] Format: [,[,[,]]] diff --git a/Documentation/networking/device_drivers/ethernet/intel/idpf.rst b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst new file mode 100644 index 00000000000000..adb16e2abd21b5 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/intel/idpf.rst @@ -0,0 +1,160 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +========================================================================== +idpf Linux* Base Driver for the Intel(R) Infrastructure Data Path Function +========================================================================== + +Intel idpf Linux driver. +Copyright(C) 2023 Intel Corporation. + +.. contents:: + +The idpf driver serves as both the Physical Function (PF) and Virtual Function +(VF) driver for the Intel(R) Infrastructure Data Path Function. + +Driver information can be obtained using ethtool, lspci, and ip. + +For questions related to hardware requirements, refer to the documentation +supplied with your Intel adapter. All hardware requirements listed apply to use +with Linux. + + +Identifying Your Adapter +======================== +For information on how to identify your adapter, and for the latest Intel +network drivers, refer to the Intel Support website: +http://www.intel.com/support + + +Additional Features and Configurations +====================================== + +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The latest ethtool +version is required for this functionality. If you don't have one yet, you can +obtain it at: +https://kernel.org/pub/software/network/ethtool/ + + +Viewing Link Messages +--------------------- +Link messages will not be displayed to the console if the distribution is +restricting system messages. In order to see network driver link messages on +your console, set dmesg to eight by entering the following:: + + # dmesg -n 8 + +.. note:: + This setting is not saved across reboots. + + +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the Maximum Transmission Unit (MTU) +to a value larger than the default value of 1500. + +Use the ip command to increase the MTU size. For example, enter the following +where is the interface number:: + + # ip link set mtu 9000 dev + # ip link set up dev + +.. note:: + The maximum MTU setting for jumbo frames is 9706. This corresponds to the + maximum jumbo frame size of 9728 bytes. + +.. note:: + This driver will attempt to use multiple page sized buffers to receive + each jumbo packet. This should help to avoid buffer starvation issues when + allocating receive packets. + +.. note:: + Packet loss may have a greater impact on throughput when you use jumbo + frames. If you observe a drop in performance after enabling jumbo frames, + enabling flow control may mitigate the issue. + + +Performance Optimization +======================== +Driver defaults are meant to fit a wide variety of workloads, but if further +optimization is required, we recommend experimenting with the following +settings. + + +Interrupt Rate Limiting +----------------------- +This driver supports an adaptive interrupt throttle rate (ITR) mechanism that +is tuned for general workloads. The user can customize the interrupt rate +control for specific workloads, via ethtool, adjusting the number of +microseconds between interrupts. + +To set the interrupt rate manually, you must disable adaptive mode:: + + # ethtool -C adaptive-rx off adaptive-tx off + +For lower CPU utilization: + - Disable adaptive ITR and lower Rx and Tx interrupts. The examples below + affect every queue of the specified interface. + + - Setting rx-usecs and tx-usecs to 80 will limit interrupts to about + 12,500 interrupts per second per queue:: + + # ethtool -C adaptive-rx off adaptive-tx off rx-usecs 80 + tx-usecs 80 + +For reduced latency: + - Disable adaptive ITR and ITR by setting rx-usecs and tx-usecs to 0 + using ethtool:: + + # ethtool -C adaptive-rx off adaptive-tx off rx-usecs 0 + tx-usecs 0 + +Per-queue interrupt rate settings: + - The following examples are for queues 1 and 3, but you can adjust other + queues. + + - To disable Rx adaptive ITR and set static Rx ITR to 10 microseconds or + about 100,000 interrupts/second, for queues 1 and 3:: + + # ethtool --per-queue queue_mask 0xa --coalesce adaptive-rx off + rx-usecs 10 + + - To show the current coalesce settings for queues 1 and 3:: + + # ethtool --per-queue queue_mask 0xa --show-coalesce + + + +Virtualized Environments +------------------------ +In addition to the other suggestions in this section, the following may be +helpful to optimize performance in VMs. + + - Using the appropriate mechanism (vcpupin) in the VM, pin the CPUs to + individual LCPUs, making sure to use a set of CPUs included in the + device's local_cpulist: /sys/class/net//device/local_cpulist. + + - Configure as many Rx/Tx queues in the VM as available. (See the idpf driver + documentation for the number of queues supported.) For example:: + + # ethtool -L rx tx + + +Support +======= +For general information, go to the Intel support website at: +http://www.intel.com/support/ + +If an issue is identified with the released source code on a supported kernel +with a supported adapter, email the specific information related to the issue +to intel-wired-lan@lists.osuosl.org. + + +Trademarks +========== +Intel is a trademark or registered trademark of Intel Corporation or its +subsidiaries in the United States and/or other countries. + +* Other names and brands may be claimed as the property of others. diff --git a/Makefile.rhelver b/Makefile.rhelver index f98dee184a2862..6ddf38c309465f 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 10 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 553 +RHEL_RELEASE = 553.5.1 # # ZSTREAM @@ -34,7 +34,7 @@ RHEL_RELEASE = 553 # (when you give RHDISTGIT_BRANCH on the command line, in which case the Z # number will be incremented instead of the Y). # -ZSTREAM = no +ZSTREAM = yes # # Early y+1 numbering diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 93e20edfe82ca6..faa2dd55f76531 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -94,6 +94,11 @@ static inline bool kdump_in_progress(void) return crashing_cpu >= 0; } +#if defined(CONFIG_CRASH_DUMP) +bool is_kdump_kernel(void); +#define is_kdump_kernel is_kdump_kernel +#endif /* CONFIG_CRASH_DUMP */ + #ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_elf64_ops; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 64cdf8a5cc9e0a..cef0123c4fe8e4 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef DEBUG #include @@ -119,6 +120,17 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return csize; } +/* + * Return true only when kexec based kernel dump capturing method is used. + * This ensures all restritions applied for kdump case are not automatically + * applied for fadump case. + */ +bool is_kdump_kernel(void) +{ + return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX; +} +EXPORT_SYMBOL_GPL(is_kdump_kernel); + #ifdef CONFIG_PPC_RTAS /* * The crashkernel region will almost always overlap the RTAS region, so diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index bc8260160da4fb..87736c5bb589e4 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -529,6 +529,8 @@ static int do_fp_load(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -579,6 +581,8 @@ static int do_fp_store(struct instruction_op *op, unsigned long ea, } u; nb = GETSIZE(op->type); + if (nb > sizeof(u)) + return -EINVAL; if (!address_ok(regs, ea, nb)) return -EFAULT; rn = op->reg; @@ -623,6 +627,9 @@ static nokprobe_inline int do_vec_load(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u = {}; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ @@ -650,6 +657,9 @@ static nokprobe_inline int do_vec_store(int rn, unsigned long ea, u8 b[sizeof(__vector128)]; } u; + if (size > sizeof(u)) + return -EINVAL; + if (!address_ok(regs, ea & ~0xfUL, 16)) return -EFAULT; /* align to multiple of size */ diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 2b656e67f2eaaa..927703af49be23 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -65,7 +65,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) * as to leave enough 0 bits in the address to contain it. */ unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; + struct kmem_cache *new = NULL; /* It would be nice if this was a BUILD_BUG_ON(), but at the * moment, gcc doesn't seem to recognize is_power_of_2 as a @@ -78,7 +78,8 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) align = max_t(unsigned long, align, minalign); name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor); + if (name) + new = kmem_cache_create(name, table_size, align, 0, ctor); if (!new) panic("Could not allocate pgtable cache for order %d", shift); diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c index 515150417bb363..b0738e073fa89e 100644 --- a/arch/powerpc/platforms/pseries/papr_platform_attributes.c +++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c @@ -100,10 +100,12 @@ static int papr_get_attr(u64 id, struct energy_scale_attribute *esi) esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs); temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL); - if (temp_buf) + if (temp_buf) { buf = temp_buf; - else - return -ENOMEM; + } else { + ret = -ENOMEM; + goto out_buf; + } goto retry; } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index fa31e26eddb0f7..bfd8a0e4825cf1 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -389,6 +389,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* * floating point control reg. is in the thread structure */ + save_fpu_regs(); if ((unsigned int) data != 0 || test_fp_ctl(data >> (BITS_PER_LONG - 32))) return -EINVAL; @@ -747,6 +748,7 @@ static int __poke_user_compat(struct task_struct *child, /* * floating point control reg. is in the thread structure */ + save_fpu_regs(); if (test_fp_ctl(tmp)) return -EINVAL; child->thread.fpu.fpc = data; @@ -962,9 +964,7 @@ static int s390_fpregs_set(struct task_struct *target, int rc = 0; freg_t fprs[__NUM_FPRS]; - if (target == current) - save_fpu_regs(); - + save_fpu_regs(); if (MACHINE_HAS_VX) convert_vx_to_fp(fprs, target->thread.fpu.vxrs); else diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 733f556dfb2df2..73d31811cdd58d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -3099,6 +3099,15 @@ config IA32_AOUT ---help--- Support old a.out binaries in the 32bit emulation. +config IA32_EMULATION_DEFAULT_DISABLED + bool "IA32 emulation disabled by default" + default n + depends on IA32_EMULATION + help + Make IA32 emulation disabled by default. This prevents loading 32-bit + processes and access to 32-bit syscalls. If unsure, leave it to its + default value. + config X86_X32 bool "x32 ABI for 64-bit mode" depends on X86_64 diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index c6584e4c3b3673..1b226683125f5c 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -825,5 +826,14 @@ void __init tdx_early_init(void) x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed; + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + ia32_disable(); + pr_info("Guest detected\n"); } diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ae41c7e326f5d8..e4c53879dd3515 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -308,6 +309,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) #endif #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) +#ifdef CONFIG_IA32_EMULATION +bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED); + +static int ia32_emulation_override_cmdline(char *arg) +{ + return kstrtobool(arg, &__ia32_enabled); +} +early_param("ia32_emulation", ia32_emulation_override_cmdline); +#endif + /* * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does * all entry and exit work and returns with IRQs off. This function is diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5ab0016d792637..20ae3965f651f4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1859,11 +1859,11 @@ nmi_restore: iretq SYM_CODE_END(nmi) -SYM_CODE_START(ignore_sysret) +SYM_CODE_START(entry_SYSCALL32_ignore) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret -SYM_CODE_END(ignore_sysret) +SYM_CODE_END(entry_SYSCALL32_ignore) SYM_CODE_START(rewind_stack_do_exit) UNWIND_HINT_FUNC diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 3908f664e7d9ed..014d85fe2402c6 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -162,8 +162,15 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) +/* + * RHEL-only: avoid including '' here as doing so breaks core dumps, + * see https://issues.redhat.com/browse/RHEL-33448. Use __ia32_enabled directly + * instead. + */ +extern bool __ia32_enabled; + #define compat_elf_check_arch(x) \ - (elf_check_arch_ia32(x) || \ + ((elf_check_arch_ia32(x) && __ia32_enabled) || \ (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) #if __USER32_DS != __USER_DS diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 2c5f7861d373c4..1d69acb119f3e6 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -68,6 +68,27 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm); #endif -#endif /* !CONFIG_IA32_SUPPORT */ +extern bool __ia32_enabled; + +static inline bool ia32_enabled(void) +{ + return __ia32_enabled; +} + +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + +#else /* !CONFIG_IA32_SUPPORT */ + +static inline bool ia32_enabled(void) +{ + return IS_ENABLED(CONFIG_X86_32); +} + +static inline void ia32_disable(void) {} + +#endif #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5c8e4f3d7fdc91..a204b71956a5bd 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -428,7 +428,7 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu) } DECLARE_PER_CPU(unsigned int, irq_count); -extern asmlinkage void ignore_sysret(void); +extern asmlinkage void entry_SYSCALL32_ignore(void); /* Save actual FS/GS selectors and bases to current->thread */ void current_save_fsgs(void); diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 95d915db434914..8295116e5f6586 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -34,6 +34,9 @@ void entry_INT80_compat(void); #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) void xen_entry_INT80_compat(void); #endif +#else /* !CONFIG_IA32_EMULATION */ +#define entry_SYSCALL_compat NULL +#define entry_SYSENTER_compat NULL #endif void x86_configure_nx(void); diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 8a0c25c6bf099c..f4a6d116ba8870 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -58,12 +58,10 @@ extern bool tsc_async_resets; #ifdef CONFIG_X86_TSC extern bool tsc_store_and_check_tsc_adjust(bool bootcpu); extern void tsc_verify_tsc_adjust(bool resume); -extern void check_tsc_sync_source(int cpu); extern void check_tsc_sync_target(void); #else static inline bool tsc_store_and_check_tsc_adjust(bool bootcpu) { return false; } static inline void tsc_verify_tsc_adjust(bool resume) { } -static inline void check_tsc_sync_source(int cpu) { } static inline void check_tsc_sync_target(void) { } #endif diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8388f4f0a7f27f..8c89548f6d4bf1 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -158,7 +158,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu) { struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - cpumask_clear_cpu(dead_cpu, &cmsk->mask); + if (cmsk) + cpumask_clear_cpu(dead_cpu, &cmsk->mask); free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2cf803e7346683..cce3aa9507f097 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -56,6 +56,7 @@ #include #include +#include #include #include @@ -1921,30 +1922,41 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); +static void wrmsrl_cstar(unsigned long val) +{ + /* + * Intel CPUs do not support 32-bit SYSCALL. Writing to MSR_CSTAR + * is so far ignored by the CPU, but raises a #VE trap in a TDX + * guest. Avoid the pointless write on all Intel CPUs. + */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + wrmsrl(MSR_CSTAR, val); +} + /* May not be marked __init: used by software suspend */ void syscall_init(void) { wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); -#ifdef CONFIG_IA32_EMULATION - wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); - /* - * This only works on Intel CPUs. - * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. - * This does not cause SYSENTER to jump to the wrong location, because - * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). - */ - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, - (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1)); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); -#else - wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); -#endif + if (ia32_enabled()) { + wrmsrl_cstar((unsigned long)entry_SYSCALL_compat); + /* + * This only works on Intel CPUs. + * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. + * This does not cause SYSENTER to jump to the wrong location, because + * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). + */ + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, + (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1)); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); + } else { + wrmsrl_cstar((unsigned long)entry_SYSCALL32_ignore); + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); + } /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 558076dbde5bfc..247f2225aa9f36 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures, * Attempt to restore the FPU registers directly from user memory. * Pagefaults are handled and any errors returned are fatal. */ -static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, - bool fx_only, unsigned int size) +static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only) { struct fpu *fpu = ¤t->thread.fpu; int ret; + /* Restore enabled features only. */ + xrestore &= fpu->fpstate->user_xfeatures; retry: fpregs_lock(); /* Ensure that XFD is up to date */ @@ -309,7 +310,7 @@ static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, if (ret != X86_TRAP_PF) return false; - if (!fault_in_readable(buf, size)) + if (!fault_in_readable(buf, fpu->fpstate->user_size)) goto retry; return false; } @@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, struct user_i387_ia32_struct env; bool success, fx_only = false; union fpregs_state *fpregs; - unsigned int state_size; u64 user_xfeatures = 0; if (use_xsave()) { @@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, return false; fx_only = !fx_sw_user.magic1; - state_size = fx_sw_user.xstate_size; user_xfeatures = fx_sw_user.xfeatures; } else { user_xfeatures = XFEATURE_MASK_FPSSE; - state_size = fpu->fpstate->user_size; } if (likely(!ia32_fxstate)) { /* Restore the FPU registers directly from user memory. */ - return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only, - state_size); + return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only); } /* diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 03a482ce69fdfb..93f5e5bde5c8f8 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -10,6 +10,7 @@ #include #include #include +#include #define DPL0 0x0 #define DPL3 0x3 @@ -106,6 +107,9 @@ static const __initconst struct idt_data def_idts[] = { #endif SYSG(X86_TRAP_OF, overflow), +}; + +static const struct idt_data ia32_idt[] __initconst = { #if defined(CONFIG_IA32_EMULATION) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), #elif defined(CONFIG_X86_32) @@ -235,6 +239,9 @@ void __init idt_setup_early_traps(void) void __init idt_setup_traps(void) { idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true); + + if (ia32_enabled()) + idt_setup_from_table(idt_table, ia32_idt, ARRAY_SIZE(ia32_idt), true); } #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5ff70ce6c1439e..4bce6b723cfcd5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1208,7 +1208,6 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); int cpu0_nmi_registered = 0; - unsigned long flags; int err, ret = 0; lockdep_assert_irqs_enabled(); @@ -1255,14 +1254,6 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) goto unreg_nmi; } - /* - * Check TSC synchronization with the AP (keep irqs disabled - * while doing so): - */ - local_irq_save(flags); - check_tsc_sync_source(cpu); - local_irq_restore(flags); - while (!cpu_online(cpu)) { cpu_relax(); touch_nmi_watchdog(); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 37b291725df902..f9dd0c22ef71f1 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -15,6 +15,7 @@ * ( The serial nature of the boot logic and the CPU hotplug lock * protects against more than 2 CPUs entering this code. ) */ +#include #include #include #include @@ -245,7 +246,6 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) */ static atomic_t start_count; static atomic_t stop_count; -static atomic_t skip_test; static atomic_t test_runs; /* @@ -344,21 +344,21 @@ static inline unsigned int loop_timeout(int cpu) return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20; } +static void tsc_sync_mark_tsc_unstable(struct work_struct *work) +{ + mark_tsc_unstable("check_tsc_sync_source failed"); +} + +static DECLARE_WORK(tsc_sync_work, tsc_sync_mark_tsc_unstable); + /* - * Source CPU calls into this - it waits for the freshly booted - * target CPU to arrive and then starts the measurement: + * The freshly booted CPU initiates this via an async SMP function call. */ -void check_tsc_sync_source(int cpu) +static void check_tsc_sync_source(void *__cpu) { + unsigned int cpu = (unsigned long)__cpu; int cpus = 2; - /* - * No need to check if we already know that the TSC is not - * synchronized or if we have no TSC. - */ - if (unsynchronized_tsc()) - return; - /* * Set the maximum number of test runs to * 1 if the CPU does not provide the TSC_ADJUST MSR @@ -369,16 +369,9 @@ void check_tsc_sync_source(int cpu) else atomic_set(&test_runs, 3); retry: - /* - * Wait for the target to start or to skip the test: - */ - while (atomic_read(&start_count) != cpus - 1) { - if (atomic_read(&skip_test) > 0) { - atomic_set(&skip_test, 0); - return; - } + /* Wait for the target to start. */ + while (atomic_read(&start_count) != cpus - 1) cpu_relax(); - } /* * Trigger the target to continue into the measurement too: @@ -398,20 +391,20 @@ void check_tsc_sync_source(int cpu) if (!nr_warps) { atomic_set(&test_runs, 0); - pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + pr_debug("TSC synchronization [CPU#%d -> CPU#%u]: passed\n", smp_processor_id(), cpu); } else if (atomic_dec_and_test(&test_runs) || random_warps) { /* Force it to 0 if random warps brought us here */ atomic_set(&test_runs, 0); - pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n", + pr_warn("TSC synchronization [CPU#%d -> CPU#%u]:\n", smp_processor_id(), cpu); pr_warn("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); if (random_warps) pr_warn("TSC warped randomly between CPUs\n"); - mark_tsc_unstable("check_tsc_sync_source failed"); + schedule_work(&tsc_sync_work); } /* @@ -458,11 +451,12 @@ void check_tsc_sync_target(void) * SoCs the TSC is frequency synchronized, but still the TSC ADJUST * register might have been wreckaged by the BIOS.. */ - if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) { - atomic_inc(&skip_test); + if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) return; - } + /* Kick the control CPU into the TSC synchronization function */ + smp_call_function_single(cpumask_first(cpu_online_mask), check_tsc_sync_source, + (unsigned long *)(unsigned long)cpu, 0); retry: /* * Register this CPU's participation and wait for the diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index ce8273ed479d14..122a1c9f91f534 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -501,6 +502,16 @@ void __init sme_early_init(void) x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; + + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + if (sev_status & MSR_AMD64_SEV_ENABLED) + ia32_disable(); } /* diff --git a/crypto/akcipher.c b/crypto/akcipher.c index cfbdb06d8ca864..9d77fc4dd9c41a 100644 --- a/crypto/akcipher.c +++ b/crypto/akcipher.c @@ -122,10 +122,22 @@ static void akcipher_prepare_alg(struct akcipher_alg *alg) base->cra_flags |= CRYPTO_ALG_TYPE_AKCIPHER; } +static int akcipher_default_op(struct akcipher_request *req) +{ + return -ENOSYS; +} + int crypto_register_akcipher(struct akcipher_alg *alg) { struct crypto_alg *base = &alg->base; + alg->sign = akcipher_default_op; + if (!alg->verify) + alg->verify = akcipher_default_op; + if (!alg->encrypt) + alg->encrypt = akcipher_default_op; + alg->decrypt = akcipher_default_op; + akcipher_prepare_alg(alg); return crypto_register_alg(base); } diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 3dd3f8428f730e..0d07479bf62125 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2210,6 +2210,9 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, unsigned int out_len_max, out_len = 0; int err = -ENOMEM; struct scatterlist src, dst, src_tab[2]; + const char *m, *c; + unsigned int m_size, c_size; + const char *op; if (testmgr_alloc_buf(xbuf)) return err; @@ -2231,47 +2234,73 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, err = -ENOMEM; out_len_max = crypto_akcipher_maxsize(tfm); + + /* + * First run test which do not require a private key, such as + * encrypt or verify. + */ outbuf_enc = kzalloc(out_len_max, GFP_KERNEL); if (!outbuf_enc) goto free_req; - if (WARN_ON(vecs->m_size > PAGE_SIZE)) - goto free_all; + if (!vecs->siggen_sigver_test) { + m = vecs->m; + m_size = vecs->m_size; + c = vecs->c; + c_size = vecs->c_size; + op = "encrypt"; + } else { + /* Swap args so we could keep plaintext (digest) + * in vecs->m, and cooked signature in vecs->c. + */ + m = vecs->c; /* signature */ + m_size = vecs->c_size; + c = vecs->m; /* digest */ + c_size = vecs->m_size; + op = "verify"; + } - memcpy(xbuf[0], vecs->m, vecs->m_size); + if (WARN_ON(m_size > PAGE_SIZE)) + goto free_all; + memcpy(xbuf[0], m, m_size); sg_init_table(src_tab, 2); sg_set_buf(&src_tab[0], xbuf[0], 8); - sg_set_buf(&src_tab[1], xbuf[0] + 8, vecs->m_size - 8); + sg_set_buf(&src_tab[1], xbuf[0] + 8, m_size - 8); sg_init_one(&dst, outbuf_enc, out_len_max); - akcipher_request_set_crypt(req, src_tab, &dst, vecs->m_size, + akcipher_request_set_crypt(req, src_tab, &dst, m_size, out_len_max); akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &wait); err = crypto_wait_req(vecs->siggen_sigver_test ? - /* Run asymmetric signature generation */ - crypto_akcipher_sign(req) : + /* Run asymmetric signature verification */ + crypto_akcipher_verify(req) : /* Run asymmetric encrypt */ crypto_akcipher_encrypt(req), &wait); if (err) { - pr_err("alg: akcipher: encrypt test failed. err %d\n", err); + pr_err("alg: akcipher: %s test failed. err %d\n", op, err); goto free_all; } - if (req->dst_len != vecs->c_size) { - pr_err("alg: akcipher: encrypt test failed. Invalid output len\n"); + if (req->dst_len != c_size) { + pr_err("alg: akcipher: %s test failed. Invalid output len\n", + op); err = -EINVAL; goto free_all; } /* verify that encrypted message is equal to expected */ - if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) { - pr_err("alg: akcipher: encrypt test failed. Invalid output\n"); - hexdump(outbuf_enc, vecs->c_size); + if (memcmp(c, outbuf_enc, c_size)) { + pr_err("alg: akcipher: %s test failed. Invalid output\n", op); + hexdump(outbuf_enc, c_size); err = -EINVAL; goto free_all; } - /* Don't invoke decrypt for vectors with public key */ - if (vecs->public_key_vec) { + + /* + * Don't invoke (decrypt or sign) test which require a private key + * for vectors with only a public key. + */ + if (1 || vecs->public_key_vec) { err = 0; goto free_all; } @@ -2281,37 +2310,36 @@ static int test_akcipher_one(struct crypto_akcipher *tfm, goto free_all; } - if (WARN_ON(vecs->c_size > PAGE_SIZE)) + op = vecs->siggen_sigver_test ? "sign" : "decrypt"; + if (WARN_ON(c_size > PAGE_SIZE)) goto free_all; + memcpy(xbuf[0], c, c_size); - memcpy(xbuf[0], vecs->c, vecs->c_size); - - sg_init_one(&src, xbuf[0], vecs->c_size); + sg_init_one(&src, xbuf[0], c_size); sg_init_one(&dst, outbuf_dec, out_len_max); crypto_init_wait(&wait); - akcipher_request_set_crypt(req, &src, &dst, vecs->c_size, out_len_max); + akcipher_request_set_crypt(req, &src, &dst, c_size, out_len_max); err = crypto_wait_req(vecs->siggen_sigver_test ? - /* Run asymmetric signature verification */ - crypto_akcipher_verify(req) : + /* Run asymmetric signature generation */ + crypto_akcipher_sign(req) : /* Run asymmetric decrypt */ crypto_akcipher_decrypt(req), &wait); if (err) { - pr_err("alg: akcipher: decrypt test failed. err %d\n", err); + pr_err("alg: akcipher: %s test failed. err %d\n", op, err); goto free_all; } out_len = req->dst_len; - if (out_len < vecs->m_size) { - pr_err("alg: akcipher: decrypt test failed. " - "Invalid output len %u\n", out_len); + if (out_len < m_size) { + pr_err("alg: akcipher: %s test failed. Invalid output len %u\n", + op, out_len); err = -EINVAL; goto free_all; } /* verify that decrypted message is equal to the original msg */ - if (memchr_inv(outbuf_dec, 0, out_len - vecs->m_size) || - memcmp(vecs->m, outbuf_dec + out_len - vecs->m_size, - vecs->m_size)) { - pr_err("alg: akcipher: decrypt test failed. Invalid output\n"); + if (memchr_inv(outbuf_dec, 0, out_len - m_size) || + memcmp(m, outbuf_dec + out_len - m_size, m_size)) { + pr_err("alg: akcipher: %s test failed. Invalid output\n", op); hexdump(outbuf_dec, out_len); err = -EINVAL; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 43c759285c3e03..405d00dd874601 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2409,6 +2409,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { X86_MATCH(ICELAKE_X, core_funcs), X86_MATCH(TIGERLAKE, core_funcs), X86_MATCH(SAPPHIRERAPIDS_X, core_funcs), + X86_MATCH(EMERALDRAPIDS_X, core_funcs), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 040fc728f21bf7..6276145e7d1b98 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -481,18 +481,14 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, if (pkg_flag) { attr_no = PKG_SYSFS_ATTR_NO; } else { - index = ida_alloc(&pdata->ida, GFP_KERNEL); + index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); if (index < 0) return index; + pdata->cpu_map[index] = topology_core_id(cpu); attr_no = index + BASE_SYSFS_ATTR_NO; } - if (attr_no > MAX_CORE_DATA - 1) { - err = -ERANGE; - goto ida_free; - } - tdata = init_temp_data(cpu, pkg_flag); if (!tdata) { err = -ENOMEM; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index b16791c8b5d7fe..68b4520aa722ed 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -404,11 +404,9 @@ static int i801_check_post(struct i801_priv *priv, int status) dev_err(&priv->pci_dev->dev, "Transaction timeout\n"); /* try to stop the current command */ dev_dbg(&priv->pci_dev->dev, "Terminating the current operation\n"); - outb_p(inb_p(SMBHSTCNT(priv)) | SMBHSTCNT_KILL, - SMBHSTCNT(priv)); + outb_p(SMBHSTCNT_KILL, SMBHSTCNT(priv)); usleep_range(1000, 2000); - outb_p(inb_p(SMBHSTCNT(priv)) & (~SMBHSTCNT_KILL), - SMBHSTCNT(priv)); + outb_p(0, SMBHSTCNT(priv)); /* Check if it worked */ status = inb_p(SMBHSTSTS(priv)); @@ -555,12 +553,11 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, return -EOPNOTSUPP; } - inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ - /* Use 32-byte buffer to process this transaction */ if (read_write == I2C_SMBUS_WRITE) { len = data->block[0]; outb_p(len, SMBHSTDAT0(priv)); + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) outb_p(data->block[i+1], SMBBLKDAT(priv)); } @@ -576,6 +573,7 @@ static int i801_block_transaction_by_block(struct i801_priv *priv, return -EPROTO; data->block[0] = len; + inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */ for (i = 0; i < len; i++) data->block[i + 1] = inb_p(SMBBLKDAT(priv)); } diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 80ae274b62bf42..c10643b38b2601 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2328,8 +2328,9 @@ void i2c_put_adapter(struct i2c_adapter *adap) if (!adap) return; - put_device(&adap->dev); module_put(adap->owner); + /* Should be last, otherwise we risk use-after-free with 'adap' */ + put_device(&adap->dev); } EXPORT_SYMBOL(i2c_put_adapter); diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index 2ea4585d18c5e1..ab43cd8e28eb4b 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -528,6 +528,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo sizeof(rdwr_arg))) return -EFAULT; + if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0) + return -EINVAL; + if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 2e91d887932658..33f9d02f9b6009 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1457,6 +1457,17 @@ static int config_non_roce_gid_cache(struct ib_device *device, i); goto err; } + + if (rdma_protocol_iwarp(device, port)) { + struct net_device *ndev; + + ndev = ib_device_get_netdev(device, port); + if (!ndev) + continue; + RCU_INIT_POINTER(gid_attr.ndev, ndev); + dev_put(ndev); + } + gid_attr.index = i; tprops->subnet_prefix = be64_to_cpu(gid_attr.gid.global.subnet_prefix); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5da1725d96d4cf..e32b62c277b365 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -686,30 +686,52 @@ cma_validate_port(struct ib_device *device, u32 port, struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + const struct ib_gid_attr *sgid_attr = ERR_PTR(-ENODEV); int bound_if_index = dev_addr->bound_dev_if; - const struct ib_gid_attr *sgid_attr; int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net)) - return ERR_PTR(-ENODEV); + goto out; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) - return ERR_PTR(-ENODEV); + goto out; if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) - return ERR_PTR(-ENODEV); + goto out; + + /* + * For drivers that do not associate more than one net device with + * their gid tables, such as iWARP drivers, it is sufficient to + * return the first table entry. + * + * Other driver classes might be included in the future. + */ + if (rdma_protocol_iwarp(device, port)) { + sgid_attr = rdma_get_gid_attr(device, port, 0); + if (IS_ERR(sgid_attr)) + goto out; + + rcu_read_lock(); + ndev = rcu_dereference(sgid_attr->ndev); + if (!net_eq(dev_net(ndev), dev_addr->net) || + ndev->ifindex != bound_if_index) + sgid_attr = ERR_PTR(-ENODEV); + rcu_read_unlock(); + goto out; + } if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(dev_addr->net, bound_if_index); if (!ndev) - return ERR_PTR(-ENODEV); + goto out; } else { gid_type = IB_GID_TYPE_IB; } sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev); dev_put(ndev); +out: return sgid_attr; } diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b99b3cc283b650..cf3641ce657a71 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1880,12 +1880,86 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, } EXPORT_SYMBOL(ib_modify_qp_with_udata); +static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes, + u16 *speed, u8 *width) +{ + if (!lanes) { + if (netdev_speed <= SPEED_1000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_SDR; + } else if (netdev_speed <= SPEED_10000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_20000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_DDR; + } else if (netdev_speed <= SPEED_25000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_40000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_FDR10; + } else { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_EDR; + } + + return; + } + + switch (lanes) { + case 1: + *width = IB_WIDTH_1X; + break; + case 2: + *width = IB_WIDTH_2X; + break; + case 4: + *width = IB_WIDTH_4X; + break; + case 8: + *width = IB_WIDTH_8X; + break; + case 12: + *width = IB_WIDTH_12X; + break; + default: + *width = IB_WIDTH_1X; + } + + switch (netdev_speed / lanes) { + case SPEED_2500: + *speed = IB_SPEED_SDR; + break; + case SPEED_5000: + *speed = IB_SPEED_DDR; + break; + case SPEED_10000: + *speed = IB_SPEED_FDR10; + break; + case SPEED_14000: + *speed = IB_SPEED_FDR; + break; + case SPEED_25000: + *speed = IB_SPEED_EDR; + break; + case SPEED_50000: + *speed = IB_SPEED_HDR; + break; + case SPEED_100000: + *speed = IB_SPEED_NDR; + break; + default: + *speed = IB_SPEED_SDR; + } +} + int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) { int rc; u32 netdev_speed; struct net_device *netdev; - struct ethtool_link_ksettings lksettings; + struct ethtool_link_ksettings lksettings = {}; if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; @@ -1904,29 +1978,13 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) netdev_speed = lksettings.base.speed; } else { netdev_speed = SPEED_1000; - pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name, - netdev_speed); + if (rc) + pr_warn("%s speed is unknown, defaulting to %u\n", + netdev->name, netdev_speed); } - if (netdev_speed <= SPEED_1000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_SDR; - } else if (netdev_speed <= SPEED_10000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_FDR10; - } else if (netdev_speed <= SPEED_20000) { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_DDR; - } else if (netdev_speed <= SPEED_25000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_EDR; - } else if (netdev_speed <= SPEED_40000) { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_FDR10; - } else { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_EDR; - } + ib_get_width_and_speed(netdev_speed, lksettings.lanes, + speed, width); return 0; } diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index c6a6fcaf7f7e93..aeef2d59cae767 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -557,6 +557,13 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx); irq_update_affinity_hint(msix_vec->irq, NULL); free_irq(msix_vec->irq, dev_id); + if (rf == dev_id) { + tasklet_kill(&rf->dpc_tasklet); + } else { + struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id; + + tasklet_kill(&iwceq->dpc_tasklet); + } } /** diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 02c6ea7694a088..ed1a140ccb8cfa 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1880,8 +1880,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); - if (rc) + if (rc) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, + qp->usq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + } return rc; + } } memset(&in_params, 0, sizeof(in_params)); diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 2f3a9cda3850f6..8b4a710b82bc18 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -74,6 +74,7 @@ struct siw_device { u32 vendor_part_id; int numa_node; + char raw_gid[ETH_ALEN]; /* physical port state (only one port per device) */ enum ib_port_state state; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index f88d2971c2c635..416a8c8717036a 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -973,6 +973,7 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_put(cep); new_cep->listen_cep = NULL; if (rv) { + siw_cancel_mpatimer(new_cep); siw_cep_set_free(new_cep); goto error; } @@ -1097,9 +1098,12 @@ static void siw_cm_work_handler(struct work_struct *w) /* * Socket close before MPA request received. */ - siw_dbg_cep(cep, "no mpareq: drop listener\n"); - siw_cep_put(cep->listen_cep); - cep->listen_cep = NULL; + if (cep->listen_cep) { + siw_dbg_cep(cep, + "no mpareq: drop listener\n"); + siw_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + } } } release_cep = 1; @@ -1222,7 +1226,11 @@ static void siw_cm_llp_data_ready(struct sock *sk) if (!cep) goto out; - siw_dbg_cep(cep, "state: %d\n", cep->state); + siw_dbg_cep(cep, "cep state: %d, socket state %d\n", + cep->state, sk->sk_state); + + if (sk->sk_state != TCP_ESTABLISHED) + goto out; switch (cep->state) { case SIW_EPSTATE_RDMA_MODE: diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 23e6c02e77ced0..bbf5612862ffa3 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -75,8 +75,7 @@ static int siw_device_register(struct siw_device *sdev, const char *name) return rv; } - siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr); - + siw_dbg(base_dev, "HWaddr=%pM\n", sdev->raw_gid); return 0; } @@ -314,24 +313,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev) return NULL; base_dev = &sdev->base_dev; - sdev->netdev = netdev; - if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) { - addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, - netdev->dev_addr); + if (netdev->addr_len) { + memcpy(sdev->raw_gid, netdev->dev_addr, + min_t(unsigned int, netdev->addr_len, ETH_ALEN)); } else { /* - * This device does not have a HW address, - * but connection mangagement lib expects gid != 0 + * This device does not have a HW address, but + * connection mangagement requires a unique gid. */ - size_t len = min_t(size_t, strlen(base_dev->name), 6); - char addr[6] = { }; - - memcpy(addr, base_dev->name, len); - addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, - addr); + eth_random_addr(sdev->raw_gid); } + addrconf_addr_eui48((u8 *)&base_dev->node_guid, sdev->raw_gid); base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 3e814cfb298cf8..e545654697b176 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -157,7 +157,7 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, attr->vendor_part_id = sdev->vendor_part_id; addrconf_addr_eui48((u8 *)&attr->sys_image_guid, - sdev->netdev->dev_addr); + sdev->raw_gid); return 0; } @@ -218,7 +218,7 @@ int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, /* subnet_prefix == interface_id == 0; */ memset(gid, 0, sizeof(*gid)); - memcpy(&gid->raw[0], sdev->netdev->dev_addr, 6); + memcpy(gid->raw, sdev->raw_gid, ETH_ALEN); return 0; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 239194da2782a6..260a7baa3db988 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -79,12 +79,16 @@ module_param(srpt_srq_size, int, 0444); MODULE_PARM_DESC(srpt_srq_size, "Shared receive queue (SRQ) size."); +static int srpt_set_u64_x(const char *buffer, const struct kernel_param *kp) +{ + return kstrtou64(buffer, 16, (u64 *)kp->arg); +} static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%016llx\n", *(u64 *)kp->arg); } -module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, - 0444); +module_param_call(srpt_service_guid, srpt_set_u64_x, srpt_get_u64_x, + &srpt_service_guid, 0444); MODULE_PARM_DESC(srpt_service_guid, "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA."); @@ -3204,7 +3208,6 @@ static int srpt_add_one(struct ib_device *device) INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, srpt_event_handler); - ib_register_event_handler(&sdev->event_handler); for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; @@ -3227,6 +3230,7 @@ static int srpt_add_one(struct ib_device *device) } } + ib_register_event_handler(&sdev->event_handler); spin_lock(&srpt_dev_lock); list_add_tail(&sdev->list, &srpt_dev_list); spin_unlock(&srpt_dev_lock); @@ -3237,7 +3241,6 @@ static int srpt_add_one(struct ib_device *device) err_port: srpt_unregister_mad_agent(sdev, i); - ib_unregister_event_handler(&sdev->event_handler); err_cm: if (sdev->cm_id) ib_destroy_cm_id(sdev->cm_id); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index fda24c636baec3..fe81518e1ac00f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2437,7 +2437,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) atomic_inc(&conf->active_stripes); raid5_release_stripe(sh); - conf->max_nr_stripes++; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes + 1); return 1; } @@ -2734,7 +2734,7 @@ static int drop_one_stripe(struct r5conf *conf) shrink_buffers(sh); free_stripe(conf->slab_cache, sh); atomic_dec(&conf->active_stripes); - conf->max_nr_stripes--; + WRITE_ONCE(conf->max_nr_stripes, conf->max_nr_stripes - 1); return 1; } @@ -6949,7 +6949,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) if (size <= 16 || size > 32768) return -EINVAL; - conf->min_nr_stripes = size; + WRITE_ONCE(conf->min_nr_stripes, size); mutex_lock(&conf->cache_size_mutex); while (size < conf->max_nr_stripes && drop_one_stripe(conf)) @@ -6961,7 +6961,7 @@ raid5_set_cache_size(struct mddev *mddev, int size) mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) if (!grow_one_stripe(conf, GFP_KERNEL)) { - conf->min_nr_stripes = conf->max_nr_stripes; + WRITE_ONCE(conf->min_nr_stripes, conf->max_nr_stripes); result = -ENOMEM; break; } @@ -7527,10 +7527,13 @@ static unsigned long raid5_cache_count(struct shrinker *shrink, { struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); - if (conf->max_nr_stripes < conf->min_nr_stripes) + int max_stripes = READ_ONCE(conf->max_nr_stripes); + int min_stripes = READ_ONCE(conf->min_nr_stripes); + + if (max_stripes < min_stripes) /* unlikely, but not impossible */ return 0; - return conf->max_nr_stripes - conf->min_nr_stripes; + return max_stripes - min_stripes; } static struct r5conf *setup_conf(struct mddev *mddev) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 11a2288a2c3f5d..ab9d4dc1f88390 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -245,6 +245,7 @@ static void dvb_media_device_free(struct dvb_device *dvbdev) if (dvbdev->adapter->conn) { media_device_unregister_entity(dvbdev->adapter->conn); + kfree(dvbdev->adapter->conn); dvbdev->adapter->conn = NULL; kfree(dvbdev->adapter->conn_pads); dvbdev->adapter->conn_pads = NULL; diff --git a/drivers/media/usb/pvrusb2/pvrusb2-context.c b/drivers/media/usb/pvrusb2/pvrusb2-context.c index 14170a5d72b350..1764674de98bc0 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-context.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-context.c @@ -268,7 +268,8 @@ void pvr2_context_disconnect(struct pvr2_context *mp) { pvr2_hdw_disconnect(mp->hdw); mp->disconnect_flag = !0; - pvr2_context_notify(mp); + if (!pvr2_context_shutok()) + pvr2_context_notify(mp); } diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 5e9d3da862dd86..e59a463c27618e 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -1402,6 +1402,9 @@ int uvc_query_v4l2_menu(struct uvc_video_chain *chain, query_menu->id = id; query_menu->index = index; + if (index >= BITS_PER_TYPE(mapping->menu_mask)) + return -EINVAL; + ret = mutex_lock_interruptible(&chain->ctrl_mutex); if (ret < 0) return -ERESTARTSYS; diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index cd67c85cc87de4..ac323b44598a8e 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -364,9 +364,6 @@ static int mtdchar_writeoob(struct file *file, struct mtd_info *mtd, uint32_t retlen; int ret = 0; - if (!(file->f_mode & FMODE_WRITE)) - return -EPERM; - if (length > 4096) return -EINVAL; @@ -651,6 +648,48 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) pr_debug("MTD_ioctl\n"); + /* + * Check the file mode to require "dangerous" commands to have write + * permissions. + */ + switch (cmd) { + /* "safe" commands */ + case MEMGETREGIONCOUNT: + case MEMGETREGIONINFO: + case MEMGETINFO: + case MEMREADOOB: + case MEMREADOOB64: + case MEMISLOCKED: + case MEMGETOOBSEL: + case MEMGETBADBLOCK: + case OTPSELECT: + case OTPGETREGIONCOUNT: + case OTPGETREGIONINFO: + case ECCGETLAYOUT: + case ECCGETSTATS: + case MTDFILEMODE: + case BLKPG: + case BLKRRPART: + break; + + /* "dangerous" commands */ + case MEMERASE: + case MEMERASE64: + case MEMLOCK: + case MEMUNLOCK: + case MEMSETBADBLOCK: + case MEMWRITEOOB: + case MEMWRITEOOB64: + case MEMWRITE: + case OTPLOCK: + if (!(file->f_mode & FMODE_WRITE)) + return -EPERM; + break; + + default: + return -ENOTTY; + } + switch (cmd) { case MEMGETREGIONCOUNT: if (copy_to_user(argp, &(mtd->numeraseregions), sizeof(int))) @@ -698,9 +737,6 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) { struct erase_info *erase; - if(!(file->f_mode & FMODE_WRITE)) - return -EPERM; - erase=kzalloc(sizeof(struct erase_info),GFP_KERNEL); if (!erase) ret = -ENOMEM; @@ -993,9 +1029,6 @@ static int mtdchar_ioctl(struct file *file, u_int cmd, u_long arg) ret = 0; break; } - - default: - ret = -ENOTTY; } return ret; @@ -1039,6 +1072,11 @@ static long mtdchar_compat_ioctl(struct file *file, unsigned int cmd, struct mtd_oob_buf32 buf; struct mtd_oob_buf32 __user *buf_user = argp; + if (!(file->f_mode & FMODE_WRITE)) { + ret = -EPERM; + break; + } + if (copy_from_user(&buf, argp, sizeof(buf))) ret = -EFAULT; else diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 57c38fe86f7ba3..185945f6d667ae 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3169,23 +3169,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) -{ - u16 qid; - /* we suspect that this is good for in--kernel network services that - * want to loop incoming skb rx to tx in normal user generated traffic, - * most probably we will not get to this - */ - if (skb_rx_queue_recorded(skb)) - qid = skb_get_rx_queue(skb); - else - qid = fallback(dev, skb, NULL); - - return qid; -} - static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -3355,7 +3338,6 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_open = ena_open, .ndo_stop = ena_close, .ndo_start_xmit = ena_start_xmit, - .ndo_select_queue = ena_select_queue, .ndo_get_stats64 = ena_get_stats64, .ndo_tx_timeout = ena_tx_timeout, .ndo_change_mtu = ena_change_mtu, diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index a840644166f88e..ea88ce9ff84f8f 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -354,5 +354,17 @@ config IGC To compile this driver as a module, choose M here. The module will be called igc. +config IDPF + tristate "Intel(R) Infrastructure Data Path Function Support" + depends on PCI_MSI + select DIMLIB + select PAGE_POOL + select PAGE_POOL_STATS + help + This driver supports Intel(R) Infrastructure Data Path Function + devices. + + To compile this driver as a module, choose M here. The module + will be called idpf. endif # NET_VENDOR_INTEL diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index 3075290063f660..70cc2ae1cb97e8 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_IXGB) += ixgb/ obj-$(CONFIG_IAVF) += iavf/ obj-$(CONFIG_FM10K) += fm10k/ obj-$(CONFIG_ICE) += ice/ +obj-$(CONFIG_IDPF) += idpf/ diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile new file mode 100644 index 00000000000000..6844ead2f3acf9 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2023 Intel Corporation + +# Makefile for Intel(R) Infrastructure Data Path Function Linux Driver + +obj-$(CONFIG_IDPF) += idpf.o + +idpf-y := \ + idpf_controlq.o \ + idpf_controlq_setup.o \ + idpf_dev.o \ + idpf_ethtool.o \ + idpf_lib.o \ + idpf_main.o \ + idpf_singleq_txrx.o \ + idpf_txrx.o \ + idpf_virtchnl.o \ + idpf_vf_dev.o diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h new file mode 100644 index 00000000000000..25cde08a355ae5 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -0,0 +1,841 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_H_ +#define _IDPF_H_ + +/* Forward declaration */ +struct idpf_adapter; +struct idpf_vport; +struct idpf_vport_max_q; + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virtchnl2.h" +#include "idpf_lan_txrx.h" +#include "idpf_txrx.h" +#include "idpf_controlq.h" + +#define GETMAXVAL(num_bits) GENMASK((num_bits) - 1, 0) + +#define IDPF_NO_FREE_SLOT 0xffff + +/* Default Mailbox settings */ +#define IDPF_NUM_FILTERS_PER_MSG 20 +#define IDPF_NUM_DFLT_MBX_Q 2 /* includes both TX and RX */ +#define IDPF_DFLT_MBX_Q_LEN 64 +#define IDPF_DFLT_MBX_ID -1 +/* maximum number of times to try before resetting mailbox */ +#define IDPF_MB_MAX_ERR 20 +#define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz) \ + ((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz)) + +#define IDPF_MAX_WAIT 500 + +/* available message levels */ +#define IDPF_AVAIL_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) + +#define IDPF_DIM_PROFILE_SLOTS 5 + +#define IDPF_VIRTCHNL_VERSION_MAJOR VIRTCHNL2_VERSION_MAJOR_2 +#define IDPF_VIRTCHNL_VERSION_MINOR VIRTCHNL2_VERSION_MINOR_0 + +/** + * struct idpf_mac_filter + * @list: list member field + * @macaddr: MAC address + * @remove: filter should be removed (virtchnl) + * @add: filter should be added (virtchnl) + */ +struct idpf_mac_filter { + struct list_head list; + u8 macaddr[ETH_ALEN]; + bool remove; + bool add; +}; + +/** + * enum idpf_state - State machine to handle bring up + * @__IDPF_VER_CHECK: Negotiate virtchnl version + * @__IDPF_GET_CAPS: Negotiate capabilities + * @__IDPF_INIT_SW: Init based on given capabilities + * @__IDPF_STATE_LAST: Must be last, used to determine size + */ +enum idpf_state { + __IDPF_VER_CHECK, + __IDPF_GET_CAPS, + __IDPF_INIT_SW, + __IDPF_STATE_LAST, +}; + +/** + * enum idpf_flags - Hard reset causes. + * @IDPF_HR_FUNC_RESET: Hard reset when TxRx timeout + * @IDPF_HR_DRV_LOAD: Set on driver load for a clean HW + * @IDPF_HR_RESET_IN_PROG: Reset in progress + * @IDPF_REMOVE_IN_PROG: Driver remove in progress + * @IDPF_MB_INTR_MODE: Mailbox in interrupt mode + * @IDPF_VC_CORE_INIT: virtchnl core has been init + * @IDPF_FLAGS_NBITS: Must be last + */ +enum idpf_flags { + IDPF_HR_FUNC_RESET, + IDPF_HR_DRV_LOAD, + IDPF_HR_RESET_IN_PROG, + IDPF_REMOVE_IN_PROG, + IDPF_MB_INTR_MODE, + IDPF_VC_CORE_INIT, + IDPF_FLAGS_NBITS, +}; + +/** + * enum idpf_cap_field - Offsets into capabilities struct for specific caps + * @IDPF_BASE_CAPS: generic base capabilities + * @IDPF_CSUM_CAPS: checksum offload capabilities + * @IDPF_SEG_CAPS: segmentation offload capabilities + * @IDPF_RSS_CAPS: RSS offload capabilities + * @IDPF_HSPLIT_CAPS: Header split capabilities + * @IDPF_RSC_CAPS: RSC offload capabilities + * @IDPF_OTHER_CAPS: miscellaneous offloads + * + * Used when checking for a specific capability flag since different capability + * sets are not mutually exclusive numerically, the caller must specify which + * type of capability they are checking for. + */ +enum idpf_cap_field { + IDPF_BASE_CAPS = -1, + IDPF_CSUM_CAPS = offsetof(struct virtchnl2_get_capabilities, + csum_caps), + IDPF_SEG_CAPS = offsetof(struct virtchnl2_get_capabilities, + seg_caps), + IDPF_RSS_CAPS = offsetof(struct virtchnl2_get_capabilities, + rss_caps), + IDPF_HSPLIT_CAPS = offsetof(struct virtchnl2_get_capabilities, + hsplit_caps), + IDPF_RSC_CAPS = offsetof(struct virtchnl2_get_capabilities, + rsc_caps), + IDPF_OTHER_CAPS = offsetof(struct virtchnl2_get_capabilities, + other_caps), +}; + +/** + * enum idpf_vport_state - Current vport state + * @__IDPF_VPORT_DOWN: Vport is down + * @__IDPF_VPORT_UP: Vport is up + * @__IDPF_VPORT_STATE_LAST: Must be last, number of states + */ +enum idpf_vport_state { + __IDPF_VPORT_DOWN, + __IDPF_VPORT_UP, + __IDPF_VPORT_STATE_LAST, +}; + +/** + * struct idpf_netdev_priv - Struct to store vport back pointer + * @adapter: Adapter back pointer + * @vport: Vport back pointer + * @vport_id: Vport identifier + * @vport_idx: Relative vport index + * @state: See enum idpf_vport_state + * @netstats: Packet and byte stats + * @stats_lock: Lock to protect stats update + */ +struct idpf_netdev_priv { + struct idpf_adapter *adapter; + struct idpf_vport *vport; + u32 vport_id; + u16 vport_idx; + enum idpf_vport_state state; + struct rtnl_link_stats64 netstats; + spinlock_t stats_lock; +}; + +/** + * struct idpf_reset_reg - Reset register offsets/masks + * @rstat: Reset status register + * @rstat_m: Reset status mask + */ +struct idpf_reset_reg { + void __iomem *rstat; + u32 rstat_m; +}; + +/** + * struct idpf_vport_max_q - Queue limits + * @max_rxq: Maximum number of RX queues supported + * @max_txq: Maixmum number of TX queues supported + * @max_bufq: In splitq, maximum number of buffer queues supported + * @max_complq: In splitq, maximum number of completion queues supported + */ +struct idpf_vport_max_q { + u16 max_rxq; + u16 max_txq; + u16 max_bufq; + u16 max_complq; +}; + +/** + * struct idpf_reg_ops - Device specific register operation function pointers + * @ctlq_reg_init: Mailbox control queue register initialization + * @intr_reg_init: Traffic interrupt register initialization + * @mb_intr_reg_init: Mailbox interrupt register initialization + * @reset_reg_init: Reset register initialization + * @trigger_reset: Trigger a reset to occur + */ +struct idpf_reg_ops { + void (*ctlq_reg_init)(struct idpf_ctlq_create_info *cq); + int (*intr_reg_init)(struct idpf_vport *vport); + void (*mb_intr_reg_init)(struct idpf_adapter *adapter); + void (*reset_reg_init)(struct idpf_adapter *adapter); + void (*trigger_reset)(struct idpf_adapter *adapter, + enum idpf_flags trig_cause); +}; + +/** + * struct idpf_dev_ops - Device specific operations + * @reg_ops: Register operations + */ +struct idpf_dev_ops { + struct idpf_reg_ops reg_ops; +}; + +/** + * enum idpf_vport_reset_cause - Vport soft reset causes + * @IDPF_SR_Q_CHANGE: Soft reset queue change + * @IDPF_SR_Q_DESC_CHANGE: Soft reset descriptor change + * @IDPF_SR_MTU_CHANGE: Soft reset MTU change + * @IDPF_SR_RSC_CHANGE: Soft reset RSC change + */ +enum idpf_vport_reset_cause { + IDPF_SR_Q_CHANGE, + IDPF_SR_Q_DESC_CHANGE, + IDPF_SR_MTU_CHANGE, + IDPF_SR_RSC_CHANGE, +}; + +/** + * enum idpf_vport_flags - Vport flags + * @IDPF_VPORT_DEL_QUEUES: To send delete queues message + * @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets + * processing is done + * @IDPF_VPORT_FLAGS_NBITS: Must be last + */ +enum idpf_vport_flags { + IDPF_VPORT_DEL_QUEUES, + IDPF_VPORT_SW_MARKER, + IDPF_VPORT_FLAGS_NBITS, +}; + +struct idpf_port_stats { + struct u64_stats_sync stats_sync; + u64_stats_t rx_hw_csum_err; + u64_stats_t rx_hsplit; + u64_stats_t rx_hsplit_hbo; + u64_stats_t rx_bad_descs; + u64_stats_t tx_linearize; + u64_stats_t tx_busy; + u64_stats_t tx_drops; + u64_stats_t tx_dma_map_errs; + struct virtchnl2_vport_stats vport_stats; +}; + +/** + * struct idpf_vport - Handle for netdevices and queue resources + * @num_txq: Number of allocated TX queues + * @num_complq: Number of allocated completion queues + * @txq_desc_count: TX queue descriptor count + * @complq_desc_count: Completion queue descriptor count + * @compln_clean_budget: Work budget for completion clean + * @num_txq_grp: Number of TX queue groups + * @txq_grps: Array of TX queue groups + * @txq_model: Split queue or single queue queuing model + * @txqs: Used only in hotpath to get to the right queue very fast + * @crc_enable: Enable CRC insertion offload + * @num_rxq: Number of allocated RX queues + * @num_bufq: Number of allocated buffer queues + * @rxq_desc_count: RX queue descriptor count. *MUST* have enough descriptors + * to complete all buffer descriptors for all buffer queues in + * the worst case. + * @num_bufqs_per_qgrp: Buffer queues per RX queue in a given grouping + * @bufq_desc_count: Buffer queue descriptor count + * @bufq_size: Size of buffers in ring (e.g. 2K, 4K, etc) + * @num_rxq_grp: Number of RX queues in a group + * @rxq_grps: Total number of RX groups. Number of groups * number of RX per + * group will yield total number of RX queues. + * @rxq_model: Splitq queue or single queue queuing model + * @rx_ptype_lkup: Lookup table for ptypes on RX + * @adapter: back pointer to associated adapter + * @netdev: Associated net_device. Each vport should have one and only one + * associated netdev. + * @flags: See enum idpf_vport_flags + * @vport_type: Default SRIOV, SIOV, etc. + * @vport_id: Device given vport identifier + * @idx: Software index in adapter vports struct + * @default_vport: Use this vport if one isn't specified + * @base_rxd: True if the driver should use base descriptors instead of flex + * @num_q_vectors: Number of IRQ vectors allocated + * @q_vectors: Array of queue vectors + * @q_vector_idxs: Starting index of queue vectors + * @max_mtu: device given max possible MTU + * @default_mac_addr: device will give a default MAC to use + * @rx_itr_profile: RX profiles for Dynamic Interrupt Moderation + * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation + * @port_stats: per port csum, header split, and other offload stats + * @link_up: True if link is up + * @link_speed_mbps: Link speed in mbps + * @sw_marker_wq: workqueue for marker packets + */ +struct idpf_vport { + u16 num_txq; + u16 num_complq; + u32 txq_desc_count; + u32 complq_desc_count; + u32 compln_clean_budget; + u16 num_txq_grp; + struct idpf_txq_group *txq_grps; + u32 txq_model; + struct idpf_queue **txqs; + bool crc_enable; + + u16 num_rxq; + u16 num_bufq; + u32 rxq_desc_count; + u8 num_bufqs_per_qgrp; + u32 bufq_desc_count[IDPF_MAX_BUFQS_PER_RXQ_GRP]; + u32 bufq_size[IDPF_MAX_BUFQS_PER_RXQ_GRP]; + u16 num_rxq_grp; + struct idpf_rxq_group *rxq_grps; + u32 rxq_model; + struct idpf_rx_ptype_decoded rx_ptype_lkup[IDPF_RX_MAX_PTYPE]; + + struct idpf_adapter *adapter; + struct net_device *netdev; + DECLARE_BITMAP(flags, IDPF_VPORT_FLAGS_NBITS); + u16 vport_type; + u32 vport_id; + u16 idx; + bool default_vport; + bool base_rxd; + + u16 num_q_vectors; + struct idpf_q_vector *q_vectors; + u16 *q_vector_idxs; + u16 max_mtu; + u8 default_mac_addr[ETH_ALEN]; + u16 rx_itr_profile[IDPF_DIM_PROFILE_SLOTS]; + u16 tx_itr_profile[IDPF_DIM_PROFILE_SLOTS]; + struct idpf_port_stats port_stats; + + bool link_up; + u32 link_speed_mbps; + + wait_queue_head_t sw_marker_wq; +}; + +/** + * enum idpf_user_flags + * @__IDPF_USER_FLAG_HSPLIT: header split state + * @__IDPF_PROMISC_UC: Unicast promiscuous mode + * @__IDPF_PROMISC_MC: Multicast promiscuous mode + * @__IDPF_USER_FLAGS_NBITS: Must be last + */ +enum idpf_user_flags { + __IDPF_USER_FLAG_HSPLIT = 0U, + __IDPF_PROMISC_UC = 32, + __IDPF_PROMISC_MC, + + __IDPF_USER_FLAGS_NBITS, +}; + +/** + * struct idpf_rss_data - Associated RSS data + * @rss_key_size: Size of RSS hash key + * @rss_key: RSS hash key + * @rss_lut_size: Size of RSS lookup table + * @rss_lut: RSS lookup table + * @cached_lut: Used to restore previously init RSS lut + */ +struct idpf_rss_data { + u16 rss_key_size; + u8 *rss_key; + u16 rss_lut_size; + u32 *rss_lut; + u32 *cached_lut; +}; + +/** + * struct idpf_vport_user_config_data - User defined configuration values for + * each vport. + * @rss_data: See struct idpf_rss_data + * @num_req_tx_qs: Number of user requested TX queues through ethtool + * @num_req_rx_qs: Number of user requested RX queues through ethtool + * @num_req_txq_desc: Number of user requested TX queue descriptors through + * ethtool + * @num_req_rxq_desc: Number of user requested RX queue descriptors through + * ethtool + * @user_flags: User toggled config flags + * @mac_filter_list: List of MAC filters + * + * Used to restore configuration after a reset as the vport will get wiped. + */ +struct idpf_vport_user_config_data { + struct idpf_rss_data rss_data; + u16 num_req_tx_qs; + u16 num_req_rx_qs; + u32 num_req_txq_desc; + u32 num_req_rxq_desc; + DECLARE_BITMAP(user_flags, __IDPF_USER_FLAGS_NBITS); + struct list_head mac_filter_list; +}; + +/** + * enum idpf_vport_config_flags - Vport config flags + * @IDPF_VPORT_REG_NETDEV: Register netdev + * @IDPF_VPORT_UP_REQUESTED: Set if interface up is requested on core reset + * @IDPF_VPORT_CONFIG_FLAGS_NBITS: Must be last + */ +enum idpf_vport_config_flags { + IDPF_VPORT_REG_NETDEV, + IDPF_VPORT_UP_REQUESTED, + IDPF_VPORT_CONFIG_FLAGS_NBITS, +}; + +/** + * struct idpf_avail_queue_info + * @avail_rxq: Available RX queues + * @avail_txq: Available TX queues + * @avail_bufq: Available buffer queues + * @avail_complq: Available completion queues + * + * Maintain total queues available after allocating max queues to each vport. + */ +struct idpf_avail_queue_info { + u16 avail_rxq; + u16 avail_txq; + u16 avail_bufq; + u16 avail_complq; +}; + +/** + * struct idpf_vector_info - Utility structure to pass function arguments as a + * structure + * @num_req_vecs: Vectors required based on the number of queues updated by the + * user via ethtool + * @num_curr_vecs: Current number of vectors, must be >= @num_req_vecs + * @index: Relative starting index for vectors + * @default_vport: Vectors are for default vport + */ +struct idpf_vector_info { + u16 num_req_vecs; + u16 num_curr_vecs; + u16 index; + bool default_vport; +}; + +/** + * struct idpf_vector_lifo - Stack to maintain vector indexes used for vector + * distribution algorithm + * @top: Points to stack top i.e. next available vector index + * @base: Always points to start of the free pool + * @size: Total size of the vector stack + * @vec_idx: Array to store all the vector indexes + * + * Vector stack maintains all the relative vector indexes at the *adapter* + * level. This stack is divided into 2 parts, first one is called as 'default + * pool' and other one is called 'free pool'. Vector distribution algorithm + * gives priority to default vports in a way that at least IDPF_MIN_Q_VEC + * vectors are allocated per default vport and the relative vector indexes for + * those are maintained in default pool. Free pool contains all the unallocated + * vector indexes which can be allocated on-demand basis. Mailbox vector index + * is maintained in the default pool of the stack. + */ +struct idpf_vector_lifo { + u16 top; + u16 base; + u16 size; + u16 *vec_idx; +}; + +/** + * struct idpf_vport_config - Vport configuration data + * @user_config: see struct idpf_vport_user_config_data + * @max_q: Maximum possible queues + * @req_qs_chunks: Queue chunk data for requested queues + * @mac_filter_list_lock: Lock to protect mac filters + * @flags: See enum idpf_vport_config_flags + */ +struct idpf_vport_config { + struct idpf_vport_user_config_data user_config; + struct idpf_vport_max_q max_q; + struct virtchnl2_add_queues *req_qs_chunks; + spinlock_t mac_filter_list_lock; + DECLARE_BITMAP(flags, IDPF_VPORT_CONFIG_FLAGS_NBITS); +}; + +struct idpf_vc_xn_manager; + +/** + * struct idpf_adapter - Device data struct generated on probe + * @pdev: PCI device struct given on probe + * @virt_ver_maj: Virtchnl version major + * @virt_ver_min: Virtchnl version minor + * @msg_enable: Debug message level enabled + * @mb_wait_count: Number of times mailbox was attempted initialization + * @state: Init state machine + * @flags: See enum idpf_flags + * @reset_reg: See struct idpf_reset_reg + * @hw: Device access data + * @num_req_msix: Requested number of MSIX vectors + * @num_avail_msix: Available number of MSIX vectors + * @num_msix_entries: Number of entries in MSIX table + * @msix_entries: MSIX table + * @req_vec_chunks: Requested vector chunk data + * @mb_vector: Mailbox vector data + * @vector_stack: Stack to store the msix vector indexes + * @irq_mb_handler: Handler for hard interrupt for mailbox + * @tx_timeout_count: Number of TX timeouts that have occurred + * @avail_queues: Device given queue limits + * @vports: Array to store vports created by the driver + * @netdevs: Associated Vport netdevs + * @vport_params_reqd: Vport params requested + * @vport_params_recvd: Vport params received + * @vport_ids: Array of device given vport identifiers + * @vport_config: Vport config parameters + * @max_vports: Maximum vports that can be allocated + * @num_alloc_vports: Current number of vports allocated + * @next_vport: Next free slot in pf->vport[] - 0-based! + * @init_task: Initialization task + * @init_wq: Workqueue for initialization task + * @serv_task: Periodically recurring maintenance task + * @serv_wq: Workqueue for service task + * @mbx_task: Task to handle mailbox interrupts + * @mbx_wq: Workqueue for mailbox responses + * @vc_event_task: Task to handle out of band virtchnl event notifications + * @vc_event_wq: Workqueue for virtchnl events + * @stats_task: Periodic statistics retrieval task + * @stats_wq: Workqueue for statistics task + * @caps: Negotiated capabilities with device + * @vcxn_mngr: Virtchnl transaction manager + * @dev_ops: See idpf_dev_ops + * @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk + * to VFs but is used to initialize them + * @crc_enable: Enable CRC insertion offload + * @req_tx_splitq: TX split or single queue model to request + * @req_rx_splitq: RX split or single queue model to request + * @vport_ctrl_lock: Lock to protect the vport control flow + * @vector_lock: Lock to protect vector distribution + * @queue_lock: Lock to protect queue distribution + * @vc_buf_lock: Lock to protect virtchnl buffer + */ +struct idpf_adapter { + struct pci_dev *pdev; + u32 virt_ver_maj; + u32 virt_ver_min; + + u32 msg_enable; + u32 mb_wait_count; + enum idpf_state state; + DECLARE_BITMAP(flags, IDPF_FLAGS_NBITS); + struct idpf_reset_reg reset_reg; + struct idpf_hw hw; + u16 num_req_msix; + u16 num_avail_msix; + u16 num_msix_entries; + struct msix_entry *msix_entries; + struct virtchnl2_alloc_vectors *req_vec_chunks; + struct idpf_q_vector mb_vector; + struct idpf_vector_lifo vector_stack; + irqreturn_t (*irq_mb_handler)(int irq, void *data); + + u32 tx_timeout_count; + struct idpf_avail_queue_info avail_queues; + struct idpf_vport **vports; + struct net_device **netdevs; + struct virtchnl2_create_vport **vport_params_reqd; + struct virtchnl2_create_vport **vport_params_recvd; + u32 *vport_ids; + + struct idpf_vport_config **vport_config; + u16 max_vports; + u16 num_alloc_vports; + u16 next_vport; + + struct delayed_work init_task; + struct workqueue_struct *init_wq; + struct delayed_work serv_task; + struct workqueue_struct *serv_wq; + struct delayed_work mbx_task; + struct workqueue_struct *mbx_wq; + struct delayed_work vc_event_task; + struct workqueue_struct *vc_event_wq; + struct delayed_work stats_task; + struct workqueue_struct *stats_wq; + struct virtchnl2_get_capabilities caps; + struct idpf_vc_xn_manager *vcxn_mngr; + + struct idpf_dev_ops dev_ops; + int num_vfs; + bool crc_enable; + bool req_tx_splitq; + bool req_rx_splitq; + + struct mutex vport_ctrl_lock; + struct mutex vector_lock; + struct mutex queue_lock; + struct mutex vc_buf_lock; +}; + +/** + * idpf_is_queue_model_split - check if queue model is split + * @q_model: queue model single or split + * + * Returns true if queue model is split else false + */ +static inline int idpf_is_queue_model_split(u16 q_model) +{ + return q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT; +} + +#define idpf_is_cap_ena(adapter, field, flag) \ + idpf_is_capability_ena(adapter, false, field, flag) +#define idpf_is_cap_ena_all(adapter, field, flag) \ + idpf_is_capability_ena(adapter, true, field, flag) + +bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, + enum idpf_cap_field field, u64 flag); + +#define IDPF_CAP_RSS (\ + VIRTCHNL2_CAP_RSS_IPV4_TCP |\ + VIRTCHNL2_CAP_RSS_IPV4_TCP |\ + VIRTCHNL2_CAP_RSS_IPV4_UDP |\ + VIRTCHNL2_CAP_RSS_IPV4_SCTP |\ + VIRTCHNL2_CAP_RSS_IPV4_OTHER |\ + VIRTCHNL2_CAP_RSS_IPV6_TCP |\ + VIRTCHNL2_CAP_RSS_IPV6_TCP |\ + VIRTCHNL2_CAP_RSS_IPV6_UDP |\ + VIRTCHNL2_CAP_RSS_IPV6_SCTP |\ + VIRTCHNL2_CAP_RSS_IPV6_OTHER) + +#define IDPF_CAP_RSC (\ + VIRTCHNL2_CAP_RSC_IPV4_TCP |\ + VIRTCHNL2_CAP_RSC_IPV6_TCP) + +#define IDPF_CAP_HSPLIT (\ + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 |\ + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6) + +#define IDPF_CAP_RX_CSUM_L4V4 (\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP) + +#define IDPF_CAP_RX_CSUM_L4V6 (\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) + +#define IDPF_CAP_RX_CSUM (\ + VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP) + +#define IDPF_CAP_SCTP_CSUM (\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |\ + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |\ + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP) + +#define IDPF_CAP_TUNNEL_TX_CSUM (\ + VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL |\ + VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL) + +/** + * idpf_get_reserved_vecs - Get reserved vectors + * @adapter: private data struct + */ +static inline u16 idpf_get_reserved_vecs(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.num_allocated_vectors); +} + +/** + * idpf_get_default_vports - Get default number of vports + * @adapter: private data struct + */ +static inline u16 idpf_get_default_vports(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.default_num_vports); +} + +/** + * idpf_get_max_vports - Get max number of vports + * @adapter: private data struct + */ +static inline u16 idpf_get_max_vports(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.max_vports); +} + +/** + * idpf_get_max_tx_bufs - Get max scatter-gather buffers supported by the device + * @adapter: private data struct + */ +static inline unsigned int idpf_get_max_tx_bufs(struct idpf_adapter *adapter) +{ + return adapter->caps.max_sg_bufs_per_tx_pkt; +} + +/** + * idpf_get_min_tx_pkt_len - Get min packet length supported by the device + * @adapter: private data struct + */ +static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter) +{ + u8 pkt_len = adapter->caps.min_sso_packet_len; + + return pkt_len ? pkt_len : IDPF_TX_MIN_PKT_LEN; +} + +/** + * idpf_get_reg_addr - Get BAR0 register address + * @adapter: private data struct + * @reg_offset: register offset value + * + * Based on the register offset, return the actual BAR0 register address + */ +static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter, + resource_size_t reg_offset) +{ + return (void __iomem *)(adapter->hw.hw_addr + reg_offset); +} + +/** + * idpf_is_reset_detected - check if we were reset at some point + * @adapter: driver specific private structure + * + * Returns true if we are either in reset currently or were previously reset. + */ +static inline bool idpf_is_reset_detected(struct idpf_adapter *adapter) +{ + if (!adapter->hw.arq) + return true; + + return !(readl(idpf_get_reg_addr(adapter, adapter->hw.arq->reg.len)) & + adapter->hw.arq->reg.len_mask); +} + +/** + * idpf_is_reset_in_prog - check if reset is in progress + * @adapter: driver specific private structure + * + * Returns true if hard reset is in progress, false otherwise + */ +static inline bool idpf_is_reset_in_prog(struct idpf_adapter *adapter) +{ + return (test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags) || + test_bit(IDPF_HR_FUNC_RESET, adapter->flags) || + test_bit(IDPF_HR_DRV_LOAD, adapter->flags)); +} + +/** + * idpf_netdev_to_vport - get a vport handle from a netdev + * @netdev: network interface device structure + */ +static inline struct idpf_vport *idpf_netdev_to_vport(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + return np->vport; +} + +/** + * idpf_netdev_to_adapter - Get adapter handle from a netdev + * @netdev: Network interface device structure + */ +static inline struct idpf_adapter *idpf_netdev_to_adapter(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + return np->adapter; +} + +/** + * idpf_is_feature_ena - Determine if a particular feature is enabled + * @vport: Vport to check + * @feature: Netdev flag to check + * + * Returns true or false if a particular feature is enabled. + */ +static inline bool idpf_is_feature_ena(const struct idpf_vport *vport, + netdev_features_t feature) +{ + return vport->netdev->features & feature; +} + +/** + * idpf_get_max_tx_hdr_size -- get the size of tx header + * @adapter: Driver specific private structure + */ +static inline u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.max_tx_hdr_size); +} + +/** + * idpf_vport_ctrl_lock - Acquire the vport control lock + * @netdev: Network interface device structure + * + * This lock should be used by non-datapath code to protect against vport + * destruction. + */ +static inline void idpf_vport_ctrl_lock(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + mutex_lock(&np->adapter->vport_ctrl_lock); +} + +/** + * idpf_vport_ctrl_unlock - Release the vport control lock + * @netdev: Network interface device structure + */ +static inline void idpf_vport_ctrl_unlock(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + mutex_unlock(&np->adapter->vport_ctrl_lock); +} + +void idpf_statistics_task(struct work_struct *work); +void idpf_init_task(struct work_struct *work); +void idpf_service_task(struct work_struct *work); +void idpf_mbx_task(struct work_struct *work); +void idpf_vc_event_task(struct work_struct *work); +void idpf_dev_ops_init(struct idpf_adapter *adapter); +void idpf_vf_dev_ops_init(struct idpf_adapter *adapter); +int idpf_intr_req(struct idpf_adapter *adapter); +void idpf_intr_rel(struct idpf_adapter *adapter); +u16 idpf_get_max_tx_hdr_size(struct idpf_adapter *adapter); +int idpf_initiate_soft_reset(struct idpf_vport *vport, + enum idpf_vport_reset_cause reset_cause); +void idpf_deinit_task(struct idpf_adapter *adapter); +int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter, + u16 *q_vector_idxs, + struct idpf_vector_info *vec_info); +void idpf_set_ethtool_ops(struct net_device *netdev); +void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector, + u16 itr, bool tx); +int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs); + +u8 idpf_vport_get_hsplit(const struct idpf_vport *vport); +bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val); + +#endif /* !_IDPF_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.c b/drivers/net/ethernet/intel/idpf/idpf_controlq.c new file mode 100644 index 00000000000000..4849590a5591f1 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf_controlq.h" + +/** + * idpf_ctlq_setup_regs - initialize control queue registers + * @cq: pointer to the specific control queue + * @q_create_info: structs containing info for each queue to be initialized + */ +static void idpf_ctlq_setup_regs(struct idpf_ctlq_info *cq, + struct idpf_ctlq_create_info *q_create_info) +{ + /* set control queue registers in our local struct */ + cq->reg.head = q_create_info->reg.head; + cq->reg.tail = q_create_info->reg.tail; + cq->reg.len = q_create_info->reg.len; + cq->reg.bah = q_create_info->reg.bah; + cq->reg.bal = q_create_info->reg.bal; + cq->reg.len_mask = q_create_info->reg.len_mask; + cq->reg.len_ena_mask = q_create_info->reg.len_ena_mask; + cq->reg.head_mask = q_create_info->reg.head_mask; +} + +/** + * idpf_ctlq_init_regs - Initialize control queue registers + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * @is_rxq: true if receive control queue, false otherwise + * + * Initialize registers. The caller is expected to have already initialized the + * descriptor ring memory and buffer memory + */ +static void idpf_ctlq_init_regs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, + bool is_rxq) +{ + /* Update tail to post pre-allocated buffers for rx queues */ + if (is_rxq) + wr32(hw, cq->reg.tail, (u32)(cq->ring_size - 1)); + + /* For non-Mailbox control queues only TAIL need to be set */ + if (cq->q_id != -1) + return; + + /* Clear Head for both send or receive */ + wr32(hw, cq->reg.head, 0); + + /* set starting point */ + wr32(hw, cq->reg.bal, lower_32_bits(cq->desc_ring.pa)); + wr32(hw, cq->reg.bah, upper_32_bits(cq->desc_ring.pa)); + wr32(hw, cq->reg.len, (cq->ring_size | cq->reg.len_ena_mask)); +} + +/** + * idpf_ctlq_init_rxq_bufs - populate receive queue descriptors with buf + * @cq: pointer to the specific Control queue + * + * Record the address of the receive queue DMA buffers in the descriptors. + * The buffers must have been previously allocated. + */ +static void idpf_ctlq_init_rxq_bufs(struct idpf_ctlq_info *cq) +{ + int i; + + for (i = 0; i < cq->ring_size; i++) { + struct idpf_ctlq_desc *desc = IDPF_CTLQ_DESC(cq, i); + struct idpf_dma_mem *bi = cq->bi.rx_buff[i]; + + /* No buffer to post to descriptor, continue */ + if (!bi) + continue; + + desc->flags = + cpu_to_le16(IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD); + desc->opcode = 0; + desc->datalen = cpu_to_le16(bi->size); + desc->ret_val = 0; + desc->v_opcode_dtype = 0; + desc->v_retval = 0; + desc->params.indirect.addr_high = + cpu_to_le32(upper_32_bits(bi->pa)); + desc->params.indirect.addr_low = + cpu_to_le32(lower_32_bits(bi->pa)); + desc->params.indirect.param0 = 0; + desc->params.indirect.sw_cookie = 0; + desc->params.indirect.v_flags = 0; + } +} + +/** + * idpf_ctlq_shutdown - shutdown the CQ + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * The main shutdown routine for any controq queue + */ +static void idpf_ctlq_shutdown(struct idpf_hw *hw, struct idpf_ctlq_info *cq) +{ + mutex_lock(&cq->cq_lock); + + /* free ring buffers and the ring itself */ + idpf_ctlq_dealloc_ring_res(hw, cq); + + /* Set ring_size to 0 to indicate uninitialized queue */ + cq->ring_size = 0; + + mutex_unlock(&cq->cq_lock); + mutex_destroy(&cq->cq_lock); +} + +/** + * idpf_ctlq_add - add one control queue + * @hw: pointer to hardware struct + * @qinfo: info for queue to be created + * @cq_out: (output) double pointer to control queue to be created + * + * Allocate and initialize a control queue and add it to the control queue list. + * The cq parameter will be allocated/initialized and passed back to the caller + * if no errors occur. + * + * Note: idpf_ctlq_init must be called prior to any calls to idpf_ctlq_add + */ +int idpf_ctlq_add(struct idpf_hw *hw, + struct idpf_ctlq_create_info *qinfo, + struct idpf_ctlq_info **cq_out) +{ + struct idpf_ctlq_info *cq; + bool is_rxq = false; + int err; + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return -ENOMEM; + + cq->cq_type = qinfo->type; + cq->q_id = qinfo->id; + cq->buf_size = qinfo->buf_size; + cq->ring_size = qinfo->len; + + cq->next_to_use = 0; + cq->next_to_clean = 0; + cq->next_to_post = cq->ring_size - 1; + + switch (qinfo->type) { + case IDPF_CTLQ_TYPE_MAILBOX_RX: + is_rxq = true; + fallthrough; + case IDPF_CTLQ_TYPE_MAILBOX_TX: + err = idpf_ctlq_alloc_ring_res(hw, cq); + break; + default: + err = -EBADR; + break; + } + + if (err) + goto init_free_q; + + if (is_rxq) { + idpf_ctlq_init_rxq_bufs(cq); + } else { + /* Allocate the array of msg pointers for TX queues */ + cq->bi.tx_msg = kcalloc(qinfo->len, + sizeof(struct idpf_ctlq_msg *), + GFP_KERNEL); + if (!cq->bi.tx_msg) { + err = -ENOMEM; + goto init_dealloc_q_mem; + } + } + + idpf_ctlq_setup_regs(cq, qinfo); + + idpf_ctlq_init_regs(hw, cq, is_rxq); + + mutex_init(&cq->cq_lock); + + list_add(&cq->cq_list, &hw->cq_list_head); + + *cq_out = cq; + + return 0; + +init_dealloc_q_mem: + /* free ring buffers and the ring itself */ + idpf_ctlq_dealloc_ring_res(hw, cq); +init_free_q: + kfree(cq); + + return err; +} + +/** + * idpf_ctlq_remove - deallocate and remove specified control queue + * @hw: pointer to hardware struct + * @cq: pointer to control queue to be removed + */ +void idpf_ctlq_remove(struct idpf_hw *hw, + struct idpf_ctlq_info *cq) +{ + list_del(&cq->cq_list); + idpf_ctlq_shutdown(hw, cq); + kfree(cq); +} + +/** + * idpf_ctlq_init - main initialization routine for all control queues + * @hw: pointer to hardware struct + * @num_q: number of queues to initialize + * @q_info: array of structs containing info for each queue to be initialized + * + * This initializes any number and any type of control queues. This is an all + * or nothing routine; if one fails, all previously allocated queues will be + * destroyed. This must be called prior to using the individual add/remove + * APIs. + */ +int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q, + struct idpf_ctlq_create_info *q_info) +{ + struct idpf_ctlq_info *cq, *tmp; + int err; + int i; + + INIT_LIST_HEAD(&hw->cq_list_head); + + for (i = 0; i < num_q; i++) { + struct idpf_ctlq_create_info *qinfo = q_info + i; + + err = idpf_ctlq_add(hw, qinfo, &cq); + if (err) + goto init_destroy_qs; + } + + return 0; + +init_destroy_qs: + list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list) + idpf_ctlq_remove(hw, cq); + + return err; +} + +/** + * idpf_ctlq_deinit - destroy all control queues + * @hw: pointer to hw struct + */ +void idpf_ctlq_deinit(struct idpf_hw *hw) +{ + struct idpf_ctlq_info *cq, *tmp; + + list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list) + idpf_ctlq_remove(hw, cq); +} + +/** + * idpf_ctlq_send - send command to Control Queue (CTQ) + * @hw: pointer to hw struct + * @cq: handle to control queue struct to send on + * @num_q_msg: number of messages to send on control queue + * @q_msg: pointer to array of queue messages to be sent + * + * The caller is expected to allocate DMAable buffers and pass them to the + * send routine via the q_msg struct / control queue specific data struct. + * The control queue will hold a reference to each send message until + * the completion for that message has been cleaned. + */ +int idpf_ctlq_send(struct idpf_hw *hw, struct idpf_ctlq_info *cq, + u16 num_q_msg, struct idpf_ctlq_msg q_msg[]) +{ + struct idpf_ctlq_desc *desc; + int num_desc_avail; + int err = 0; + int i; + + mutex_lock(&cq->cq_lock); + + /* Ensure there are enough descriptors to send all messages */ + num_desc_avail = IDPF_CTLQ_DESC_UNUSED(cq); + if (num_desc_avail == 0 || num_desc_avail < num_q_msg) { + err = -ENOSPC; + goto err_unlock; + } + + for (i = 0; i < num_q_msg; i++) { + struct idpf_ctlq_msg *msg = &q_msg[i]; + + desc = IDPF_CTLQ_DESC(cq, cq->next_to_use); + + desc->opcode = cpu_to_le16(msg->opcode); + desc->pfid_vfid = cpu_to_le16(msg->func_id); + + desc->v_opcode_dtype = cpu_to_le32(msg->cookie.mbx.chnl_opcode); + desc->v_retval = cpu_to_le32(msg->cookie.mbx.chnl_retval); + + desc->flags = cpu_to_le16((msg->host_id & IDPF_HOST_ID_MASK) << + IDPF_CTLQ_FLAG_HOST_ID_S); + if (msg->data_len) { + struct idpf_dma_mem *buff = msg->ctx.indirect.payload; + + desc->datalen |= cpu_to_le16(msg->data_len); + desc->flags |= cpu_to_le16(IDPF_CTLQ_FLAG_BUF); + desc->flags |= cpu_to_le16(IDPF_CTLQ_FLAG_RD); + + /* Update the address values in the desc with the pa + * value for respective buffer + */ + desc->params.indirect.addr_high = + cpu_to_le32(upper_32_bits(buff->pa)); + desc->params.indirect.addr_low = + cpu_to_le32(lower_32_bits(buff->pa)); + + memcpy(&desc->params, msg->ctx.indirect.context, + IDPF_INDIRECT_CTX_SIZE); + } else { + memcpy(&desc->params, msg->ctx.direct, + IDPF_DIRECT_CTX_SIZE); + } + + /* Store buffer info */ + cq->bi.tx_msg[cq->next_to_use] = msg; + + (cq->next_to_use)++; + if (cq->next_to_use == cq->ring_size) + cq->next_to_use = 0; + } + + /* Force memory write to complete before letting hardware + * know that there are new descriptors to fetch. + */ + dma_wmb(); + + wr32(hw, cq->reg.tail, cq->next_to_use); + +err_unlock: + mutex_unlock(&cq->cq_lock); + + return err; +} + +/** + * idpf_ctlq_clean_sq - reclaim send descriptors on HW write back for the + * requested queue + * @cq: pointer to the specific Control queue + * @clean_count: (input|output) number of descriptors to clean as input, and + * number of descriptors actually cleaned as output + * @msg_status: (output) pointer to msg pointer array to be populated; needs + * to be allocated by caller + * + * Returns an array of message pointers associated with the cleaned + * descriptors. The pointers are to the original ctlq_msgs sent on the cleaned + * descriptors. The status will be returned for each; any messages that failed + * to send will have a non-zero status. The caller is expected to free original + * ctlq_msgs and free or reuse the DMA buffers. + */ +int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, + struct idpf_ctlq_msg *msg_status[]) +{ + struct idpf_ctlq_desc *desc; + u16 i, num_to_clean; + u16 ntc, desc_err; + + if (*clean_count == 0) + return 0; + if (*clean_count > cq->ring_size) + return -EBADR; + + mutex_lock(&cq->cq_lock); + + ntc = cq->next_to_clean; + + num_to_clean = *clean_count; + + for (i = 0; i < num_to_clean; i++) { + /* Fetch next descriptor and check if marked as done */ + desc = IDPF_CTLQ_DESC(cq, ntc); + if (!(le16_to_cpu(desc->flags) & IDPF_CTLQ_FLAG_DD)) + break; + + /* strip off FW internal code */ + desc_err = le16_to_cpu(desc->ret_val) & 0xff; + + msg_status[i] = cq->bi.tx_msg[ntc]; + msg_status[i]->status = desc_err; + + cq->bi.tx_msg[ntc] = NULL; + + /* Zero out any stale data */ + memset(desc, 0, sizeof(*desc)); + + ntc++; + if (ntc == cq->ring_size) + ntc = 0; + } + + cq->next_to_clean = ntc; + + mutex_unlock(&cq->cq_lock); + + /* Return number of descriptors actually cleaned */ + *clean_count = i; + + return 0; +} + +/** + * idpf_ctlq_post_rx_buffs - post buffers to descriptor ring + * @hw: pointer to hw struct + * @cq: pointer to control queue handle + * @buff_count: (input|output) input is number of buffers caller is trying to + * return; output is number of buffers that were not posted + * @buffs: array of pointers to dma mem structs to be given to hardware + * + * Caller uses this function to return DMA buffers to the descriptor ring after + * consuming them; buff_count will be the number of buffers. + * + * Note: this function needs to be called after a receive call even + * if there are no DMA buffers to be returned, i.e. buff_count = 0, + * buffs = NULL to support direct commands + */ +int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, struct idpf_ctlq_info *cq, + u16 *buff_count, struct idpf_dma_mem **buffs) +{ + struct idpf_ctlq_desc *desc; + u16 ntp = cq->next_to_post; + bool buffs_avail = false; + u16 tbp = ntp + 1; + int i = 0; + + if (*buff_count > cq->ring_size) + return -EBADR; + + if (*buff_count > 0) + buffs_avail = true; + + mutex_lock(&cq->cq_lock); + + if (tbp >= cq->ring_size) + tbp = 0; + + if (tbp == cq->next_to_clean) + /* Nothing to do */ + goto post_buffs_out; + + /* Post buffers for as many as provided or up until the last one used */ + while (ntp != cq->next_to_clean) { + desc = IDPF_CTLQ_DESC(cq, ntp); + + if (cq->bi.rx_buff[ntp]) + goto fill_desc; + if (!buffs_avail) { + /* If the caller hasn't given us any buffers or + * there are none left, search the ring itself + * for an available buffer to move to this + * entry starting at the next entry in the ring + */ + tbp = ntp + 1; + + /* Wrap ring if necessary */ + if (tbp >= cq->ring_size) + tbp = 0; + + while (tbp != cq->next_to_clean) { + if (cq->bi.rx_buff[tbp]) { + cq->bi.rx_buff[ntp] = + cq->bi.rx_buff[tbp]; + cq->bi.rx_buff[tbp] = NULL; + + /* Found a buffer, no need to + * search anymore + */ + break; + } + + /* Wrap ring if necessary */ + tbp++; + if (tbp >= cq->ring_size) + tbp = 0; + } + + if (tbp == cq->next_to_clean) + goto post_buffs_out; + } else { + /* Give back pointer to DMA buffer */ + cq->bi.rx_buff[ntp] = buffs[i]; + i++; + + if (i >= *buff_count) + buffs_avail = false; + } + +fill_desc: + desc->flags = + cpu_to_le16(IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD); + + /* Post buffers to descriptor */ + desc->datalen = cpu_to_le16(cq->bi.rx_buff[ntp]->size); + desc->params.indirect.addr_high = + cpu_to_le32(upper_32_bits(cq->bi.rx_buff[ntp]->pa)); + desc->params.indirect.addr_low = + cpu_to_le32(lower_32_bits(cq->bi.rx_buff[ntp]->pa)); + + ntp++; + if (ntp == cq->ring_size) + ntp = 0; + } + +post_buffs_out: + /* Only update tail if buffers were actually posted */ + if (cq->next_to_post != ntp) { + if (ntp) + /* Update next_to_post to ntp - 1 since current ntp + * will not have a buffer + */ + cq->next_to_post = ntp - 1; + else + /* Wrap to end of end ring since current ntp is 0 */ + cq->next_to_post = cq->ring_size - 1; + + dma_wmb(); + + wr32(hw, cq->reg.tail, cq->next_to_post); + } + + mutex_unlock(&cq->cq_lock); + + /* return the number of buffers that were not posted */ + *buff_count = *buff_count - i; + + return 0; +} + +/** + * idpf_ctlq_recv - receive control queue message call back + * @cq: pointer to control queue handle to receive on + * @num_q_msg: (input|output) input number of messages that should be received; + * output number of messages actually received + * @q_msg: (output) array of received control queue messages on this q; + * needs to be pre-allocated by caller for as many messages as requested + * + * Called by interrupt handler or polling mechanism. Caller is expected + * to free buffers + */ +int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, + struct idpf_ctlq_msg *q_msg) +{ + u16 num_to_clean, ntc, flags; + struct idpf_ctlq_desc *desc; + int err = 0; + u16 i; + + /* take the lock before we start messing with the ring */ + mutex_lock(&cq->cq_lock); + + ntc = cq->next_to_clean; + + num_to_clean = *num_q_msg; + + for (i = 0; i < num_to_clean; i++) { + /* Fetch next descriptor and check if marked as done */ + desc = IDPF_CTLQ_DESC(cq, ntc); + flags = le16_to_cpu(desc->flags); + + if (!(flags & IDPF_CTLQ_FLAG_DD)) + break; + + q_msg[i].vmvf_type = (flags & + (IDPF_CTLQ_FLAG_FTYPE_VM | + IDPF_CTLQ_FLAG_FTYPE_PF)) >> + IDPF_CTLQ_FLAG_FTYPE_S; + + if (flags & IDPF_CTLQ_FLAG_ERR) + err = -EBADMSG; + + q_msg[i].cookie.mbx.chnl_opcode = + le32_to_cpu(desc->v_opcode_dtype); + q_msg[i].cookie.mbx.chnl_retval = + le32_to_cpu(desc->v_retval); + + q_msg[i].opcode = le16_to_cpu(desc->opcode); + q_msg[i].data_len = le16_to_cpu(desc->datalen); + q_msg[i].status = le16_to_cpu(desc->ret_val); + + if (desc->datalen) { + memcpy(q_msg[i].ctx.indirect.context, + &desc->params.indirect, IDPF_INDIRECT_CTX_SIZE); + + /* Assign pointer to dma buffer to ctlq_msg array + * to be given to upper layer + */ + q_msg[i].ctx.indirect.payload = cq->bi.rx_buff[ntc]; + + /* Zero out pointer to DMA buffer info; + * will be repopulated by post buffers API + */ + cq->bi.rx_buff[ntc] = NULL; + } else { + memcpy(q_msg[i].ctx.direct, desc->params.raw, + IDPF_DIRECT_CTX_SIZE); + } + + /* Zero out stale data in descriptor */ + memset(desc, 0, sizeof(struct idpf_ctlq_desc)); + + ntc++; + if (ntc == cq->ring_size) + ntc = 0; + } + + cq->next_to_clean = ntc; + + mutex_unlock(&cq->cq_lock); + + *num_q_msg = i; + if (*num_q_msg == 0) + err = -ENOMSG; + + return err; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq.h b/drivers/net/ethernet/intel/idpf/idpf_controlq.h new file mode 100644 index 00000000000000..c1aba09e985624 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_CONTROLQ_H_ +#define _IDPF_CONTROLQ_H_ + +#include + +#include "idpf_controlq_api.h" + +/* Maximum buffer length for all control queue types */ +#define IDPF_CTLQ_MAX_BUF_LEN 4096 + +#define IDPF_CTLQ_DESC(R, i) \ + (&(((struct idpf_ctlq_desc *)((R)->desc_ring.va))[i])) + +#define IDPF_CTLQ_DESC_UNUSED(R) \ + ((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->ring_size) + \ + (R)->next_to_clean - (R)->next_to_use - 1)) + +/* Control Queue default settings */ +#define IDPF_CTRL_SQ_CMD_TIMEOUT 250 /* msecs */ + +struct idpf_ctlq_desc { + /* Control queue descriptor flags */ + __le16 flags; + /* Control queue message opcode */ + __le16 opcode; + __le16 datalen; /* 0 for direct commands */ + union { + __le16 ret_val; + __le16 pfid_vfid; +#define IDPF_CTLQ_DESC_VF_ID_S 0 +#define IDPF_CTLQ_DESC_VF_ID_M (0x7FF << IDPF_CTLQ_DESC_VF_ID_S) +#define IDPF_CTLQ_DESC_PF_ID_S 11 +#define IDPF_CTLQ_DESC_PF_ID_M (0x1F << IDPF_CTLQ_DESC_PF_ID_S) + }; + + /* Virtchnl message opcode and virtchnl descriptor type + * v_opcode=[27:0], v_dtype=[31:28] + */ + __le32 v_opcode_dtype; + /* Virtchnl return value */ + __le32 v_retval; + union { + struct { + __le32 param0; + __le32 param1; + __le32 param2; + __le32 param3; + } direct; + struct { + __le32 param0; + __le16 sw_cookie; + /* Virtchnl flags */ + __le16 v_flags; + __le32 addr_high; + __le32 addr_low; + } indirect; + u8 raw[16]; + } params; +}; + +/* Flags sub-structure + * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | + * |DD |CMP|ERR| * RSV * |FTYPE | *RSV* |RD |VFC|BUF| HOST_ID | + */ +/* command flags and offsets */ +#define IDPF_CTLQ_FLAG_DD_S 0 +#define IDPF_CTLQ_FLAG_CMP_S 1 +#define IDPF_CTLQ_FLAG_ERR_S 2 +#define IDPF_CTLQ_FLAG_FTYPE_S 6 +#define IDPF_CTLQ_FLAG_RD_S 10 +#define IDPF_CTLQ_FLAG_VFC_S 11 +#define IDPF_CTLQ_FLAG_BUF_S 12 +#define IDPF_CTLQ_FLAG_HOST_ID_S 13 + +#define IDPF_CTLQ_FLAG_DD BIT(IDPF_CTLQ_FLAG_DD_S) /* 0x1 */ +#define IDPF_CTLQ_FLAG_CMP BIT(IDPF_CTLQ_FLAG_CMP_S) /* 0x2 */ +#define IDPF_CTLQ_FLAG_ERR BIT(IDPF_CTLQ_FLAG_ERR_S) /* 0x4 */ +#define IDPF_CTLQ_FLAG_FTYPE_VM BIT(IDPF_CTLQ_FLAG_FTYPE_S) /* 0x40 */ +#define IDPF_CTLQ_FLAG_FTYPE_PF BIT(IDPF_CTLQ_FLAG_FTYPE_S + 1) /* 0x80 */ +#define IDPF_CTLQ_FLAG_RD BIT(IDPF_CTLQ_FLAG_RD_S) /* 0x400 */ +#define IDPF_CTLQ_FLAG_VFC BIT(IDPF_CTLQ_FLAG_VFC_S) /* 0x800 */ +#define IDPF_CTLQ_FLAG_BUF BIT(IDPF_CTLQ_FLAG_BUF_S) /* 0x1000 */ + +/* Host ID is a special field that has 3b and not a 1b flag */ +#define IDPF_CTLQ_FLAG_HOST_ID_M MAKE_MASK(0x7000UL, IDPF_CTLQ_FLAG_HOST_ID_S) + +struct idpf_mbxq_desc { + u8 pad[8]; /* CTLQ flags/opcode/len/retval fields */ + u32 chnl_opcode; /* avoid confusion with desc->opcode */ + u32 chnl_retval; /* ditto for desc->retval */ + u32 pf_vf_id; /* used by CP when sending to PF */ +}; + +/* Define the driver hardware struct to replace other control structs as needed + * Align to ctlq_hw_info + */ +struct idpf_hw { + void __iomem *hw_addr; + resource_size_t hw_addr_len; + + struct idpf_adapter *back; + + /* control queue - send and receive */ + struct idpf_ctlq_info *asq; + struct idpf_ctlq_info *arq; + + /* pci info */ + u16 device_id; + u16 vendor_id; + u16 subsystem_device_id; + u16 subsystem_vendor_id; + u8 revision_id; + bool adapter_stopped; + + struct list_head cq_list_head; +}; + +int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw, + struct idpf_ctlq_info *cq); + +void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq); + +/* prototype for functions used for dynamic memory allocation */ +void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, + u64 size); +void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem); +#endif /* _IDPF_CONTROLQ_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h new file mode 100644 index 00000000000000..e8e046ef2f0d76 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_api.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_CONTROLQ_API_H_ +#define _IDPF_CONTROLQ_API_H_ + +#include "idpf_mem.h" + +struct idpf_hw; + +/* Used for queue init, response and events */ +enum idpf_ctlq_type { + IDPF_CTLQ_TYPE_MAILBOX_TX = 0, + IDPF_CTLQ_TYPE_MAILBOX_RX = 1, + IDPF_CTLQ_TYPE_CONFIG_TX = 2, + IDPF_CTLQ_TYPE_CONFIG_RX = 3, + IDPF_CTLQ_TYPE_EVENT_RX = 4, + IDPF_CTLQ_TYPE_RDMA_TX = 5, + IDPF_CTLQ_TYPE_RDMA_RX = 6, + IDPF_CTLQ_TYPE_RDMA_COMPL = 7 +}; + +/* Generic Control Queue Structures */ +struct idpf_ctlq_reg { + /* used for queue tracking */ + u32 head; + u32 tail; + /* Below applies only to default mb (if present) */ + u32 len; + u32 bah; + u32 bal; + u32 len_mask; + u32 len_ena_mask; + u32 head_mask; +}; + +/* Generic queue msg structure */ +struct idpf_ctlq_msg { + u8 vmvf_type; /* represents the source of the message on recv */ +#define IDPF_VMVF_TYPE_VF 0 +#define IDPF_VMVF_TYPE_VM 1 +#define IDPF_VMVF_TYPE_PF 2 + u8 host_id; + /* 3b field used only when sending a message to CP - to be used in + * combination with target func_id to route the message + */ +#define IDPF_HOST_ID_MASK 0x7 + + u16 opcode; + u16 data_len; /* data_len = 0 when no payload is attached */ + union { + u16 func_id; /* when sending a message */ + u16 status; /* when receiving a message */ + }; + union { + struct { + u32 chnl_opcode; + u32 chnl_retval; + } mbx; + } cookie; + union { +#define IDPF_DIRECT_CTX_SIZE 16 +#define IDPF_INDIRECT_CTX_SIZE 8 + /* 16 bytes of context can be provided or 8 bytes of context + * plus the address of a DMA buffer + */ + u8 direct[IDPF_DIRECT_CTX_SIZE]; + struct { + u8 context[IDPF_INDIRECT_CTX_SIZE]; + struct idpf_dma_mem *payload; + } indirect; + struct { + u32 rsvd; + u16 data; + u16 flags; + } sw_cookie; + } ctx; +}; + +/* Generic queue info structures */ +/* MB, CONFIG and EVENT q do not have extended info */ +struct idpf_ctlq_create_info { + enum idpf_ctlq_type type; + int id; /* absolute queue offset passed as input + * -1 for default mailbox if present + */ + u16 len; /* Queue length passed as input */ + u16 buf_size; /* buffer size passed as input */ + u64 base_address; /* output, HPA of the Queue start */ + struct idpf_ctlq_reg reg; /* registers accessed by ctlqs */ + + int ext_info_size; + void *ext_info; /* Specific to q type */ +}; + +/* Control Queue information */ +struct idpf_ctlq_info { + struct list_head cq_list; + + enum idpf_ctlq_type cq_type; + int q_id; + struct mutex cq_lock; /* control queue lock */ + /* used for interrupt processing */ + u16 next_to_use; + u16 next_to_clean; + u16 next_to_post; /* starting descriptor to post buffers + * to after recev + */ + + struct idpf_dma_mem desc_ring; /* descriptor ring memory + * idpf_dma_mem is defined in OSdep.h + */ + union { + struct idpf_dma_mem **rx_buff; + struct idpf_ctlq_msg **tx_msg; + } bi; + + u16 buf_size; /* queue buffer size */ + u16 ring_size; /* Number of descriptors */ + struct idpf_ctlq_reg reg; /* registers accessed by ctlqs */ +}; + +/** + * enum idpf_mbx_opc - PF/VF mailbox commands + * @idpf_mbq_opc_send_msg_to_cp: used by PF or VF to send a message to its CP + */ +enum idpf_mbx_opc { + idpf_mbq_opc_send_msg_to_cp = 0x0801, +}; + +/* API supported for control queue management */ +/* Will init all required q including default mb. "q_info" is an array of + * create_info structs equal to the number of control queues to be created. + */ +int idpf_ctlq_init(struct idpf_hw *hw, u8 num_q, + struct idpf_ctlq_create_info *q_info); + +/* Allocate and initialize a single control queue, which will be added to the + * control queue list; returns a handle to the created control queue + */ +int idpf_ctlq_add(struct idpf_hw *hw, + struct idpf_ctlq_create_info *qinfo, + struct idpf_ctlq_info **cq); + +/* Deinitialize and deallocate a single control queue */ +void idpf_ctlq_remove(struct idpf_hw *hw, + struct idpf_ctlq_info *cq); + +/* Sends messages to HW and will also free the buffer*/ +int idpf_ctlq_send(struct idpf_hw *hw, + struct idpf_ctlq_info *cq, + u16 num_q_msg, + struct idpf_ctlq_msg q_msg[]); + +/* Receives messages and called by interrupt handler/polling + * initiated by app/process. Also caller is supposed to free the buffers + */ +int idpf_ctlq_recv(struct idpf_ctlq_info *cq, u16 *num_q_msg, + struct idpf_ctlq_msg *q_msg); + +/* Reclaims send descriptors on HW write back */ +int idpf_ctlq_clean_sq(struct idpf_ctlq_info *cq, u16 *clean_count, + struct idpf_ctlq_msg *msg_status[]); + +/* Indicate RX buffers are done being processed */ +int idpf_ctlq_post_rx_buffs(struct idpf_hw *hw, + struct idpf_ctlq_info *cq, + u16 *buff_count, + struct idpf_dma_mem **buffs); + +/* Will destroy all q including the default mb */ +void idpf_ctlq_deinit(struct idpf_hw *hw); + +#endif /* _IDPF_CONTROLQ_API_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c b/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c new file mode 100644 index 00000000000000..a942a6385d06b0 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_controlq_setup.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf_controlq.h" + +/** + * idpf_ctlq_alloc_desc_ring - Allocate Control Queue (CQ) rings + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + */ +static int idpf_ctlq_alloc_desc_ring(struct idpf_hw *hw, + struct idpf_ctlq_info *cq) +{ + size_t size = cq->ring_size * sizeof(struct idpf_ctlq_desc); + + cq->desc_ring.va = idpf_alloc_dma_mem(hw, &cq->desc_ring, size); + if (!cq->desc_ring.va) + return -ENOMEM; + + return 0; +} + +/** + * idpf_ctlq_alloc_bufs - Allocate Control Queue (CQ) buffers + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * Allocate the buffer head for all control queues, and if it's a receive + * queue, allocate DMA buffers + */ +static int idpf_ctlq_alloc_bufs(struct idpf_hw *hw, + struct idpf_ctlq_info *cq) +{ + int i; + + /* Do not allocate DMA buffers for transmit queues */ + if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_TX) + return 0; + + /* We'll be allocating the buffer info memory first, then we can + * allocate the mapped buffers for the event processing + */ + cq->bi.rx_buff = kcalloc(cq->ring_size, sizeof(struct idpf_dma_mem *), + GFP_KERNEL); + if (!cq->bi.rx_buff) + return -ENOMEM; + + /* allocate the mapped buffers (except for the last one) */ + for (i = 0; i < cq->ring_size - 1; i++) { + struct idpf_dma_mem *bi; + int num = 1; /* number of idpf_dma_mem to be allocated */ + + cq->bi.rx_buff[i] = kcalloc(num, sizeof(struct idpf_dma_mem), + GFP_KERNEL); + if (!cq->bi.rx_buff[i]) + goto unwind_alloc_cq_bufs; + + bi = cq->bi.rx_buff[i]; + + bi->va = idpf_alloc_dma_mem(hw, bi, cq->buf_size); + if (!bi->va) { + /* unwind will not free the failed entry */ + kfree(cq->bi.rx_buff[i]); + goto unwind_alloc_cq_bufs; + } + } + + return 0; + +unwind_alloc_cq_bufs: + /* don't try to free the one that failed... */ + i--; + for (; i >= 0; i--) { + idpf_free_dma_mem(hw, cq->bi.rx_buff[i]); + kfree(cq->bi.rx_buff[i]); + } + kfree(cq->bi.rx_buff); + + return -ENOMEM; +} + +/** + * idpf_ctlq_free_desc_ring - Free Control Queue (CQ) rings + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * This assumes the posted send buffers have already been cleaned + * and de-allocated + */ +static void idpf_ctlq_free_desc_ring(struct idpf_hw *hw, + struct idpf_ctlq_info *cq) +{ + idpf_free_dma_mem(hw, &cq->desc_ring); +} + +/** + * idpf_ctlq_free_bufs - Free CQ buffer info elements + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * Free the DMA buffers for RX queues, and DMA buffer header for both RX and TX + * queues. The upper layers are expected to manage freeing of TX DMA buffers + */ +static void idpf_ctlq_free_bufs(struct idpf_hw *hw, struct idpf_ctlq_info *cq) +{ + void *bi; + + if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_RX) { + int i; + + /* free DMA buffers for rx queues*/ + for (i = 0; i < cq->ring_size; i++) { + if (cq->bi.rx_buff[i]) { + idpf_free_dma_mem(hw, cq->bi.rx_buff[i]); + kfree(cq->bi.rx_buff[i]); + } + } + + bi = (void *)cq->bi.rx_buff; + } else { + bi = (void *)cq->bi.tx_msg; + } + + /* free the buffer header */ + kfree(bi); +} + +/** + * idpf_ctlq_dealloc_ring_res - Free memory allocated for control queue + * @hw: pointer to hw struct + * @cq: pointer to the specific Control queue + * + * Free the memory used by the ring, buffers and other related structures + */ +void idpf_ctlq_dealloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq) +{ + /* free ring buffers and the ring itself */ + idpf_ctlq_free_bufs(hw, cq); + idpf_ctlq_free_desc_ring(hw, cq); +} + +/** + * idpf_ctlq_alloc_ring_res - allocate memory for descriptor ring and bufs + * @hw: pointer to hw struct + * @cq: pointer to control queue struct + * + * Do *NOT* hold cq_lock when calling this as the memory allocation routines + * called are not going to be atomic context safe + */ +int idpf_ctlq_alloc_ring_res(struct idpf_hw *hw, struct idpf_ctlq_info *cq) +{ + int err; + + /* allocate the ring memory */ + err = idpf_ctlq_alloc_desc_ring(hw, cq); + if (err) + return err; + + /* allocate buffers in the rings */ + err = idpf_ctlq_alloc_bufs(hw, cq); + if (err) + goto idpf_init_cq_free_ring; + + /* success! */ + return 0; + +idpf_init_cq_free_ring: + idpf_free_dma_mem(hw, &cq->desc_ring); + + return err; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c new file mode 100644 index 00000000000000..3df9935685e962 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_lan_pf_regs.h" +#include "idpf_virtchnl.h" + +#define IDPF_PF_ITR_IDX_SPACING 0x4 + +/** + * idpf_ctlq_reg_init - initialize default mailbox registers + * @cq: pointer to the array of create control queues + */ +static void idpf_ctlq_reg_init(struct idpf_ctlq_create_info *cq) +{ + int i; + + for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) { + struct idpf_ctlq_create_info *ccq = cq + i; + + switch (ccq->type) { + case IDPF_CTLQ_TYPE_MAILBOX_TX: + /* set head and tail registers in our local struct */ + ccq->reg.head = PF_FW_ATQH; + ccq->reg.tail = PF_FW_ATQT; + ccq->reg.len = PF_FW_ATQLEN; + ccq->reg.bah = PF_FW_ATQBAH; + ccq->reg.bal = PF_FW_ATQBAL; + ccq->reg.len_mask = PF_FW_ATQLEN_ATQLEN_M; + ccq->reg.len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M; + ccq->reg.head_mask = PF_FW_ATQH_ATQH_M; + break; + case IDPF_CTLQ_TYPE_MAILBOX_RX: + /* set head and tail registers in our local struct */ + ccq->reg.head = PF_FW_ARQH; + ccq->reg.tail = PF_FW_ARQT; + ccq->reg.len = PF_FW_ARQLEN; + ccq->reg.bah = PF_FW_ARQBAH; + ccq->reg.bal = PF_FW_ARQBAL; + ccq->reg.len_mask = PF_FW_ARQLEN_ARQLEN_M; + ccq->reg.len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M; + ccq->reg.head_mask = PF_FW_ARQH_ARQH_M; + break; + default: + break; + } + } +} + +/** + * idpf_mb_intr_reg_init - Initialize mailbox interrupt register + * @adapter: adapter structure + */ +static void idpf_mb_intr_reg_init(struct idpf_adapter *adapter) +{ + struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg; + u32 dyn_ctl = le32_to_cpu(adapter->caps.mailbox_dyn_ctl); + + intr->dyn_ctl = idpf_get_reg_addr(adapter, dyn_ctl); + intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M; + intr->dyn_ctl_itridx_m = PF_GLINT_DYN_CTL_ITR_INDX_M; + intr->icr_ena = idpf_get_reg_addr(adapter, PF_INT_DIR_OICR_ENA); + intr->icr_ena_ctlq_m = PF_INT_DIR_OICR_ENA_M; +} + +/** + * idpf_intr_reg_init - Initialize interrupt registers + * @vport: virtual port structure + */ +static int idpf_intr_reg_init(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + int num_vecs = vport->num_q_vectors; + struct idpf_vec_regs *reg_vals; + int num_regs, i, err = 0; + u32 rx_itr, tx_itr; + u16 total_vecs; + + total_vecs = idpf_get_reserved_vecs(vport->adapter); + reg_vals = kcalloc(total_vecs, sizeof(struct idpf_vec_regs), + GFP_KERNEL); + if (!reg_vals) + return -ENOMEM; + + num_regs = idpf_get_reg_intr_vecs(vport, reg_vals); + if (num_regs < num_vecs) { + err = -EINVAL; + goto free_reg_vals; + } + + for (i = 0; i < num_vecs; i++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[i]; + u16 vec_id = vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC; + struct idpf_intr_reg *intr = &q_vector->intr_reg; + u32 spacing; + + intr->dyn_ctl = idpf_get_reg_addr(adapter, + reg_vals[vec_id].dyn_ctl_reg); + intr->dyn_ctl_intena_m = PF_GLINT_DYN_CTL_INTENA_M; + intr->dyn_ctl_itridx_s = PF_GLINT_DYN_CTL_ITR_INDX_S; + intr->dyn_ctl_intrvl_s = PF_GLINT_DYN_CTL_INTERVAL_S; + + spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing, + IDPF_PF_ITR_IDX_SPACING); + rx_itr = PF_GLINT_ITR_ADDR(VIRTCHNL2_ITR_IDX_0, + reg_vals[vec_id].itrn_reg, + spacing); + tx_itr = PF_GLINT_ITR_ADDR(VIRTCHNL2_ITR_IDX_1, + reg_vals[vec_id].itrn_reg, + spacing); + intr->rx_itr = idpf_get_reg_addr(adapter, rx_itr); + intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr); + } + +free_reg_vals: + kfree(reg_vals); + + return err; +} + +/** + * idpf_reset_reg_init - Initialize reset registers + * @adapter: Driver specific private structure + */ +static void idpf_reset_reg_init(struct idpf_adapter *adapter) +{ + adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, PFGEN_RSTAT); + adapter->reset_reg.rstat_m = PFGEN_RSTAT_PFR_STATE_M; +} + +/** + * idpf_trigger_reset - trigger reset + * @adapter: Driver specific private structure + * @trig_cause: Reason to trigger a reset + */ +static void idpf_trigger_reset(struct idpf_adapter *adapter, + enum idpf_flags __always_unused trig_cause) +{ + u32 reset_reg; + + reset_reg = readl(idpf_get_reg_addr(adapter, PFGEN_CTRL)); + writel(reset_reg | PFGEN_CTRL_PFSWR, + idpf_get_reg_addr(adapter, PFGEN_CTRL)); +} + +/** + * idpf_reg_ops_init - Initialize register API function pointers + * @adapter: Driver specific private structure + */ +static void idpf_reg_ops_init(struct idpf_adapter *adapter) +{ + adapter->dev_ops.reg_ops.ctlq_reg_init = idpf_ctlq_reg_init; + adapter->dev_ops.reg_ops.intr_reg_init = idpf_intr_reg_init; + adapter->dev_ops.reg_ops.mb_intr_reg_init = idpf_mb_intr_reg_init; + adapter->dev_ops.reg_ops.reset_reg_init = idpf_reset_reg_init; + adapter->dev_ops.reg_ops.trigger_reset = idpf_trigger_reset; +} + +/** + * idpf_dev_ops_init - Initialize device API function pointers + * @adapter: Driver specific private structure + */ +void idpf_dev_ops_init(struct idpf_adapter *adapter) +{ + idpf_reg_ops_init(adapter); +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_devids.h b/drivers/net/ethernet/intel/idpf/idpf_devids.h new file mode 100644 index 00000000000000..5154a52ae61caf --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_devids.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_DEVIDS_H_ +#define _IDPF_DEVIDS_H_ + +#define IDPF_DEV_ID_PF 0x1452 +#define IDPF_DEV_ID_VF 0x145C + +#endif /* _IDPF_DEVIDS_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c new file mode 100644 index 00000000000000..5e6777ea55ca69 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -0,0 +1,1355 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" + +/** + * idpf_get_rxnfc - command to get RX flow classification rules + * @netdev: network interface device structure + * @cmd: ethtool rxnfc command + * @rule_locs: pointer to store rule locations + * + * Returns Success if the command is supported. + */ +static int idpf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 __always_unused *rule_locs) +{ + struct idpf_vport *vport; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = vport->num_rxq; + idpf_vport_ctrl_unlock(netdev); + + return 0; + default: + break; + } + + idpf_vport_ctrl_unlock(netdev); + + return -EOPNOTSUPP; +} + +/** + * idpf_get_rxfh_key_size - get the RSS hash key size + * @netdev: network interface device structure + * + * Returns the key size on success, error value on failure. + */ +static u32 idpf_get_rxfh_key_size(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_user_config_data *user_config; + + if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) + return -EOPNOTSUPP; + + user_config = &np->adapter->vport_config[np->vport_idx]->user_config; + + return user_config->rss_data.rss_key_size; +} + +/** + * idpf_get_rxfh_indir_size - get the rx flow hash indirection table size + * @netdev: network interface device structure + * + * Returns the table size on success, error value on failure. + */ +static u32 idpf_get_rxfh_indir_size(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_user_config_data *user_config; + + if (!idpf_is_cap_ena_all(np->adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) + return -EOPNOTSUPP; + + user_config = &np->adapter->vport_config[np->vport_idx]->user_config; + + return user_config->rss_data.rss_lut_size; +} + +/** + * idpf_get_rxfh - get the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function in use + * + * Reads the indirection table directly from the hardware. Always returns 0. + */ +static int idpf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_rss_data *rss_data; + struct idpf_adapter *adapter; + int err = 0; + u16 i; + + idpf_vport_ctrl_lock(netdev); + + adapter = np->adapter; + + if (!idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) { + err = -EOPNOTSUPP; + goto unlock_mutex; + } + + rss_data = &adapter->vport_config[np->vport_idx]->user_config.rss_data; + if (np->state != __IDPF_VPORT_UP) + goto unlock_mutex; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + if (key) + memcpy(key, rss_data->rss_key, rss_data->rss_key_size); + + if (indir) { + for (i = 0; i < rss_data->rss_lut_size; i++) + indir[i] = rss_data->rss_lut[i]; + } + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_set_rxfh - set the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function to use + * + * Returns -EINVAL if the table specifies an invalid queue id, otherwise + * returns 0 after programming the table. + */ +static int idpf_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_rss_data *rss_data; + struct idpf_adapter *adapter; + struct idpf_vport *vport; + int err = 0; + u16 lut; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + adapter = vport->adapter; + + if (!idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) { + err = -EOPNOTSUPP; + goto unlock_mutex; + } + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + if (np->state != __IDPF_VPORT_UP) + goto unlock_mutex; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) { + err = -EOPNOTSUPP; + goto unlock_mutex; + } + + if (key) + memcpy(rss_data->rss_key, key, rss_data->rss_key_size); + + if (indir) { + for (lut = 0; lut < rss_data->rss_lut_size; lut++) + rss_data->rss_lut[lut] = indir[lut]; + } + + err = idpf_config_rss(vport); + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_get_channels: get the number of channels supported by the device + * @netdev: network interface device structure + * @ch: channel information structure + * + * Report maximum of TX and RX. Report one extra channel to match our MailBox + * Queue. + */ +static void idpf_get_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + u16 num_txq, num_rxq; + u16 combined; + + vport_config = np->adapter->vport_config[np->vport_idx]; + + num_txq = vport_config->user_config.num_req_tx_qs; + num_rxq = vport_config->user_config.num_req_rx_qs; + + combined = min(num_txq, num_rxq); + + /* Report maximum channels */ + ch->max_combined = min_t(u16, vport_config->max_q.max_txq, + vport_config->max_q.max_rxq); + ch->max_rx = vport_config->max_q.max_rxq; + ch->max_tx = vport_config->max_q.max_txq; + + ch->max_other = IDPF_MAX_MBXQ; + ch->other_count = IDPF_MAX_MBXQ; + + ch->combined_count = combined; + ch->rx_count = num_rxq - combined; + ch->tx_count = num_txq - combined; +} + +/** + * idpf_set_channels: set the new channel count + * @netdev: network interface device structure + * @ch: channel information structure + * + * Negotiate a new number of channels with CP. Returns 0 on success, negative + * on failure. + */ +static int idpf_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct idpf_vport_config *vport_config; + u16 combined, num_txq, num_rxq; + unsigned int num_req_tx_q; + unsigned int num_req_rx_q; + struct idpf_vport *vport; + struct device *dev; + int err = 0; + u16 idx; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + idx = vport->idx; + vport_config = vport->adapter->vport_config[idx]; + + num_txq = vport_config->user_config.num_req_tx_qs; + num_rxq = vport_config->user_config.num_req_rx_qs; + + combined = min(num_txq, num_rxq); + + /* these checks are for cases where user didn't specify a particular + * value on cmd line but we get non-zero value anyway via + * get_channels(); look at ethtool.c in ethtool repository (the user + * space part), particularly, do_schannels() routine + */ + if (ch->combined_count == combined) + ch->combined_count = 0; + if (ch->combined_count && ch->rx_count == num_rxq - combined) + ch->rx_count = 0; + if (ch->combined_count && ch->tx_count == num_txq - combined) + ch->tx_count = 0; + + num_req_tx_q = ch->combined_count + ch->tx_count; + num_req_rx_q = ch->combined_count + ch->rx_count; + + dev = &vport->adapter->pdev->dev; + /* It's possible to specify number of queues that exceeds max. + * Stack checks max combined_count and max [tx|rx]_count but not the + * max combined_count + [tx|rx]_count. These checks should catch that. + */ + if (num_req_tx_q > vport_config->max_q.max_txq) { + dev_info(dev, "Maximum TX queues is %d\n", + vport_config->max_q.max_txq); + err = -EINVAL; + goto unlock_mutex; + } + if (num_req_rx_q > vport_config->max_q.max_rxq) { + dev_info(dev, "Maximum RX queues is %d\n", + vport_config->max_q.max_rxq); + err = -EINVAL; + goto unlock_mutex; + } + + if (num_req_tx_q == num_txq && num_req_rx_q == num_rxq) + goto unlock_mutex; + + vport_config->user_config.num_req_tx_qs = num_req_tx_q; + vport_config->user_config.num_req_rx_qs = num_req_rx_q; + + err = idpf_initiate_soft_reset(vport, IDPF_SR_Q_CHANGE); + if (err) { + /* roll back queue change */ + vport_config->user_config.num_req_tx_qs = num_txq; + vport_config->user_config.num_req_rx_qs = num_rxq; + } + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_get_ringparam - Get ring parameters + * @netdev: network interface device structure + * @ring: ethtool ringparam structure + * @kring: unused + * @ext_ack: unused + * + * Returns current ring parameters. TX and RX rings are reported separately, + * but the number of rings is not reported. + */ +static void idpf_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *ext_ack) +{ + struct idpf_vport *vport; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + ring->rx_max_pending = IDPF_MAX_RXQ_DESC; + ring->tx_max_pending = IDPF_MAX_TXQ_DESC; + ring->rx_pending = vport->rxq_desc_count; + ring->tx_pending = vport->txq_desc_count; + + kring->tcp_data_split = idpf_vport_get_hsplit(vport); + + idpf_vport_ctrl_unlock(netdev); +} + +/** + * idpf_set_ringparam - Set ring parameters + * @netdev: network interface device structure + * @ring: ethtool ringparam structure + * @kring: unused + * @ext_ack: unused + * + * Sets ring parameters. TX and RX rings are controlled separately, but the + * number of rings is not specified, so all rings get the same settings. + */ +static int idpf_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *ext_ack) +{ + struct idpf_vport_user_config_data *config_data; + u32 new_rx_count, new_tx_count; + struct idpf_vport *vport; + int i, err = 0; + u16 idx; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + idx = vport->idx; + + if (ring->tx_pending < IDPF_MIN_TXQ_DESC) { + netdev_err(netdev, "Descriptors requested (Tx: %u) is less than min supported (%u)\n", + ring->tx_pending, + IDPF_MIN_TXQ_DESC); + err = -EINVAL; + goto unlock_mutex; + } + + if (ring->rx_pending < IDPF_MIN_RXQ_DESC) { + netdev_err(netdev, "Descriptors requested (Rx: %u) is less than min supported (%u)\n", + ring->rx_pending, + IDPF_MIN_RXQ_DESC); + err = -EINVAL; + goto unlock_mutex; + } + + new_rx_count = ALIGN(ring->rx_pending, IDPF_REQ_RXQ_DESC_MULTIPLE); + if (new_rx_count != ring->rx_pending) + netdev_info(netdev, "Requested Rx descriptor count rounded up to %u\n", + new_rx_count); + + new_tx_count = ALIGN(ring->tx_pending, IDPF_REQ_DESC_MULTIPLE); + if (new_tx_count != ring->tx_pending) + netdev_info(netdev, "Requested Tx descriptor count rounded up to %u\n", + new_tx_count); + + if (new_tx_count == vport->txq_desc_count && + new_rx_count == vport->rxq_desc_count) + goto unlock_mutex; + + if (!idpf_vport_set_hsplit(vport, kring->tcp_data_split)) { + NL_SET_ERR_MSG_MOD(ext_ack, + "setting TCP data split is not supported"); + err = -EOPNOTSUPP; + + goto unlock_mutex; + } + + config_data = &vport->adapter->vport_config[idx]->user_config; + config_data->num_req_txq_desc = new_tx_count; + config_data->num_req_rxq_desc = new_rx_count; + + /* Since we adjusted the RX completion queue count, the RX buffer queue + * descriptor count needs to be adjusted as well + */ + for (i = 0; i < vport->num_bufqs_per_qgrp; i++) + vport->bufq_desc_count[i] = + IDPF_RX_BUFQ_DESC_COUNT(new_rx_count, + vport->num_bufqs_per_qgrp); + + err = idpf_initiate_soft_reset(vport, IDPF_SR_Q_DESC_CHANGE); + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * struct idpf_stats - definition for an ethtool statistic + * @stat_string: statistic name to display in ethtool -S output + * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64) + * @stat_offset: offsetof() the stat from a base pointer + * + * This structure defines a statistic to be added to the ethtool stats buffer. + * It defines a statistic as offset from a common base pointer. Stats should + * be defined in constant arrays using the IDPF_STAT macro, with every element + * of the array using the same _type for calculating the sizeof_stat and + * stat_offset. + * + * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or + * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from + * the idpf_add_ethtool_stat() helper function. + * + * The @stat_string is interpreted as a format string, allowing formatted + * values to be inserted while looping over multiple structures for a given + * statistics array. Thus, every statistic string in an array should have the + * same type and number of format specifiers, to be formatted by variadic + * arguments to the idpf_add_stat_string() helper function. + */ +struct idpf_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +/* Helper macro to define an idpf_stat structure with proper size and type. + * Use this when defining constant statistics arrays. Note that @_type expects + * only a type name and is used multiple times. + */ +#define IDPF_STAT(_type, _name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = sizeof_field(_type, _stat), \ + .stat_offset = offsetof(_type, _stat) \ +} + +/* Helper macro for defining some statistics related to queues */ +#define IDPF_QUEUE_STAT(_name, _stat) \ + IDPF_STAT(struct idpf_queue, _name, _stat) + +/* Stats associated with a Tx queue */ +static const struct idpf_stats idpf_gstrings_tx_queue_stats[] = { + IDPF_QUEUE_STAT("pkts", q_stats.tx.packets), + IDPF_QUEUE_STAT("bytes", q_stats.tx.bytes), + IDPF_QUEUE_STAT("lso_pkts", q_stats.tx.lso_pkts), +}; + +/* Stats associated with an Rx queue */ +static const struct idpf_stats idpf_gstrings_rx_queue_stats[] = { + IDPF_QUEUE_STAT("pkts", q_stats.rx.packets), + IDPF_QUEUE_STAT("bytes", q_stats.rx.bytes), + IDPF_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rx.rsc_pkts), +}; + +#define IDPF_TX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_tx_queue_stats) +#define IDPF_RX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_rx_queue_stats) + +#define IDPF_PORT_STAT(_name, _stat) \ + IDPF_STAT(struct idpf_vport, _name, _stat) + +static const struct idpf_stats idpf_gstrings_port_stats[] = { + IDPF_PORT_STAT("rx-csum_errors", port_stats.rx_hw_csum_err), + IDPF_PORT_STAT("rx-hsplit", port_stats.rx_hsplit), + IDPF_PORT_STAT("rx-hsplit_hbo", port_stats.rx_hsplit_hbo), + IDPF_PORT_STAT("rx-bad_descs", port_stats.rx_bad_descs), + IDPF_PORT_STAT("tx-skb_drops", port_stats.tx_drops), + IDPF_PORT_STAT("tx-dma_map_errs", port_stats.tx_dma_map_errs), + IDPF_PORT_STAT("tx-linearized_pkts", port_stats.tx_linearize), + IDPF_PORT_STAT("tx-busy_events", port_stats.tx_busy), + IDPF_PORT_STAT("rx-unicast_pkts", port_stats.vport_stats.rx_unicast), + IDPF_PORT_STAT("rx-multicast_pkts", port_stats.vport_stats.rx_multicast), + IDPF_PORT_STAT("rx-broadcast_pkts", port_stats.vport_stats.rx_broadcast), + IDPF_PORT_STAT("rx-unknown_protocol", port_stats.vport_stats.rx_unknown_protocol), + IDPF_PORT_STAT("tx-unicast_pkts", port_stats.vport_stats.tx_unicast), + IDPF_PORT_STAT("tx-multicast_pkts", port_stats.vport_stats.tx_multicast), + IDPF_PORT_STAT("tx-broadcast_pkts", port_stats.vport_stats.tx_broadcast), +}; + +#define IDPF_PORT_STATS_LEN ARRAY_SIZE(idpf_gstrings_port_stats) + +/** + * __idpf_add_qstat_strings - copy stat strings into ethtool buffer + * @p: ethtool supplied buffer + * @stats: stat definitions array + * @size: size of the stats array + * @type: stat type + * @idx: stat index + * + * Format and copy the strings described by stats into the buffer pointed at + * by p. + */ +static void __idpf_add_qstat_strings(u8 **p, const struct idpf_stats *stats, + const unsigned int size, const char *type, + unsigned int idx) +{ + unsigned int i; + + for (i = 0; i < size; i++) + ethtool_sprintf(p, "%s_q-%u_%s", + type, idx, stats[i].stat_string); +} + +/** + * idpf_add_qstat_strings - Copy queue stat strings into ethtool buffer + * @p: ethtool supplied buffer + * @stats: stat definitions array + * @type: stat type + * @idx: stat idx + * + * Format and copy the strings described by the const static stats value into + * the buffer pointed at by p. + * + * The parameter @stats is evaluated twice, so parameters with side effects + * should be avoided. Additionally, stats must be an array such that + * ARRAY_SIZE can be called on it. + */ +#define idpf_add_qstat_strings(p, stats, type, idx) \ + __idpf_add_qstat_strings(p, stats, ARRAY_SIZE(stats), type, idx) + +/** + * idpf_add_stat_strings - Copy port stat strings into ethtool buffer + * @p: ethtool buffer + * @stats: struct to copy from + * @size: size of stats array to copy from + */ +static void idpf_add_stat_strings(u8 **p, const struct idpf_stats *stats, + const unsigned int size) +{ + unsigned int i; + + for (i = 0; i < size; i++) + ethtool_sprintf(p, "%s", stats[i].stat_string); +} + +/** + * idpf_get_stat_strings - Get stat strings + * @netdev: network interface device structure + * @data: buffer for string data + * + * Builds the statistics string table + */ +static void idpf_get_stat_strings(struct net_device *netdev, u8 *data) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + unsigned int i; + + idpf_add_stat_strings(&data, idpf_gstrings_port_stats, + IDPF_PORT_STATS_LEN); + + vport_config = np->adapter->vport_config[np->vport_idx]; + /* It's critical that we always report a constant number of strings and + * that the strings are reported in the same order regardless of how + * many queues are actually in use. + */ + for (i = 0; i < vport_config->max_q.max_txq; i++) + idpf_add_qstat_strings(&data, idpf_gstrings_tx_queue_stats, + "tx", i); + + for (i = 0; i < vport_config->max_q.max_rxq; i++) + idpf_add_qstat_strings(&data, idpf_gstrings_rx_queue_stats, + "rx", i); +} + +/** + * idpf_get_strings - Get string set + * @netdev: network interface device structure + * @sset: id of string set + * @data: buffer for string data + * + * Builds string tables for various string sets + */ +static void idpf_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + switch (sset) { + case ETH_SS_STATS: + idpf_get_stat_strings(netdev, data); + break; + default: + break; + } +} + +/** + * idpf_get_sset_count - Get length of string set + * @netdev: network interface device structure + * @sset: id of string set + * + * Reports size of various string tables. + */ +static int idpf_get_sset_count(struct net_device *netdev, int sset) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + u16 max_txq, max_rxq; + unsigned int size; + + if (sset != ETH_SS_STATS) + return -EINVAL; + + vport_config = np->adapter->vport_config[np->vport_idx]; + /* This size reported back here *must* be constant throughout the + * lifecycle of the netdevice, i.e. we must report the maximum length + * even for queues that don't technically exist. This is due to the + * fact that this userspace API uses three separate ioctl calls to get + * stats data but has no way to communicate back to userspace when that + * size has changed, which can typically happen as a result of changing + * number of queues. If the number/order of stats change in the middle + * of this call chain it will lead to userspace crashing/accessing bad + * data through buffer under/overflow. + */ + max_txq = vport_config->max_q.max_txq; + max_rxq = vport_config->max_q.max_rxq; + + size = IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) + + (IDPF_RX_QUEUE_STATS_LEN * max_rxq); + + return size; +} + +/** + * idpf_add_one_ethtool_stat - copy the stat into the supplied buffer + * @data: location to store the stat value + * @pstat: old stat pointer to copy from + * @stat: the stat definition + * + * Copies the stat data defined by the pointer and stat structure pair into + * the memory supplied as data. If the pointer is null, data will be zero'd. + */ +static void idpf_add_one_ethtool_stat(u64 *data, void *pstat, + const struct idpf_stats *stat) +{ + char *p; + + if (!pstat) { + /* Ensure that the ethtool data buffer is zero'd for any stats + * which don't have a valid pointer. + */ + *data = 0; + return; + } + + p = (char *)pstat + stat->stat_offset; + switch (stat->sizeof_stat) { + case sizeof(u64): + *data = *((u64 *)p); + break; + case sizeof(u32): + *data = *((u32 *)p); + break; + case sizeof(u16): + *data = *((u16 *)p); + break; + case sizeof(u8): + *data = *((u8 *)p); + break; + default: + WARN_ONCE(1, "unexpected stat size for %s", + stat->stat_string); + *data = 0; + } +} + +/** + * idpf_add_queue_stats - copy queue statistics into supplied buffer + * @data: ethtool stats buffer + * @q: the queue to copy + * + * Queue statistics must be copied while protected by u64_stats_fetch_begin, + * so we can't directly use idpf_add_ethtool_stats. Assumes that queue stats + * are defined in idpf_gstrings_queue_stats. If the queue pointer is null, + * zero out the queue stat values and update the data pointer. Otherwise + * safely copy the stats from the queue into the supplied buffer and update + * the data pointer when finished. + * + * This function expects to be called while under rcu_read_lock(). + */ +static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q) +{ + const struct idpf_stats *stats; + unsigned int start; + unsigned int size; + unsigned int i; + + if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) { + size = IDPF_RX_QUEUE_STATS_LEN; + stats = idpf_gstrings_rx_queue_stats; + } else { + size = IDPF_TX_QUEUE_STATS_LEN; + stats = idpf_gstrings_tx_queue_stats; + } + + /* To avoid invalid statistics values, ensure that we keep retrying + * the copy until we get a consistent value according to + * u64_stats_fetch_retry. + */ + do { + start = u64_stats_fetch_begin(&q->stats_sync); + for (i = 0; i < size; i++) + idpf_add_one_ethtool_stat(&(*data)[i], q, &stats[i]); + } while (u64_stats_fetch_retry(&q->stats_sync, start)); + + /* Once we successfully copy the stats in, update the data pointer */ + *data += size; +} + +/** + * idpf_add_empty_queue_stats - Add stats for a non-existent queue + * @data: pointer to data buffer + * @qtype: type of data queue + * + * We must report a constant length of stats back to userspace regardless of + * how many queues are actually in use because stats collection happens over + * three separate ioctls and there's no way to notify userspace the size + * changed between those calls. This adds empty to data to the stats since we + * don't have a real queue to refer to for this stats slot. + */ +static void idpf_add_empty_queue_stats(u64 **data, u16 qtype) +{ + unsigned int i; + int stats_len; + + if (qtype == VIRTCHNL2_QUEUE_TYPE_RX) + stats_len = IDPF_RX_QUEUE_STATS_LEN; + else + stats_len = IDPF_TX_QUEUE_STATS_LEN; + + for (i = 0; i < stats_len; i++) + (*data)[i] = 0; + *data += stats_len; +} + +/** + * idpf_add_port_stats - Copy port stats into ethtool buffer + * @vport: virtual port struct + * @data: ethtool buffer to copy into + */ +static void idpf_add_port_stats(struct idpf_vport *vport, u64 **data) +{ + unsigned int size = IDPF_PORT_STATS_LEN; + unsigned int start; + unsigned int i; + + do { + start = u64_stats_fetch_begin(&vport->port_stats.stats_sync); + for (i = 0; i < size; i++) + idpf_add_one_ethtool_stat(&(*data)[i], vport, + &idpf_gstrings_port_stats[i]); + } while (u64_stats_fetch_retry(&vport->port_stats.stats_sync, start)); + + *data += size; +} + +/** + * idpf_collect_queue_stats - accumulate various per queue stats + * into port level stats + * @vport: pointer to vport struct + **/ +static void idpf_collect_queue_stats(struct idpf_vport *vport) +{ + struct idpf_port_stats *pstats = &vport->port_stats; + int i, j; + + /* zero out port stats since they're actually tracked in per + * queue stats; this is only for reporting + */ + u64_stats_update_begin(&pstats->stats_sync); + u64_stats_set(&pstats->rx_hw_csum_err, 0); + u64_stats_set(&pstats->rx_hsplit, 0); + u64_stats_set(&pstats->rx_hsplit_hbo, 0); + u64_stats_set(&pstats->rx_bad_descs, 0); + u64_stats_set(&pstats->tx_linearize, 0); + u64_stats_set(&pstats->tx_busy, 0); + u64_stats_set(&pstats->tx_drops, 0); + u64_stats_set(&pstats->tx_dma_map_errs, 0); + u64_stats_update_end(&pstats->stats_sync); + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rxq_grp = &vport->rxq_grps[i]; + u16 num_rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rxq_grp->splitq.num_rxq_sets; + else + num_rxq = rxq_grp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + u64 hw_csum_err, hsplit, hsplit_hbo, bad_descs; + struct idpf_rx_queue_stats *stats; + struct idpf_queue *rxq; + unsigned int start; + + if (idpf_is_queue_model_split(vport->rxq_model)) + rxq = &rxq_grp->splitq.rxq_sets[j]->rxq; + else + rxq = rxq_grp->singleq.rxqs[j]; + + if (!rxq) + continue; + + do { + start = u64_stats_fetch_begin(&rxq->stats_sync); + + stats = &rxq->q_stats.rx; + hw_csum_err = u64_stats_read(&stats->hw_csum_err); + hsplit = u64_stats_read(&stats->hsplit_pkts); + hsplit_hbo = u64_stats_read(&stats->hsplit_buf_ovf); + bad_descs = u64_stats_read(&stats->bad_descs); + } while (u64_stats_fetch_retry(&rxq->stats_sync, start)); + + u64_stats_update_begin(&pstats->stats_sync); + u64_stats_add(&pstats->rx_hw_csum_err, hw_csum_err); + u64_stats_add(&pstats->rx_hsplit, hsplit); + u64_stats_add(&pstats->rx_hsplit_hbo, hsplit_hbo); + u64_stats_add(&pstats->rx_bad_descs, bad_descs); + u64_stats_update_end(&pstats->stats_sync); + } + } + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (j = 0; j < txq_grp->num_txq; j++) { + u64 linearize, qbusy, skb_drops, dma_map_errs; + struct idpf_queue *txq = txq_grp->txqs[j]; + struct idpf_tx_queue_stats *stats; + unsigned int start; + + if (!txq) + continue; + + do { + start = u64_stats_fetch_begin(&txq->stats_sync); + + stats = &txq->q_stats.tx; + linearize = u64_stats_read(&stats->linearize); + qbusy = u64_stats_read(&stats->q_busy); + skb_drops = u64_stats_read(&stats->skb_drops); + dma_map_errs = u64_stats_read(&stats->dma_map_errs); + } while (u64_stats_fetch_retry(&txq->stats_sync, start)); + + u64_stats_update_begin(&pstats->stats_sync); + u64_stats_add(&pstats->tx_linearize, linearize); + u64_stats_add(&pstats->tx_busy, qbusy); + u64_stats_add(&pstats->tx_drops, skb_drops); + u64_stats_add(&pstats->tx_dma_map_errs, dma_map_errs); + u64_stats_update_end(&pstats->stats_sync); + } + } +} + +/** + * idpf_get_ethtool_stats - report device statistics + * @netdev: network interface device structure + * @stats: ethtool statistics structure + * @data: pointer to data buffer + * + * All statistics are added to the data buffer as an array of u64. + */ +static void idpf_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, + u64 *data) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + struct idpf_vport *vport; + unsigned int total = 0; + unsigned int i, j; + bool is_splitq; + u16 qtype; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + if (np->state != __IDPF_VPORT_UP) { + idpf_vport_ctrl_unlock(netdev); + + return; + } + + rcu_read_lock(); + + idpf_collect_queue_stats(vport); + idpf_add_port_stats(vport, &data); + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + qtype = VIRTCHNL2_QUEUE_TYPE_TX; + + for (j = 0; j < txq_grp->num_txq; j++, total++) { + struct idpf_queue *txq = txq_grp->txqs[j]; + + if (!txq) + idpf_add_empty_queue_stats(&data, qtype); + else + idpf_add_queue_stats(&data, txq); + } + } + + vport_config = vport->adapter->vport_config[vport->idx]; + /* It is critical we provide a constant number of stats back to + * userspace regardless of how many queues are actually in use because + * there is no way to inform userspace the size has changed between + * ioctl calls. This will fill in any missing stats with zero. + */ + for (; total < vport_config->max_q.max_txq; total++) + idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_TX); + total = 0; + + is_splitq = idpf_is_queue_model_split(vport->rxq_model); + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rxq_grp = &vport->rxq_grps[i]; + u16 num_rxq; + + qtype = VIRTCHNL2_QUEUE_TYPE_RX; + + if (is_splitq) + num_rxq = rxq_grp->splitq.num_rxq_sets; + else + num_rxq = rxq_grp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, total++) { + struct idpf_queue *rxq; + + if (is_splitq) + rxq = &rxq_grp->splitq.rxq_sets[j]->rxq; + else + rxq = rxq_grp->singleq.rxqs[j]; + if (!rxq) + idpf_add_empty_queue_stats(&data, qtype); + else + idpf_add_queue_stats(&data, rxq); + } + } + + for (; total < vport_config->max_q.max_rxq; total++) + idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_RX); + + rcu_read_unlock(); + + idpf_vport_ctrl_unlock(netdev); +} + +/** + * idpf_find_rxq - find rxq from q index + * @vport: virtual port associated to queue + * @q_num: q index used to find queue + * + * returns pointer to rx queue + */ +static struct idpf_queue *idpf_find_rxq(struct idpf_vport *vport, int q_num) +{ + int q_grp, q_idx; + + if (!idpf_is_queue_model_split(vport->rxq_model)) + return vport->rxq_grps->singleq.rxqs[q_num]; + + q_grp = q_num / IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; + q_idx = q_num % IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; + + return &vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq; +} + +/** + * idpf_find_txq - find txq from q index + * @vport: virtual port associated to queue + * @q_num: q index used to find queue + * + * returns pointer to tx queue + */ +static struct idpf_queue *idpf_find_txq(struct idpf_vport *vport, int q_num) +{ + int q_grp; + + if (!idpf_is_queue_model_split(vport->txq_model)) + return vport->txqs[q_num]; + + q_grp = q_num / IDPF_DFLT_SPLITQ_TXQ_PER_GROUP; + + return vport->txq_grps[q_grp].complq; +} + +/** + * __idpf_get_q_coalesce - get ITR values for specific queue + * @ec: ethtool structure to fill with driver's coalesce settings + * @q: quuee of Rx or Tx + */ +static void __idpf_get_q_coalesce(struct ethtool_coalesce *ec, + struct idpf_queue *q) +{ + if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) { + ec->use_adaptive_rx_coalesce = + IDPF_ITR_IS_DYNAMIC(q->q_vector->rx_intr_mode); + ec->rx_coalesce_usecs = q->q_vector->rx_itr_value; + } else { + ec->use_adaptive_tx_coalesce = + IDPF_ITR_IS_DYNAMIC(q->q_vector->tx_intr_mode); + ec->tx_coalesce_usecs = q->q_vector->tx_itr_value; + } +} + +/** + * idpf_get_q_coalesce - get ITR values for specific queue + * @netdev: pointer to the netdev associated with this query + * @ec: coalesce settings to program the device with + * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index + * + * Return 0 on success, and negative on failure + */ +static int idpf_get_q_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + u32 q_num) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport *vport; + int err = 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + if (np->state != __IDPF_VPORT_UP) + goto unlock_mutex; + + if (q_num >= vport->num_rxq && q_num >= vport->num_txq) { + err = -EINVAL; + goto unlock_mutex; + } + + if (q_num < vport->num_rxq) + __idpf_get_q_coalesce(ec, idpf_find_rxq(vport, q_num)); + + if (q_num < vport->num_txq) + __idpf_get_q_coalesce(ec, idpf_find_txq(vport, q_num)); + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_get_coalesce - get ITR values as requested by user + * @netdev: pointer to the netdev associated with this query + * @ec: coalesce settings to be filled + * @kec: unused + * @extack: unused + * + * Return 0 on success, and negative on failure + */ +static int idpf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kec, + struct netlink_ext_ack *extack) +{ + /* Return coalesce based on queue number zero */ + return idpf_get_q_coalesce(netdev, ec, 0); +} + +/** + * idpf_get_per_q_coalesce - get ITR values as requested by user + * @netdev: pointer to the netdev associated with this query + * @q_num: queue for which the itr values has to retrieved + * @ec: coalesce settings to be filled + * + * Return 0 on success, and negative on failure + */ + +static int idpf_get_per_q_coalesce(struct net_device *netdev, u32 q_num, + struct ethtool_coalesce *ec) +{ + return idpf_get_q_coalesce(netdev, ec, q_num); +} + +/** + * __idpf_set_q_coalesce - set ITR values for specific queue + * @ec: ethtool structure from user to update ITR settings + * @q: queue for which itr values has to be set + * @is_rxq: is queue type rx + * + * Returns 0 on success, negative otherwise. + */ +static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec, + struct idpf_queue *q, bool is_rxq) +{ + u32 use_adaptive_coalesce, coalesce_usecs; + struct idpf_q_vector *qv = q->q_vector; + bool is_dim_ena = false; + u16 itr_val; + + if (is_rxq) { + is_dim_ena = IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode); + use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; + coalesce_usecs = ec->rx_coalesce_usecs; + itr_val = qv->rx_itr_value; + } else { + is_dim_ena = IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode); + use_adaptive_coalesce = ec->use_adaptive_tx_coalesce; + coalesce_usecs = ec->tx_coalesce_usecs; + itr_val = qv->tx_itr_value; + } + if (coalesce_usecs != itr_val && use_adaptive_coalesce) { + netdev_err(q->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n"); + + return -EINVAL; + } + + if (is_dim_ena && use_adaptive_coalesce) + return 0; + + if (coalesce_usecs > IDPF_ITR_MAX) { + netdev_err(q->vport->netdev, + "Invalid value, %d-usecs range is 0-%d\n", + coalesce_usecs, IDPF_ITR_MAX); + + return -EINVAL; + } + + if (coalesce_usecs % 2) { + coalesce_usecs--; + netdev_info(q->vport->netdev, + "HW only supports even ITR values, ITR rounded to %d\n", + coalesce_usecs); + } + + if (is_rxq) { + qv->rx_itr_value = coalesce_usecs; + if (use_adaptive_coalesce) { + qv->rx_intr_mode = IDPF_ITR_DYNAMIC; + } else { + qv->rx_intr_mode = !IDPF_ITR_DYNAMIC; + idpf_vport_intr_write_itr(qv, qv->rx_itr_value, + false); + } + } else { + qv->tx_itr_value = coalesce_usecs; + if (use_adaptive_coalesce) { + qv->tx_intr_mode = IDPF_ITR_DYNAMIC; + } else { + qv->tx_intr_mode = !IDPF_ITR_DYNAMIC; + idpf_vport_intr_write_itr(qv, qv->tx_itr_value, true); + } + } + + /* Update of static/dynamic itr will be taken care when interrupt is + * fired + */ + return 0; +} + +/** + * idpf_set_q_coalesce - set ITR values for specific queue + * @vport: vport associated to the queue that need updating + * @ec: coalesce settings to program the device with + * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index + * @is_rxq: is queue type rx + * + * Return 0 on success, and negative on failure + */ +static int idpf_set_q_coalesce(struct idpf_vport *vport, + struct ethtool_coalesce *ec, + int q_num, bool is_rxq) +{ + struct idpf_queue *q; + + q = is_rxq ? idpf_find_rxq(vport, q_num) : idpf_find_txq(vport, q_num); + + if (q && __idpf_set_q_coalesce(ec, q, is_rxq)) + return -EINVAL; + + return 0; +} + +/** + * idpf_set_coalesce - set ITR values as requested by user + * @netdev: pointer to the netdev associated with this query + * @ec: coalesce settings to program the device with + * @kec: unused + * @extack: unused + * + * Return 0 on success, and negative on failure + */ +static int idpf_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + struct kernel_ethtool_coalesce *kec, + struct netlink_ext_ack *extack) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport *vport; + int i, err = 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + if (np->state != __IDPF_VPORT_UP) + goto unlock_mutex; + + for (i = 0; i < vport->num_txq; i++) { + err = idpf_set_q_coalesce(vport, ec, i, false); + if (err) + goto unlock_mutex; + } + + for (i = 0; i < vport->num_rxq; i++) { + err = idpf_set_q_coalesce(vport, ec, i, true); + if (err) + goto unlock_mutex; + } + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_set_per_q_coalesce - set ITR values as requested by user + * @netdev: pointer to the netdev associated with this query + * @q_num: queue for which the itr values has to be set + * @ec: coalesce settings to program the device with + * + * Return 0 on success, and negative on failure + */ +static int idpf_set_per_q_coalesce(struct net_device *netdev, u32 q_num, + struct ethtool_coalesce *ec) +{ + struct idpf_vport *vport; + int err; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + err = idpf_set_q_coalesce(vport, ec, q_num, false); + if (err) { + idpf_vport_ctrl_unlock(netdev); + + return err; + } + + err = idpf_set_q_coalesce(vport, ec, q_num, true); + + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_get_msglevel - Get debug message level + * @netdev: network interface device structure + * + * Returns current debug message level. + */ +static u32 idpf_get_msglevel(struct net_device *netdev) +{ + struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev); + + return adapter->msg_enable; +} + +/** + * idpf_set_msglevel - Set debug message level + * @netdev: network interface device structure + * @data: message level + * + * Set current debug message level. Higher values cause the driver to + * be noisier. + */ +static void idpf_set_msglevel(struct net_device *netdev, u32 data) +{ + struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev); + + adapter->msg_enable = data; +} + +/** + * idpf_get_link_ksettings - Get Link Speed and Duplex settings + * @netdev: network interface device structure + * @cmd: ethtool command + * + * Reports speed/duplex settings. + **/ +static int idpf_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct idpf_vport *vport; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + ethtool_link_ksettings_zero_link_mode(cmd, supported); + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = PORT_NONE; + if (vport->link_up) { + cmd->base.duplex = DUPLEX_FULL; + cmd->base.speed = vport->link_speed_mbps; + } else { + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->base.speed = SPEED_UNKNOWN; + } + + idpf_vport_ctrl_unlock(netdev); + + return 0; +} + +static const struct ethtool_ops idpf_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_USE_ADAPTIVE, + .get_msglevel = idpf_get_msglevel, + .set_msglevel = idpf_set_msglevel, + .get_link = ethtool_op_get_link, + .get_coalesce = idpf_get_coalesce, + .set_coalesce = idpf_set_coalesce, + .get_per_queue_coalesce = idpf_get_per_q_coalesce, + .set_per_queue_coalesce = idpf_set_per_q_coalesce, + .get_ethtool_stats = idpf_get_ethtool_stats, + .get_strings = idpf_get_strings, + .get_sset_count = idpf_get_sset_count, + .get_channels = idpf_get_channels, + .get_rxnfc = idpf_get_rxnfc, + .get_rxfh_key_size = idpf_get_rxfh_key_size, + .get_rxfh_indir_size = idpf_get_rxfh_indir_size, + .get_rxfh = idpf_get_rxfh, + .set_rxfh = idpf_set_rxfh, + .set_channels = idpf_set_channels, + .get_ringparam = idpf_get_ringparam, + .set_ringparam = idpf_set_ringparam, + .get_link_ksettings = idpf_get_link_ksettings, +}; + +/** + * idpf_set_ethtool_ops - Initialize ethtool ops struct + * @netdev: network interface device structure + * + * Sets ethtool ops struct in our netdev so that ethtool can call + * our functions. + */ +void idpf_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &idpf_ethtool_ops; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h b/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h new file mode 100644 index 00000000000000..24edb8a6ec2e64 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_pf_regs.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_LAN_PF_REGS_H_ +#define _IDPF_LAN_PF_REGS_H_ + +/* Receive queues */ +#define PF_QRX_BASE 0x00000000 +#define PF_QRX_TAIL(_QRX) (PF_QRX_BASE + (((_QRX) * 0x1000))) +#define PF_QRX_BUFFQ_BASE 0x03000000 +#define PF_QRX_BUFFQ_TAIL(_QRX) (PF_QRX_BUFFQ_BASE + (((_QRX) * 0x1000))) + +/* Transmit queues */ +#define PF_QTX_BASE 0x05000000 +#define PF_QTX_COMM_DBELL(_DBQM) (PF_QTX_BASE + ((_DBQM) * 0x1000)) + +/* Control(PF Mailbox) Queue */ +#define PF_FW_BASE 0x08400000 + +#define PF_FW_ARQBAL (PF_FW_BASE) +#define PF_FW_ARQBAH (PF_FW_BASE + 0x4) +#define PF_FW_ARQLEN (PF_FW_BASE + 0x8) +#define PF_FW_ARQLEN_ARQLEN_S 0 +#define PF_FW_ARQLEN_ARQLEN_M GENMASK(12, 0) +#define PF_FW_ARQLEN_ARQVFE_S 28 +#define PF_FW_ARQLEN_ARQVFE_M BIT(PF_FW_ARQLEN_ARQVFE_S) +#define PF_FW_ARQLEN_ARQOVFL_S 29 +#define PF_FW_ARQLEN_ARQOVFL_M BIT(PF_FW_ARQLEN_ARQOVFL_S) +#define PF_FW_ARQLEN_ARQCRIT_S 30 +#define PF_FW_ARQLEN_ARQCRIT_M BIT(PF_FW_ARQLEN_ARQCRIT_S) +#define PF_FW_ARQLEN_ARQENABLE_S 31 +#define PF_FW_ARQLEN_ARQENABLE_M BIT(PF_FW_ARQLEN_ARQENABLE_S) +#define PF_FW_ARQH (PF_FW_BASE + 0xC) +#define PF_FW_ARQH_ARQH_S 0 +#define PF_FW_ARQH_ARQH_M GENMASK(12, 0) +#define PF_FW_ARQT (PF_FW_BASE + 0x10) + +#define PF_FW_ATQBAL (PF_FW_BASE + 0x14) +#define PF_FW_ATQBAH (PF_FW_BASE + 0x18) +#define PF_FW_ATQLEN (PF_FW_BASE + 0x1C) +#define PF_FW_ATQLEN_ATQLEN_S 0 +#define PF_FW_ATQLEN_ATQLEN_M GENMASK(9, 0) +#define PF_FW_ATQLEN_ATQVFE_S 28 +#define PF_FW_ATQLEN_ATQVFE_M BIT(PF_FW_ATQLEN_ATQVFE_S) +#define PF_FW_ATQLEN_ATQOVFL_S 29 +#define PF_FW_ATQLEN_ATQOVFL_M BIT(PF_FW_ATQLEN_ATQOVFL_S) +#define PF_FW_ATQLEN_ATQCRIT_S 30 +#define PF_FW_ATQLEN_ATQCRIT_M BIT(PF_FW_ATQLEN_ATQCRIT_S) +#define PF_FW_ATQLEN_ATQENABLE_S 31 +#define PF_FW_ATQLEN_ATQENABLE_M BIT(PF_FW_ATQLEN_ATQENABLE_S) +#define PF_FW_ATQH (PF_FW_BASE + 0x20) +#define PF_FW_ATQH_ATQH_S 0 +#define PF_FW_ATQH_ATQH_M GENMASK(9, 0) +#define PF_FW_ATQT (PF_FW_BASE + 0x24) + +/* Interrupts */ +#define PF_GLINT_BASE 0x08900000 +#define PF_GLINT_DYN_CTL(_INT) (PF_GLINT_BASE + ((_INT) * 0x1000)) +#define PF_GLINT_DYN_CTL_INTENA_S 0 +#define PF_GLINT_DYN_CTL_INTENA_M BIT(PF_GLINT_DYN_CTL_INTENA_S) +#define PF_GLINT_DYN_CTL_CLEARPBA_S 1 +#define PF_GLINT_DYN_CTL_CLEARPBA_M BIT(PF_GLINT_DYN_CTL_CLEARPBA_S) +#define PF_GLINT_DYN_CTL_SWINT_TRIG_S 2 +#define PF_GLINT_DYN_CTL_SWINT_TRIG_M BIT(PF_GLINT_DYN_CTL_SWINT_TRIG_S) +#define PF_GLINT_DYN_CTL_ITR_INDX_S 3 +#define PF_GLINT_DYN_CTL_ITR_INDX_M GENMASK(4, 3) +#define PF_GLINT_DYN_CTL_INTERVAL_S 5 +#define PF_GLINT_DYN_CTL_INTERVAL_M BIT(PF_GLINT_DYN_CTL_INTERVAL_S) +#define PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_S 24 +#define PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(PF_GLINT_DYN_CTL_SW_ITR_INDX_ENA_S) +#define PF_GLINT_DYN_CTL_SW_ITR_INDX_S 25 +#define PF_GLINT_DYN_CTL_SW_ITR_INDX_M BIT(PF_GLINT_DYN_CTL_SW_ITR_INDX_S) +#define PF_GLINT_DYN_CTL_WB_ON_ITR_S 30 +#define PF_GLINT_DYN_CTL_WB_ON_ITR_M BIT(PF_GLINT_DYN_CTL_WB_ON_ITR_S) +#define PF_GLINT_DYN_CTL_INTENA_MSK_S 31 +#define PF_GLINT_DYN_CTL_INTENA_MSK_M BIT(PF_GLINT_DYN_CTL_INTENA_MSK_S) +/* _ITR is ITR index, _INT is interrupt index, _itrn_indx_spacing is + * spacing b/w itrn registers of the same vector. + */ +#define PF_GLINT_ITR_ADDR(_ITR, _reg_start, _itrn_indx_spacing) \ + ((_reg_start) + ((_ITR) * (_itrn_indx_spacing))) +/* For PF, itrn_indx_spacing is 4 and itrn_reg_spacing is 0x1000 */ +#define PF_GLINT_ITR(_ITR, _INT) \ + (PF_GLINT_BASE + (((_ITR) + 1) * 4) + ((_INT) * 0x1000)) +#define PF_GLINT_ITR_MAX_INDEX 2 +#define PF_GLINT_ITR_INTERVAL_S 0 +#define PF_GLINT_ITR_INTERVAL_M GENMASK(11, 0) + +/* Generic registers */ +#define PF_INT_DIR_OICR_ENA 0x08406000 +#define PF_INT_DIR_OICR_ENA_S 0 +#define PF_INT_DIR_OICR_ENA_M GENMASK(31, 0) +#define PF_INT_DIR_OICR 0x08406004 +#define PF_INT_DIR_OICR_TSYN_EVNT 0 +#define PF_INT_DIR_OICR_PHY_TS_0 BIT(1) +#define PF_INT_DIR_OICR_PHY_TS_1 BIT(2) +#define PF_INT_DIR_OICR_CAUSE 0x08406008 +#define PF_INT_DIR_OICR_CAUSE_CAUSE_S 0 +#define PF_INT_DIR_OICR_CAUSE_CAUSE_M GENMASK(31, 0) +#define PF_INT_PBA_CLEAR 0x0840600C + +#define PF_FUNC_RID 0x08406010 +#define PF_FUNC_RID_FUNCTION_NUMBER_S 0 +#define PF_FUNC_RID_FUNCTION_NUMBER_M GENMASK(2, 0) +#define PF_FUNC_RID_DEVICE_NUMBER_S 3 +#define PF_FUNC_RID_DEVICE_NUMBER_M GENMASK(7, 3) +#define PF_FUNC_RID_BUS_NUMBER_S 8 +#define PF_FUNC_RID_BUS_NUMBER_M GENMASK(15, 8) + +/* Reset registers */ +#define PFGEN_RTRIG 0x08407000 +#define PFGEN_RTRIG_CORER_S 0 +#define PFGEN_RTRIG_CORER_M BIT(0) +#define PFGEN_RTRIG_LINKR_S 1 +#define PFGEN_RTRIG_LINKR_M BIT(1) +#define PFGEN_RTRIG_IMCR_S 2 +#define PFGEN_RTRIG_IMCR_M BIT(2) +#define PFGEN_RSTAT 0x08407008 /* PFR Status */ +#define PFGEN_RSTAT_PFR_STATE_S 0 +#define PFGEN_RSTAT_PFR_STATE_M GENMASK(1, 0) +#define PFGEN_CTRL 0x0840700C +#define PFGEN_CTRL_PFSWR BIT(0) + +#endif diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h new file mode 100644 index 00000000000000..a5752dcab8887c --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h @@ -0,0 +1,293 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_LAN_TXRX_H_ +#define _IDPF_LAN_TXRX_H_ + +enum idpf_rss_hash { + IDPF_HASH_INVALID = 0, + /* Values 1 - 28 are reserved for future use */ + IDPF_HASH_NONF_UNICAST_IPV4_UDP = 29, + IDPF_HASH_NONF_MULTICAST_IPV4_UDP, + IDPF_HASH_NONF_IPV4_UDP, + IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK, + IDPF_HASH_NONF_IPV4_TCP, + IDPF_HASH_NONF_IPV4_SCTP, + IDPF_HASH_NONF_IPV4_OTHER, + IDPF_HASH_FRAG_IPV4, + /* Values 37-38 are reserved */ + IDPF_HASH_NONF_UNICAST_IPV6_UDP = 39, + IDPF_HASH_NONF_MULTICAST_IPV6_UDP, + IDPF_HASH_NONF_IPV6_UDP, + IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK, + IDPF_HASH_NONF_IPV6_TCP, + IDPF_HASH_NONF_IPV6_SCTP, + IDPF_HASH_NONF_IPV6_OTHER, + IDPF_HASH_FRAG_IPV6, + IDPF_HASH_NONF_RSVD47, + IDPF_HASH_NONF_FCOE_OX, + IDPF_HASH_NONF_FCOE_RX, + IDPF_HASH_NONF_FCOE_OTHER, + /* Values 51-62 are reserved */ + IDPF_HASH_L2_PAYLOAD = 63, + + IDPF_HASH_MAX +}; + +/* Supported RSS offloads */ +#define IDPF_DEFAULT_RSS_HASH \ + (BIT_ULL(IDPF_HASH_NONF_IPV4_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV4_SCTP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV4_TCP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV4_OTHER) | \ + BIT_ULL(IDPF_HASH_FRAG_IPV4) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_TCP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_SCTP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_OTHER) | \ + BIT_ULL(IDPF_HASH_FRAG_IPV6) | \ + BIT_ULL(IDPF_HASH_L2_PAYLOAD)) + +#define IDPF_DEFAULT_RSS_HASH_EXPANDED (IDPF_DEFAULT_RSS_HASH | \ + BIT_ULL(IDPF_HASH_NONF_IPV4_TCP_SYN_NO_ACK) | \ + BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV4_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV4_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_IPV6_TCP_SYN_NO_ACK) | \ + BIT_ULL(IDPF_HASH_NONF_UNICAST_IPV6_UDP) | \ + BIT_ULL(IDPF_HASH_NONF_MULTICAST_IPV6_UDP)) + +/* For idpf_splitq_base_tx_compl_desc */ +#define IDPF_TXD_COMPLQ_GEN_S 15 +#define IDPF_TXD_COMPLQ_GEN_M BIT_ULL(IDPF_TXD_COMPLQ_GEN_S) +#define IDPF_TXD_COMPLQ_COMPL_TYPE_S 11 +#define IDPF_TXD_COMPLQ_COMPL_TYPE_M GENMASK_ULL(13, 11) +#define IDPF_TXD_COMPLQ_QID_S 0 +#define IDPF_TXD_COMPLQ_QID_M GENMASK_ULL(9, 0) + +/* For base mode TX descriptors */ + +#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S 23 +#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S) +#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_S 19 +#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_M \ + (0xFULL << IDPF_TXD_CTX_QW0_TUNN_DECTTL_S) +#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_S 12 +#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_M \ + (0X7FULL << IDPF_TXD_CTX_QW0_TUNN_NATLEN_S) +#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S 11 +#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M \ + BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S) +#define IDPF_TXD_CTX_EIP_NOINC_IPID_CONST \ + IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M +#define IDPF_TXD_CTX_QW0_TUNN_NATT_S 9 +#define IDPF_TXD_CTX_QW0_TUNN_NATT_M (0x3ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S) +#define IDPF_TXD_CTX_UDP_TUNNELING BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_NATT_S) +#define IDPF_TXD_CTX_GRE_TUNNELING (0x2ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S) +#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S 2 +#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M \ + (0x3FULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S) +#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S 0 +#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_M \ + (0x3ULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S) + +#define IDPF_TXD_CTX_QW1_MSS_S 50 +#define IDPF_TXD_CTX_QW1_MSS_M GENMASK_ULL(63, 50) +#define IDPF_TXD_CTX_QW1_TSO_LEN_S 30 +#define IDPF_TXD_CTX_QW1_TSO_LEN_M GENMASK_ULL(47, 30) +#define IDPF_TXD_CTX_QW1_CMD_S 4 +#define IDPF_TXD_CTX_QW1_CMD_M GENMASK_ULL(15, 4) +#define IDPF_TXD_CTX_QW1_DTYPE_S 0 +#define IDPF_TXD_CTX_QW1_DTYPE_M GENMASK_ULL(3, 0) +#define IDPF_TXD_QW1_L2TAG1_S 48 +#define IDPF_TXD_QW1_L2TAG1_M GENMASK_ULL(63, 48) +#define IDPF_TXD_QW1_TX_BUF_SZ_S 34 +#define IDPF_TXD_QW1_TX_BUF_SZ_M GENMASK_ULL(47, 34) +#define IDPF_TXD_QW1_OFFSET_S 16 +#define IDPF_TXD_QW1_OFFSET_M GENMASK_ULL(33, 16) +#define IDPF_TXD_QW1_CMD_S 4 +#define IDPF_TXD_QW1_CMD_M GENMASK_ULL(15, 4) +#define IDPF_TXD_QW1_DTYPE_S 0 +#define IDPF_TXD_QW1_DTYPE_M GENMASK_ULL(3, 0) + +/* TX Completion Descriptor Completion Types */ +#define IDPF_TXD_COMPLT_ITR_FLUSH 0 +/* Descriptor completion type 1 is reserved */ +#define IDPF_TXD_COMPLT_RS 2 +/* Descriptor completion type 3 is reserved */ +#define IDPF_TXD_COMPLT_RE 4 +#define IDPF_TXD_COMPLT_SW_MARKER 5 + +enum idpf_tx_desc_dtype_value { + IDPF_TX_DESC_DTYPE_DATA = 0, + IDPF_TX_DESC_DTYPE_CTX = 1, + /* DTYPE 2 is reserved + * DTYPE 3 is free for future use + * DTYPE 4 is reserved + */ + IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX = 5, + /* DTYPE 6 is reserved */ + IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 = 7, + /* DTYPE 8, 9 are free for future use + * DTYPE 10 is reserved + * DTYPE 11 is free for future use + */ + IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE = 12, + /* DTYPE 13, 14 are free for future use */ + + /* DESC_DONE - HW has completed write-back of descriptor */ + IDPF_TX_DESC_DTYPE_DESC_DONE = 15, +}; + +enum idpf_tx_ctx_desc_cmd_bits { + IDPF_TX_CTX_DESC_TSO = 0x01, + IDPF_TX_CTX_DESC_TSYN = 0x02, + IDPF_TX_CTX_DESC_IL2TAG2 = 0x04, + IDPF_TX_CTX_DESC_RSVD = 0x08, + IDPF_TX_CTX_DESC_SWTCH_NOTAG = 0x00, + IDPF_TX_CTX_DESC_SWTCH_UPLINK = 0x10, + IDPF_TX_CTX_DESC_SWTCH_LOCAL = 0x20, + IDPF_TX_CTX_DESC_SWTCH_VSI = 0x30, + IDPF_TX_CTX_DESC_FILT_AU_EN = 0x40, + IDPF_TX_CTX_DESC_FILT_AU_EVICT = 0x80, + IDPF_TX_CTX_DESC_RSVD1 = 0xF00 +}; + +enum idpf_tx_desc_len_fields { + /* Note: These are predefined bit offsets */ + IDPF_TX_DESC_LEN_MACLEN_S = 0, /* 7 BITS */ + IDPF_TX_DESC_LEN_IPLEN_S = 7, /* 7 BITS */ + IDPF_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */ +}; + +enum idpf_tx_base_desc_cmd_bits { + IDPF_TX_DESC_CMD_EOP = BIT(0), + IDPF_TX_DESC_CMD_RS = BIT(1), + /* only on VFs else RSVD */ + IDPF_TX_DESC_CMD_ICRC = BIT(2), + IDPF_TX_DESC_CMD_IL2TAG1 = BIT(3), + IDPF_TX_DESC_CMD_RSVD1 = BIT(4), + IDPF_TX_DESC_CMD_IIPT_IPV6 = BIT(5), + IDPF_TX_DESC_CMD_IIPT_IPV4 = BIT(6), + IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM = GENMASK(6, 5), + IDPF_TX_DESC_CMD_RSVD2 = BIT(7), + IDPF_TX_DESC_CMD_L4T_EOFT_TCP = BIT(8), + IDPF_TX_DESC_CMD_L4T_EOFT_SCTP = BIT(9), + IDPF_TX_DESC_CMD_L4T_EOFT_UDP = GENMASK(9, 8), + IDPF_TX_DESC_CMD_RSVD3 = BIT(10), + IDPF_TX_DESC_CMD_RSVD4 = BIT(11), +}; + +/* Transmit descriptors */ +/* splitq tx buf, singleq tx buf and singleq compl desc */ +struct idpf_base_tx_desc { + __le64 buf_addr; /* Address of descriptor's data buf */ + __le64 qw1; /* type_cmd_offset_bsz_l2tag1 */ +}; /* read used with buffer queues */ + +struct idpf_splitq_tx_compl_desc { + /* qid=[10:0] comptype=[13:11] rsvd=[14] gen=[15] */ + __le16 qid_comptype_gen; + union { + __le16 q_head; /* Queue head */ + __le16 compl_tag; /* Completion tag */ + } q_head_compl_tag; + u8 ts[3]; + u8 rsvd; /* Reserved */ +}; /* writeback used with completion queues */ + +/* Context descriptors */ +struct idpf_base_tx_ctx_desc { + struct { + __le32 tunneling_params; + __le16 l2tag2; + __le16 rsvd1; + } qw0; + __le64 qw1; /* type_cmd_tlen_mss/rt_hint */ +}; + +/* Common cmd field defines for all desc except Flex Flow Scheduler (0x0C) */ +enum idpf_tx_flex_desc_cmd_bits { + IDPF_TX_FLEX_DESC_CMD_EOP = BIT(0), + IDPF_TX_FLEX_DESC_CMD_RS = BIT(1), + IDPF_TX_FLEX_DESC_CMD_RE = BIT(2), + IDPF_TX_FLEX_DESC_CMD_IL2TAG1 = BIT(3), + IDPF_TX_FLEX_DESC_CMD_DUMMY = BIT(4), + IDPF_TX_FLEX_DESC_CMD_CS_EN = BIT(5), + IDPF_TX_FLEX_DESC_CMD_FILT_AU_EN = BIT(6), + IDPF_TX_FLEX_DESC_CMD_FILT_AU_EVICT = BIT(7), +}; + +struct idpf_flex_tx_desc { + __le64 buf_addr; /* Packet buffer address */ + struct { +#define IDPF_FLEX_TXD_QW1_DTYPE_S 0 +#define IDPF_FLEX_TXD_QW1_DTYPE_M GENMASK(4, 0) +#define IDPF_FLEX_TXD_QW1_CMD_S 5 +#define IDPF_FLEX_TXD_QW1_CMD_M GENMASK(15, 5) + __le16 cmd_dtype; + /* DTYPE=IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 (0x07) */ + struct { + __le16 l2tag1; + __le16 l2tag2; + } l2tags; + __le16 buf_size; + } qw1; +}; + +struct idpf_flex_tx_sched_desc { + __le64 buf_addr; /* Packet buffer address */ + + /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE_16B (0x0C) */ + struct { + u8 cmd_dtype; +#define IDPF_TXD_FLEX_FLOW_DTYPE_M GENMASK(4, 0) +#define IDPF_TXD_FLEX_FLOW_CMD_EOP BIT(5) +#define IDPF_TXD_FLEX_FLOW_CMD_CS_EN BIT(6) +#define IDPF_TXD_FLEX_FLOW_CMD_RE BIT(7) + + /* [23:23] Horizon Overflow bit, [22:0] timestamp */ + u8 ts[3]; +#define IDPF_TXD_FLOW_SCH_HORIZON_OVERFLOW_M BIT(7) + + __le16 compl_tag; + __le16 rxr_bufsize; +#define IDPF_TXD_FLEX_FLOW_RXR BIT(14) +#define IDPF_TXD_FLEX_FLOW_BUFSIZE_M GENMASK(13, 0) + } qw1; +}; + +/* Common cmd fields for all flex context descriptors + * Note: these defines already account for the 5 bit dtype in the cmd_dtype + * field + */ +enum idpf_tx_flex_ctx_desc_cmd_bits { + IDPF_TX_FLEX_CTX_DESC_CMD_TSO = BIT(5), + IDPF_TX_FLEX_CTX_DESC_CMD_TSYN_EN = BIT(6), + IDPF_TX_FLEX_CTX_DESC_CMD_L2TAG2 = BIT(7), + IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_UPLNK = BIT(9), + IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_LOCAL = BIT(10), + IDPF_TX_FLEX_CTX_DESC_CMD_SWTCH_TARGETVSI = GENMASK(10, 9), +}; + +/* Standard flex descriptor TSO context quad word */ +struct idpf_flex_tx_tso_ctx_qw { + __le32 flex_tlen; +#define IDPF_TXD_FLEX_CTX_TLEN_M GENMASK(17, 0) +#define IDPF_TXD_FLEX_TSO_CTX_FLEX_S 24 + __le16 mss_rt; +#define IDPF_TXD_FLEX_CTX_MSS_RT_M GENMASK(13, 0) + u8 hdr_len; + u8 flex; +}; + +struct idpf_flex_tx_ctx_desc { + /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX (0x05) */ + struct { + struct idpf_flex_tx_tso_ctx_qw qw0; + struct { + __le16 cmd_dtype; + u8 flex[6]; + } qw1; + } tso; +}; +#endif /* _IDPF_LAN_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h b/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h new file mode 100644 index 00000000000000..3d73b6c7686311 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_vf_regs.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_LAN_VF_REGS_H_ +#define _IDPF_LAN_VF_REGS_H_ + +/* Reset */ +#define VFGEN_RSTAT 0x00008800 +#define VFGEN_RSTAT_VFR_STATE_S 0 +#define VFGEN_RSTAT_VFR_STATE_M GENMASK(1, 0) + +/* Control(VF Mailbox) Queue */ +#define VF_BASE 0x00006000 + +#define VF_ATQBAL (VF_BASE + 0x1C00) +#define VF_ATQBAH (VF_BASE + 0x1800) +#define VF_ATQLEN (VF_BASE + 0x0800) +#define VF_ATQLEN_ATQLEN_S 0 +#define VF_ATQLEN_ATQLEN_M GENMASK(9, 0) +#define VF_ATQLEN_ATQVFE_S 28 +#define VF_ATQLEN_ATQVFE_M BIT(VF_ATQLEN_ATQVFE_S) +#define VF_ATQLEN_ATQOVFL_S 29 +#define VF_ATQLEN_ATQOVFL_M BIT(VF_ATQLEN_ATQOVFL_S) +#define VF_ATQLEN_ATQCRIT_S 30 +#define VF_ATQLEN_ATQCRIT_M BIT(VF_ATQLEN_ATQCRIT_S) +#define VF_ATQLEN_ATQENABLE_S 31 +#define VF_ATQLEN_ATQENABLE_M BIT(VF_ATQLEN_ATQENABLE_S) +#define VF_ATQH (VF_BASE + 0x0400) +#define VF_ATQH_ATQH_S 0 +#define VF_ATQH_ATQH_M GENMASK(9, 0) +#define VF_ATQT (VF_BASE + 0x2400) + +#define VF_ARQBAL (VF_BASE + 0x0C00) +#define VF_ARQBAH (VF_BASE) +#define VF_ARQLEN (VF_BASE + 0x2000) +#define VF_ARQLEN_ARQLEN_S 0 +#define VF_ARQLEN_ARQLEN_M GENMASK(9, 0) +#define VF_ARQLEN_ARQVFE_S 28 +#define VF_ARQLEN_ARQVFE_M BIT(VF_ARQLEN_ARQVFE_S) +#define VF_ARQLEN_ARQOVFL_S 29 +#define VF_ARQLEN_ARQOVFL_M BIT(VF_ARQLEN_ARQOVFL_S) +#define VF_ARQLEN_ARQCRIT_S 30 +#define VF_ARQLEN_ARQCRIT_M BIT(VF_ARQLEN_ARQCRIT_S) +#define VF_ARQLEN_ARQENABLE_S 31 +#define VF_ARQLEN_ARQENABLE_M BIT(VF_ARQLEN_ARQENABLE_S) +#define VF_ARQH (VF_BASE + 0x1400) +#define VF_ARQH_ARQH_S 0 +#define VF_ARQH_ARQH_M GENMASK(12, 0) +#define VF_ARQT (VF_BASE + 0x1000) + +/* Transmit queues */ +#define VF_QTX_TAIL_BASE 0x00000000 +#define VF_QTX_TAIL(_QTX) (VF_QTX_TAIL_BASE + (_QTX) * 0x4) +#define VF_QTX_TAIL_EXT_BASE 0x00040000 +#define VF_QTX_TAIL_EXT(_QTX) (VF_QTX_TAIL_EXT_BASE + ((_QTX) * 4)) + +/* Receive queues */ +#define VF_QRX_TAIL_BASE 0x00002000 +#define VF_QRX_TAIL(_QRX) (VF_QRX_TAIL_BASE + ((_QRX) * 4)) +#define VF_QRX_TAIL_EXT_BASE 0x00050000 +#define VF_QRX_TAIL_EXT(_QRX) (VF_QRX_TAIL_EXT_BASE + ((_QRX) * 4)) +#define VF_QRXB_TAIL_BASE 0x00060000 +#define VF_QRXB_TAIL(_QRX) (VF_QRXB_TAIL_BASE + ((_QRX) * 4)) + +/* Interrupts */ +#define VF_INT_DYN_CTL0 0x00005C00 +#define VF_INT_DYN_CTL0_INTENA_S 0 +#define VF_INT_DYN_CTL0_INTENA_M BIT(VF_INT_DYN_CTL0_INTENA_S) +#define VF_INT_DYN_CTL0_ITR_INDX_S 3 +#define VF_INT_DYN_CTL0_ITR_INDX_M GENMASK(4, 3) +#define VF_INT_DYN_CTLN(_INT) (0x00003800 + ((_INT) * 4)) +#define VF_INT_DYN_CTLN_EXT(_INT) (0x00070000 + ((_INT) * 4)) +#define VF_INT_DYN_CTLN_INTENA_S 0 +#define VF_INT_DYN_CTLN_INTENA_M BIT(VF_INT_DYN_CTLN_INTENA_S) +#define VF_INT_DYN_CTLN_CLEARPBA_S 1 +#define VF_INT_DYN_CTLN_CLEARPBA_M BIT(VF_INT_DYN_CTLN_CLEARPBA_S) +#define VF_INT_DYN_CTLN_SWINT_TRIG_S 2 +#define VF_INT_DYN_CTLN_SWINT_TRIG_M BIT(VF_INT_DYN_CTLN_SWINT_TRIG_S) +#define VF_INT_DYN_CTLN_ITR_INDX_S 3 +#define VF_INT_DYN_CTLN_ITR_INDX_M GENMASK(4, 3) +#define VF_INT_DYN_CTLN_INTERVAL_S 5 +#define VF_INT_DYN_CTLN_INTERVAL_M BIT(VF_INT_DYN_CTLN_INTERVAL_S) +#define VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_S 24 +#define VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_M BIT(VF_INT_DYN_CTLN_SW_ITR_INDX_ENA_S) +#define VF_INT_DYN_CTLN_SW_ITR_INDX_S 25 +#define VF_INT_DYN_CTLN_SW_ITR_INDX_M BIT(VF_INT_DYN_CTLN_SW_ITR_INDX_S) +#define VF_INT_DYN_CTLN_WB_ON_ITR_S 30 +#define VF_INT_DYN_CTLN_WB_ON_ITR_M BIT(VF_INT_DYN_CTLN_WB_ON_ITR_S) +#define VF_INT_DYN_CTLN_INTENA_MSK_S 31 +#define VF_INT_DYN_CTLN_INTENA_MSK_M BIT(VF_INT_DYN_CTLN_INTENA_MSK_S) +/* _ITR is ITR index, _INT is interrupt index, _itrn_indx_spacing is spacing + * b/w itrn registers of the same vector + */ +#define VF_INT_ITR0(_ITR) (0x00004C00 + ((_ITR) * 4)) +#define VF_INT_ITRN_ADDR(_ITR, _reg_start, _itrn_indx_spacing) \ + ((_reg_start) + ((_ITR) * (_itrn_indx_spacing))) +/* For VF with 16 vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing + * is 0x40 and base register offset is 0x00002800 + */ +#define VF_INT_ITRN(_INT, _ITR) \ + (0x00002800 + ((_INT) * 4) + ((_ITR) * 0x40)) +/* For VF with 64 vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing + * is 0x100 and base register offset is 0x00002C00 + */ +#define VF_INT_ITRN_64(_INT, _ITR) \ + (0x00002C00 + ((_INT) * 4) + ((_ITR) * 0x100)) +/* For VF with 2k vector support, itrn_reg_spacing is 0x4, itrn_indx_spacing + * is 0x2000 and base register offset is 0x00072000 + */ +#define VF_INT_ITRN_2K(_INT, _ITR) \ + (0x00072000 + ((_INT) * 4) + ((_ITR) * 0x2000)) +#define VF_INT_ITRN_MAX_INDEX 2 +#define VF_INT_ITRN_INTERVAL_S 0 +#define VF_INT_ITRN_INTERVAL_M GENMASK(11, 0) +#define VF_INT_PBA_CLEAR 0x00008900 + +#define VF_INT_ICR0_ENA1 0x00005000 +#define VF_INT_ICR0_ENA1_ADMINQ_S 30 +#define VF_INT_ICR0_ENA1_ADMINQ_M BIT(VF_INT_ICR0_ENA1_ADMINQ_S) +#define VF_INT_ICR0_ENA1_RSVD_S 31 +#define VF_INT_ICR01 0x00004800 +#define VF_QF_HENA(_i) (0x0000C400 + ((_i) * 4)) +#define VF_QF_HENA_MAX_INDX 1 +#define VF_QF_HKEY(_i) (0x0000CC00 + ((_i) * 4)) +#define VF_QF_HKEY_MAX_INDX 12 +#define VF_QF_HLUT(_i) (0x0000D000 + ((_i) * 4)) +#define VF_QF_HLUT_MAX_INDX 15 +#endif diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c new file mode 100644 index 00000000000000..5d3532c27d57f9 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -0,0 +1,2421 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_virtchnl.h" + +static const struct net_device_ops idpf_netdev_ops_splitq; +static const struct net_device_ops idpf_netdev_ops_singleq; + +/** + * idpf_init_vector_stack - Fill the MSIX vector stack with vector index + * @adapter: private data struct + * + * Return 0 on success, error on failure + */ +static int idpf_init_vector_stack(struct idpf_adapter *adapter) +{ + struct idpf_vector_lifo *stack; + u16 min_vec; + u32 i; + + mutex_lock(&adapter->vector_lock); + min_vec = adapter->num_msix_entries - adapter->num_avail_msix; + stack = &adapter->vector_stack; + stack->size = adapter->num_msix_entries; + /* set the base and top to point at start of the 'free pool' to + * distribute the unused vectors on-demand basis + */ + stack->base = min_vec; + stack->top = min_vec; + + stack->vec_idx = kcalloc(stack->size, sizeof(u16), GFP_KERNEL); + if (!stack->vec_idx) { + mutex_unlock(&adapter->vector_lock); + + return -ENOMEM; + } + + for (i = 0; i < stack->size; i++) + stack->vec_idx[i] = i; + + mutex_unlock(&adapter->vector_lock); + + return 0; +} + +/** + * idpf_deinit_vector_stack - zero out the MSIX vector stack + * @adapter: private data struct + */ +static void idpf_deinit_vector_stack(struct idpf_adapter *adapter) +{ + struct idpf_vector_lifo *stack; + + mutex_lock(&adapter->vector_lock); + stack = &adapter->vector_stack; + kfree(stack->vec_idx); + stack->vec_idx = NULL; + mutex_unlock(&adapter->vector_lock); +} + +/** + * idpf_mb_intr_rel_irq - Free the IRQ association with the OS + * @adapter: adapter structure + * + * This will also disable interrupt mode and queue up mailbox task. Mailbox + * task will reschedule itself if not in interrupt mode. + */ +static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter) +{ + clear_bit(IDPF_MB_INTR_MODE, adapter->flags); + free_irq(adapter->msix_entries[0].vector, adapter); + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); +} + +/** + * idpf_intr_rel - Release interrupt capabilities and free memory + * @adapter: adapter to disable interrupts on + */ +void idpf_intr_rel(struct idpf_adapter *adapter) +{ + if (!adapter->msix_entries) + return; + + idpf_mb_intr_rel_irq(adapter); + pci_free_irq_vectors(adapter->pdev); + idpf_send_dealloc_vectors_msg(adapter); + idpf_deinit_vector_stack(adapter); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; +} + +/** + * idpf_mb_intr_clean - Interrupt handler for the mailbox + * @irq: interrupt number + * @data: pointer to the adapter structure + */ +static irqreturn_t idpf_mb_intr_clean(int __always_unused irq, void *data) +{ + struct idpf_adapter *adapter = (struct idpf_adapter *)data; + + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); + + return IRQ_HANDLED; +} + +/** + * idpf_mb_irq_enable - Enable MSIX interrupt for the mailbox + * @adapter: adapter to get the hardware address for register write + */ +static void idpf_mb_irq_enable(struct idpf_adapter *adapter) +{ + struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg; + u32 val; + + val = intr->dyn_ctl_intena_m | intr->dyn_ctl_itridx_m; + writel(val, intr->dyn_ctl); + writel(intr->icr_ena_ctlq_m, intr->icr_ena); +} + +/** + * idpf_mb_intr_req_irq - Request irq for the mailbox interrupt + * @adapter: adapter structure to pass to the mailbox irq handler + */ +static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter) +{ + struct idpf_q_vector *mb_vector = &adapter->mb_vector; + int irq_num, mb_vidx = 0, err; + + irq_num = adapter->msix_entries[mb_vidx].vector; + mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", + dev_driver_string(&adapter->pdev->dev), + "Mailbox", mb_vidx); + err = request_irq(irq_num, adapter->irq_mb_handler, 0, + mb_vector->name, adapter); + if (err) { + dev_err(&adapter->pdev->dev, + "IRQ request for mailbox failed, error: %d\n", err); + + return err; + } + + set_bit(IDPF_MB_INTR_MODE, adapter->flags); + + return 0; +} + +/** + * idpf_set_mb_vec_id - Set vector index for mailbox + * @adapter: adapter structure to access the vector chunks + * + * The first vector id in the requested vector chunks from the CP is for + * the mailbox + */ +static void idpf_set_mb_vec_id(struct idpf_adapter *adapter) +{ + if (adapter->req_vec_chunks) + adapter->mb_vector.v_idx = + le16_to_cpu(adapter->caps.mailbox_vector_id); + else + adapter->mb_vector.v_idx = 0; +} + +/** + * idpf_mb_intr_init - Initialize the mailbox interrupt + * @adapter: adapter structure to store the mailbox vector + */ +static int idpf_mb_intr_init(struct idpf_adapter *adapter) +{ + adapter->dev_ops.reg_ops.mb_intr_reg_init(adapter); + adapter->irq_mb_handler = idpf_mb_intr_clean; + + return idpf_mb_intr_req_irq(adapter); +} + +/** + * idpf_vector_lifo_push - push MSIX vector index onto stack + * @adapter: private data struct + * @vec_idx: vector index to store + */ +static int idpf_vector_lifo_push(struct idpf_adapter *adapter, u16 vec_idx) +{ + struct idpf_vector_lifo *stack = &adapter->vector_stack; + + lockdep_assert_held(&adapter->vector_lock); + + if (stack->top == stack->base) { + dev_err(&adapter->pdev->dev, "Exceeded the vector stack limit: %d\n", + stack->top); + return -EINVAL; + } + + stack->vec_idx[--stack->top] = vec_idx; + + return 0; +} + +/** + * idpf_vector_lifo_pop - pop MSIX vector index from stack + * @adapter: private data struct + */ +static int idpf_vector_lifo_pop(struct idpf_adapter *adapter) +{ + struct idpf_vector_lifo *stack = &adapter->vector_stack; + + lockdep_assert_held(&adapter->vector_lock); + + if (stack->top == stack->size) { + dev_err(&adapter->pdev->dev, "No interrupt vectors are available to distribute!\n"); + + return -EINVAL; + } + + return stack->vec_idx[stack->top++]; +} + +/** + * idpf_vector_stash - Store the vector indexes onto the stack + * @adapter: private data struct + * @q_vector_idxs: vector index array + * @vec_info: info related to the number of vectors + * + * This function is a no-op if there are no vectors indexes to be stashed + */ +static void idpf_vector_stash(struct idpf_adapter *adapter, u16 *q_vector_idxs, + struct idpf_vector_info *vec_info) +{ + int i, base = 0; + u16 vec_idx; + + lockdep_assert_held(&adapter->vector_lock); + + if (!vec_info->num_curr_vecs) + return; + + /* For default vports, no need to stash vector allocated from the + * default pool onto the stack + */ + if (vec_info->default_vport) + base = IDPF_MIN_Q_VEC; + + for (i = vec_info->num_curr_vecs - 1; i >= base ; i--) { + vec_idx = q_vector_idxs[i]; + idpf_vector_lifo_push(adapter, vec_idx); + adapter->num_avail_msix++; + } +} + +/** + * idpf_req_rel_vector_indexes - Request or release MSIX vector indexes + * @adapter: driver specific private structure + * @q_vector_idxs: vector index array + * @vec_info: info related to the number of vectors + * + * This is the core function to distribute the MSIX vectors acquired from the + * OS. It expects the caller to pass the number of vectors required and + * also previously allocated. First, it stashes previously allocated vector + * indexes on to the stack and then figures out if it can allocate requested + * vectors. It can wait on acquiring the mutex lock. If the caller passes 0 as + * requested vectors, then this function just stashes the already allocated + * vectors and returns 0. + * + * Returns actual number of vectors allocated on success, error value on failure + * If 0 is returned, implies the stack has no vectors to allocate which is also + * a failure case for the caller + */ +int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter, + u16 *q_vector_idxs, + struct idpf_vector_info *vec_info) +{ + u16 num_req_vecs, num_alloc_vecs = 0, max_vecs; + struct idpf_vector_lifo *stack; + int i, j, vecid; + + mutex_lock(&adapter->vector_lock); + stack = &adapter->vector_stack; + num_req_vecs = vec_info->num_req_vecs; + + /* Stash interrupt vector indexes onto the stack if required */ + idpf_vector_stash(adapter, q_vector_idxs, vec_info); + + if (!num_req_vecs) + goto rel_lock; + + if (vec_info->default_vport) { + /* As IDPF_MIN_Q_VEC per default vport is put aside in the + * default pool of the stack, use them for default vports + */ + j = vec_info->index * IDPF_MIN_Q_VEC + IDPF_MBX_Q_VEC; + for (i = 0; i < IDPF_MIN_Q_VEC; i++) { + q_vector_idxs[num_alloc_vecs++] = stack->vec_idx[j++]; + num_req_vecs--; + } + } + + /* Find if stack has enough vector to allocate */ + max_vecs = min(adapter->num_avail_msix, num_req_vecs); + + for (j = 0; j < max_vecs; j++) { + vecid = idpf_vector_lifo_pop(adapter); + q_vector_idxs[num_alloc_vecs++] = vecid; + } + adapter->num_avail_msix -= max_vecs; + +rel_lock: + mutex_unlock(&adapter->vector_lock); + + return num_alloc_vecs; +} + +/** + * idpf_intr_req - Request interrupt capabilities + * @adapter: adapter to enable interrupts on + * + * Returns 0 on success, negative on failure + */ +int idpf_intr_req(struct idpf_adapter *adapter) +{ + u16 default_vports = idpf_get_default_vports(adapter); + int num_q_vecs, total_vecs, num_vec_ids; + int min_vectors, v_actual, err; + unsigned int vector; + u16 *vecids; + + total_vecs = idpf_get_reserved_vecs(adapter); + num_q_vecs = total_vecs - IDPF_MBX_Q_VEC; + + err = idpf_send_alloc_vectors_msg(adapter, num_q_vecs); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to allocate %d vectors: %d\n", num_q_vecs, err); + + return -EAGAIN; + } + + min_vectors = IDPF_MBX_Q_VEC + IDPF_MIN_Q_VEC * default_vports; + v_actual = pci_alloc_irq_vectors(adapter->pdev, min_vectors, + total_vecs, PCI_IRQ_MSIX); + if (v_actual < min_vectors) { + dev_err(&adapter->pdev->dev, "Failed to allocate MSIX vectors: %d\n", + v_actual); + err = -EAGAIN; + goto send_dealloc_vecs; + } + + adapter->msix_entries = kcalloc(v_actual, sizeof(struct msix_entry), + GFP_KERNEL); + + if (!adapter->msix_entries) { + err = -ENOMEM; + goto free_irq; + } + + idpf_set_mb_vec_id(adapter); + + vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL); + if (!vecids) { + err = -ENOMEM; + goto free_msix; + } + + if (adapter->req_vec_chunks) { + struct virtchnl2_vector_chunks *vchunks; + struct virtchnl2_alloc_vectors *ac; + + ac = adapter->req_vec_chunks; + vchunks = &ac->vchunks; + + num_vec_ids = idpf_get_vec_ids(adapter, vecids, total_vecs, + vchunks); + if (num_vec_ids < v_actual) { + err = -EINVAL; + goto free_vecids; + } + } else { + int i; + + for (i = 0; i < v_actual; i++) + vecids[i] = i; + } + + for (vector = 0; vector < v_actual; vector++) { + adapter->msix_entries[vector].entry = vecids[vector]; + adapter->msix_entries[vector].vector = + pci_irq_vector(adapter->pdev, vector); + } + + adapter->num_req_msix = total_vecs; + adapter->num_msix_entries = v_actual; + /* 'num_avail_msix' is used to distribute excess vectors to the vports + * after considering the minimum vectors required per each default + * vport + */ + adapter->num_avail_msix = v_actual - min_vectors; + + /* Fill MSIX vector lifo stack with vector indexes */ + err = idpf_init_vector_stack(adapter); + if (err) + goto free_vecids; + + err = idpf_mb_intr_init(adapter); + if (err) + goto deinit_vec_stack; + idpf_mb_irq_enable(adapter); + kfree(vecids); + + return 0; + +deinit_vec_stack: + idpf_deinit_vector_stack(adapter); +free_vecids: + kfree(vecids); +free_msix: + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; +free_irq: + pci_free_irq_vectors(adapter->pdev); +send_dealloc_vecs: + idpf_send_dealloc_vectors_msg(adapter); + + return err; +} + +/** + * idpf_find_mac_filter - Search filter list for specific mac filter + * @vconfig: Vport config structure + * @macaddr: The MAC address + * + * Returns ptr to the filter object or NULL. Must be called while holding the + * mac_filter_list_lock. + **/ +static struct idpf_mac_filter *idpf_find_mac_filter(struct idpf_vport_config *vconfig, + const u8 *macaddr) +{ + struct idpf_mac_filter *f; + + if (!macaddr) + return NULL; + + list_for_each_entry(f, &vconfig->user_config.mac_filter_list, list) { + if (ether_addr_equal(macaddr, f->macaddr)) + return f; + } + + return NULL; +} + +/** + * __idpf_del_mac_filter - Delete a MAC filter from the filter list + * @vport_config: Vport config structure + * @macaddr: The MAC address + * + * Returns 0 on success, error value on failure + **/ +static int __idpf_del_mac_filter(struct idpf_vport_config *vport_config, + const u8 *macaddr) +{ + struct idpf_mac_filter *f; + + spin_lock_bh(&vport_config->mac_filter_list_lock); + f = idpf_find_mac_filter(vport_config, macaddr); + if (f) { + list_del(&f->list); + kfree(f); + } + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return 0; +} + +/** + * idpf_del_mac_filter - Delete a MAC filter from the filter list + * @vport: Main vport structure + * @np: Netdev private structure + * @macaddr: The MAC address + * @async: Don't wait for return message + * + * Removes filter from list and if interface is up, tells hardware about the + * removed filter. + **/ +static int idpf_del_mac_filter(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + const u8 *macaddr, bool async) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f; + + vport_config = np->adapter->vport_config[np->vport_idx]; + + spin_lock_bh(&vport_config->mac_filter_list_lock); + f = idpf_find_mac_filter(vport_config, macaddr); + if (f) { + f->remove = true; + } else { + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return -EINVAL; + } + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + if (np->state == __IDPF_VPORT_UP) { + int err; + + err = idpf_add_del_mac_filters(vport, np, false, async); + if (err) + return err; + } + + return __idpf_del_mac_filter(vport_config, macaddr); +} + +/** + * __idpf_add_mac_filter - Add mac filter helper function + * @vport_config: Vport config structure + * @macaddr: Address to add + * + * Takes mac_filter_list_lock spinlock to add new filter to list. + */ +static int __idpf_add_mac_filter(struct idpf_vport_config *vport_config, + const u8 *macaddr) +{ + struct idpf_mac_filter *f; + + spin_lock_bh(&vport_config->mac_filter_list_lock); + + f = idpf_find_mac_filter(vport_config, macaddr); + if (f) { + f->remove = false; + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return 0; + } + + f = kzalloc(sizeof(*f), GFP_ATOMIC); + if (!f) { + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return -ENOMEM; + } + + ether_addr_copy(f->macaddr, macaddr); + list_add_tail(&f->list, &vport_config->user_config.mac_filter_list); + f->add = true; + + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return 0; +} + +/** + * idpf_add_mac_filter - Add a mac filter to the filter list + * @vport: Main vport structure + * @np: Netdev private structure + * @macaddr: The MAC address + * @async: Don't wait for return message + * + * Returns 0 on success or error on failure. If interface is up, we'll also + * send the virtchnl message to tell hardware about the filter. + **/ +static int idpf_add_mac_filter(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + const u8 *macaddr, bool async) +{ + struct idpf_vport_config *vport_config; + int err; + + vport_config = np->adapter->vport_config[np->vport_idx]; + err = __idpf_add_mac_filter(vport_config, macaddr); + if (err) + return err; + + if (np->state == __IDPF_VPORT_UP) + err = idpf_add_del_mac_filters(vport, np, true, async); + + return err; +} + +/** + * idpf_del_all_mac_filters - Delete all MAC filters in list + * @vport: main vport struct + * + * Takes mac_filter_list_lock spinlock. Deletes all filters + */ +static void idpf_del_all_mac_filters(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f, *ftmp; + + vport_config = vport->adapter->vport_config[vport->idx]; + spin_lock_bh(&vport_config->mac_filter_list_lock); + + list_for_each_entry_safe(f, ftmp, &vport_config->user_config.mac_filter_list, + list) { + list_del(&f->list); + kfree(f); + } + + spin_unlock_bh(&vport_config->mac_filter_list_lock); +} + +/** + * idpf_restore_mac_filters - Re-add all MAC filters in list + * @vport: main vport struct + * + * Takes mac_filter_list_lock spinlock. Sets add field to true for filters to + * resync filters back to HW. + */ +static void idpf_restore_mac_filters(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f; + + vport_config = vport->adapter->vport_config[vport->idx]; + spin_lock_bh(&vport_config->mac_filter_list_lock); + + list_for_each_entry(f, &vport_config->user_config.mac_filter_list, list) + f->add = true; + + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + idpf_add_del_mac_filters(vport, netdev_priv(vport->netdev), + true, false); +} + +/** + * idpf_remove_mac_filters - Remove all MAC filters in list + * @vport: main vport struct + * + * Takes mac_filter_list_lock spinlock. Sets remove field to true for filters + * to remove filters in HW. + */ +static void idpf_remove_mac_filters(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f; + + vport_config = vport->adapter->vport_config[vport->idx]; + spin_lock_bh(&vport_config->mac_filter_list_lock); + + list_for_each_entry(f, &vport_config->user_config.mac_filter_list, list) + f->remove = true; + + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + idpf_add_del_mac_filters(vport, netdev_priv(vport->netdev), + false, false); +} + +/** + * idpf_deinit_mac_addr - deinitialize mac address for vport + * @vport: main vport structure + */ +static void idpf_deinit_mac_addr(struct idpf_vport *vport) +{ + struct idpf_vport_config *vport_config; + struct idpf_mac_filter *f; + + vport_config = vport->adapter->vport_config[vport->idx]; + + spin_lock_bh(&vport_config->mac_filter_list_lock); + + f = idpf_find_mac_filter(vport_config, vport->default_mac_addr); + if (f) { + list_del(&f->list); + kfree(f); + } + + spin_unlock_bh(&vport_config->mac_filter_list_lock); +} + +/** + * idpf_init_mac_addr - initialize mac address for vport + * @vport: main vport structure + * @netdev: pointer to netdev struct associated with this vport + */ +static int idpf_init_mac_addr(struct idpf_vport *vport, + struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_adapter *adapter = vport->adapter; + int err; + + if (is_valid_ether_addr(vport->default_mac_addr)) { + eth_hw_addr_set(netdev, vport->default_mac_addr); + ether_addr_copy(netdev->perm_addr, vport->default_mac_addr); + + return idpf_add_mac_filter(vport, np, vport->default_mac_addr, + false); + } + + if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, + VIRTCHNL2_CAP_MACFILTER)) { + dev_err(&adapter->pdev->dev, + "MAC address is not provided and capability is not set\n"); + + return -EINVAL; + } + + eth_hw_addr_random(netdev); + err = idpf_add_mac_filter(vport, np, netdev->dev_addr, false); + if (err) + return err; + + dev_info(&adapter->pdev->dev, "Invalid MAC address %pM, using random %pM\n", + vport->default_mac_addr, netdev->dev_addr); + ether_addr_copy(vport->default_mac_addr, netdev->dev_addr); + + return 0; +} + +/** + * idpf_cfg_netdev - Allocate, configure and register a netdev + * @vport: main vport structure + * + * Returns 0 on success, negative value on failure. + */ +static int idpf_cfg_netdev(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport_config *vport_config; + netdev_features_t dflt_features; + netdev_features_t offloads = 0; + struct idpf_netdev_priv *np; + struct net_device *netdev; + u16 idx = vport->idx; + int err; + + vport_config = adapter->vport_config[idx]; + + /* It's possible we already have a netdev allocated and registered for + * this vport + */ + if (test_bit(IDPF_VPORT_REG_NETDEV, vport_config->flags)) { + netdev = adapter->netdevs[idx]; + np = netdev_priv(netdev); + np->vport = vport; + np->vport_idx = vport->idx; + np->vport_id = vport->vport_id; + vport->netdev = netdev; + + return idpf_init_mac_addr(vport, netdev); + } + + netdev = alloc_etherdev_mqs(sizeof(struct idpf_netdev_priv), + vport_config->max_q.max_txq, + vport_config->max_q.max_rxq); + if (!netdev) + return -ENOMEM; + + vport->netdev = netdev; + np = netdev_priv(netdev); + np->vport = vport; + np->adapter = adapter; + np->vport_idx = vport->idx; + np->vport_id = vport->vport_id; + + spin_lock_init(&np->stats_lock); + + err = idpf_init_mac_addr(vport, netdev); + if (err) { + free_netdev(vport->netdev); + vport->netdev = NULL; + + return err; + } + + /* assign netdev_ops */ + if (idpf_is_queue_model_split(vport->txq_model)) + netdev->netdev_ops = &idpf_netdev_ops_splitq; + else + netdev->netdev_ops = &idpf_netdev_ops_singleq; + + /* setup watchdog timeout value to be 5 second */ + netdev->watchdog_timeo = 5 * HZ; + + netdev->dev_port = idx; + + /* configure default MTU size */ + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = vport->max_mtu; + + dflt_features = NETIF_F_SG | + NETIF_F_HIGHDMA; + + if (idpf_is_cap_ena_all(adapter, IDPF_RSS_CAPS, IDPF_CAP_RSS)) + dflt_features |= NETIF_F_RXHASH; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V4)) + dflt_features |= NETIF_F_IP_CSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM_L4V6)) + dflt_features |= NETIF_F_IPV6_CSUM; + if (idpf_is_cap_ena(adapter, IDPF_CSUM_CAPS, IDPF_CAP_RX_CSUM)) + dflt_features |= NETIF_F_RXCSUM; + if (idpf_is_cap_ena_all(adapter, IDPF_CSUM_CAPS, IDPF_CAP_SCTP_CSUM)) + dflt_features |= NETIF_F_SCTP_CRC; + + if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV4_TCP)) + dflt_features |= NETIF_F_TSO; + if (idpf_is_cap_ena(adapter, IDPF_SEG_CAPS, VIRTCHNL2_CAP_SEG_IPV6_TCP)) + dflt_features |= NETIF_F_TSO6; + if (idpf_is_cap_ena_all(adapter, IDPF_SEG_CAPS, + VIRTCHNL2_CAP_SEG_IPV4_UDP | + VIRTCHNL2_CAP_SEG_IPV6_UDP)) + dflt_features |= NETIF_F_GSO_UDP_L4; + if (idpf_is_cap_ena_all(adapter, IDPF_RSC_CAPS, IDPF_CAP_RSC)) + offloads |= NETIF_F_GRO_HW; + /* advertise to stack only if offloads for encapsulated packets is + * supported + */ + if (idpf_is_cap_ena(vport->adapter, IDPF_SEG_CAPS, + VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL)) { + offloads |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6 | + 0; + + if (!idpf_is_cap_ena_all(vport->adapter, IDPF_CSUM_CAPS, + IDPF_CAP_TUNNEL_TX_CSUM)) + netdev->gso_partial_features |= + NETIF_F_GSO_UDP_TUNNEL_CSUM; + + netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; + offloads |= NETIF_F_TSO_MANGLEID; + } + if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_LOOPBACK)) + offloads |= NETIF_F_LOOPBACK; + + netdev->features |= dflt_features; + netdev->hw_features |= dflt_features | offloads; + netdev->hw_enc_features |= dflt_features | offloads; + idpf_set_ethtool_ops(netdev); + SET_NETDEV_DEV(netdev, &adapter->pdev->dev); + + /* carrier off on init to avoid Tx hangs */ + netif_carrier_off(netdev); + + /* make sure transmit queues start off as stopped */ + netif_tx_stop_all_queues(netdev); + + /* The vport can be arbitrarily released so we need to also track + * netdevs in the adapter struct + */ + adapter->netdevs[idx] = netdev; + + return 0; +} + +/** + * idpf_get_free_slot - get the next non-NULL location index in array + * @adapter: adapter in which to look for a free vport slot + */ +static int idpf_get_free_slot(struct idpf_adapter *adapter) +{ + unsigned int i; + + for (i = 0; i < adapter->max_vports; i++) { + if (!adapter->vports[i]) + return i; + } + + return IDPF_NO_FREE_SLOT; +} + +/** + * idpf_remove_features - Turn off feature configs + * @vport: virtual port structure + */ +static void idpf_remove_features(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + + if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) + idpf_remove_mac_filters(vport); +} + +/** + * idpf_vport_stop - Disable a vport + * @vport: vport to disable + */ +static void idpf_vport_stop(struct idpf_vport *vport) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + + if (np->state <= __IDPF_VPORT_DOWN) + return; + + netif_carrier_off(vport->netdev); + netif_tx_disable(vport->netdev); + + idpf_send_disable_vport_msg(vport); + idpf_send_disable_queues_msg(vport); + idpf_send_map_unmap_queue_vector_msg(vport, false); + /* Normally we ask for queues in create_vport, but if the number of + * initially requested queues have changed, for example via ethtool + * set channels, we do delete queues and then add the queues back + * instead of deleting and reallocating the vport. + */ + if (test_and_clear_bit(IDPF_VPORT_DEL_QUEUES, vport->flags)) + idpf_send_delete_queues_msg(vport); + + idpf_remove_features(vport); + + vport->link_up = false; + idpf_vport_intr_deinit(vport); + idpf_vport_intr_rel(vport); + idpf_vport_queues_rel(vport); + np->state = __IDPF_VPORT_DOWN; +} + +/** + * idpf_stop - Disables a network interface + * @netdev: network interface device structure + * + * The stop entry point is called when an interface is de-activated by the OS, + * and the netdevice enters the DOWN state. The hardware is still under the + * driver's control, but the netdev interface is disabled. + * + * Returns success only - not allowed to fail + */ +static int idpf_stop(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport *vport; + + if (test_bit(IDPF_REMOVE_IN_PROG, np->adapter->flags)) + return 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + idpf_vport_stop(vport); + + idpf_vport_ctrl_unlock(netdev); + + return 0; +} + +/** + * idpf_decfg_netdev - Unregister the netdev + * @vport: vport for which netdev to be unregistered + */ +static void idpf_decfg_netdev(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + + unregister_netdev(vport->netdev); + free_netdev(vport->netdev); + vport->netdev = NULL; + + adapter->netdevs[vport->idx] = NULL; +} + +/** + * idpf_vport_rel - Delete a vport and free its resources + * @vport: the vport being removed + */ +static void idpf_vport_rel(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport_config *vport_config; + struct idpf_vector_info vec_info; + struct idpf_rss_data *rss_data; + struct idpf_vport_max_q max_q; + u16 idx = vport->idx; + + vport_config = adapter->vport_config[vport->idx]; + idpf_deinit_rss(vport); + rss_data = &vport_config->user_config.rss_data; + kfree(rss_data->rss_key); + rss_data->rss_key = NULL; + + idpf_send_destroy_vport_msg(vport); + + /* Release all max queues allocated to the adapter's pool */ + max_q.max_rxq = vport_config->max_q.max_rxq; + max_q.max_txq = vport_config->max_q.max_txq; + max_q.max_bufq = vport_config->max_q.max_bufq; + max_q.max_complq = vport_config->max_q.max_complq; + idpf_vport_dealloc_max_qs(adapter, &max_q); + + /* Release all the allocated vectors on the stack */ + vec_info.num_req_vecs = 0; + vec_info.num_curr_vecs = vport->num_q_vectors; + vec_info.default_vport = vport->default_vport; + + idpf_req_rel_vector_indexes(adapter, vport->q_vector_idxs, &vec_info); + + kfree(vport->q_vector_idxs); + vport->q_vector_idxs = NULL; + + kfree(adapter->vport_params_recvd[idx]); + adapter->vport_params_recvd[idx] = NULL; + kfree(adapter->vport_params_reqd[idx]); + adapter->vport_params_reqd[idx] = NULL; + if (adapter->vport_config[idx]) { + kfree(adapter->vport_config[idx]->req_qs_chunks); + adapter->vport_config[idx]->req_qs_chunks = NULL; + } + kfree(vport); + adapter->num_alloc_vports--; +} + +/** + * idpf_vport_dealloc - cleanup and release a given vport + * @vport: pointer to idpf vport structure + * + * returns nothing + */ +static void idpf_vport_dealloc(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + unsigned int i = vport->idx; + + idpf_deinit_mac_addr(vport); + idpf_vport_stop(vport); + + if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) + idpf_decfg_netdev(vport); + if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + idpf_del_all_mac_filters(vport); + + if (adapter->netdevs[i]) { + struct idpf_netdev_priv *np = netdev_priv(adapter->netdevs[i]); + + np->vport = NULL; + } + + idpf_vport_rel(vport); + + adapter->vports[i] = NULL; + adapter->next_vport = idpf_get_free_slot(adapter); +} + +/** + * idpf_is_hsplit_supported - check whether the header split is supported + * @vport: virtual port to check the capability for + * + * Return: true if it's supported by the HW/FW, false if not. + */ +static bool idpf_is_hsplit_supported(const struct idpf_vport *vport) +{ + return idpf_is_queue_model_split(vport->rxq_model) && + idpf_is_cap_ena_all(vport->adapter, IDPF_HSPLIT_CAPS, + IDPF_CAP_HSPLIT); +} + +/** + * idpf_vport_get_hsplit - get the current header split feature state + * @vport: virtual port to query the state for + * + * Return: ``ETHTOOL_TCP_DATA_SPLIT_UNKNOWN`` if not supported, + * ``ETHTOOL_TCP_DATA_SPLIT_DISABLED`` if disabled, + * ``ETHTOOL_TCP_DATA_SPLIT_ENABLED`` if active. + */ +u8 idpf_vport_get_hsplit(const struct idpf_vport *vport) +{ + const struct idpf_vport_user_config_data *config; + + if (!idpf_is_hsplit_supported(vport)) + return ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + + config = &vport->adapter->vport_config[vport->idx]->user_config; + + return test_bit(__IDPF_USER_FLAG_HSPLIT, config->user_flags) ? + ETHTOOL_TCP_DATA_SPLIT_ENABLED : + ETHTOOL_TCP_DATA_SPLIT_DISABLED; +} + +/** + * idpf_vport_set_hsplit - enable or disable header split on a given vport + * @vport: virtual port to configure + * @val: Ethtool flag controlling the header split state + * + * Return: true on success, false if not supported by the HW. + */ +bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val) +{ + struct idpf_vport_user_config_data *config; + + if (!idpf_is_hsplit_supported(vport)) + return val == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + + config = &vport->adapter->vport_config[vport->idx]->user_config; + + switch (val) { + case ETHTOOL_TCP_DATA_SPLIT_UNKNOWN: + /* Default is to enable */ + case ETHTOOL_TCP_DATA_SPLIT_ENABLED: + __set_bit(__IDPF_USER_FLAG_HSPLIT, config->user_flags); + return true; + case ETHTOOL_TCP_DATA_SPLIT_DISABLED: + __clear_bit(__IDPF_USER_FLAG_HSPLIT, config->user_flags); + return true; + default: + return false; + } +} + +/** + * idpf_vport_alloc - Allocates the next available struct vport in the adapter + * @adapter: board private structure + * @max_q: vport max queue info + * + * returns a pointer to a vport on success, NULL on failure. + */ +static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q) +{ + struct idpf_rss_data *rss_data; + u16 idx = adapter->next_vport; + struct idpf_vport *vport; + u16 num_max_q; + + if (idx == IDPF_NO_FREE_SLOT) + return NULL; + + vport = kzalloc(sizeof(*vport), GFP_KERNEL); + if (!vport) + return vport; + + if (!adapter->vport_config[idx]) { + struct idpf_vport_config *vport_config; + + vport_config = kzalloc(sizeof(*vport_config), GFP_KERNEL); + if (!vport_config) { + kfree(vport); + + return NULL; + } + + adapter->vport_config[idx] = vport_config; + } + + vport->idx = idx; + vport->adapter = adapter; + vport->compln_clean_budget = IDPF_TX_COMPLQ_CLEAN_BUDGET; + vport->default_vport = adapter->num_alloc_vports < + idpf_get_default_vports(adapter); + + num_max_q = max(max_q->max_txq, max_q->max_rxq); + vport->q_vector_idxs = kcalloc(num_max_q, sizeof(u16), GFP_KERNEL); + if (!vport->q_vector_idxs) { + kfree(vport); + + return NULL; + } + idpf_vport_init(vport, max_q); + + /* This alloc is done separate from the LUT because it's not strictly + * dependent on how many queues we have. If we change number of queues + * and soft reset we'll need a new LUT but the key can remain the same + * for as long as the vport exists. + */ + rss_data = &adapter->vport_config[idx]->user_config.rss_data; + rss_data->rss_key = kzalloc(rss_data->rss_key_size, GFP_KERNEL); + if (!rss_data->rss_key) { + kfree(vport); + + return NULL; + } + /* Initialize default rss key */ + netdev_rss_key_fill((void *)rss_data->rss_key, rss_data->rss_key_size); + + /* fill vport slot in the adapter struct */ + adapter->vports[idx] = vport; + adapter->vport_ids[idx] = idpf_get_vport_id(vport); + + adapter->num_alloc_vports++; + /* prepare adapter->next_vport for next use */ + adapter->next_vport = idpf_get_free_slot(adapter); + + return vport; +} + +/** + * idpf_get_stats64 - get statistics for network device structure + * @netdev: network interface device structure + * @stats: main device statistics structure + */ +static void idpf_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + spin_lock_bh(&np->stats_lock); + *stats = np->netstats; + spin_unlock_bh(&np->stats_lock); +} + +/** + * idpf_statistics_task - Delayed task to get statistics over mailbox + * @work: work_struct handle to our data + */ +void idpf_statistics_task(struct work_struct *work) +{ + struct idpf_adapter *adapter; + int i; + + adapter = container_of(work, struct idpf_adapter, stats_task.work); + + for (i = 0; i < adapter->max_vports; i++) { + struct idpf_vport *vport = adapter->vports[i]; + + if (vport && !test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) + idpf_send_get_stats_msg(vport); + } + + queue_delayed_work(adapter->stats_wq, &adapter->stats_task, + msecs_to_jiffies(10000)); +} + +/** + * idpf_mbx_task - Delayed task to handle mailbox responses + * @work: work_struct handle + */ +void idpf_mbx_task(struct work_struct *work) +{ + struct idpf_adapter *adapter; + + adapter = container_of(work, struct idpf_adapter, mbx_task.work); + + if (test_bit(IDPF_MB_INTR_MODE, adapter->flags)) + idpf_mb_irq_enable(adapter); + else + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, + msecs_to_jiffies(300)); + + idpf_recv_mb_msg(adapter); +} + +/** + * idpf_service_task - Delayed task for handling mailbox responses + * @work: work_struct handle to our data + * + */ +void idpf_service_task(struct work_struct *work) +{ + struct idpf_adapter *adapter; + + adapter = container_of(work, struct idpf_adapter, serv_task.work); + + if (idpf_is_reset_detected(adapter) && + !idpf_is_reset_in_prog(adapter) && + !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) { + dev_info(&adapter->pdev->dev, "HW reset detected\n"); + set_bit(IDPF_HR_FUNC_RESET, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, + &adapter->vc_event_task, + msecs_to_jiffies(10)); + } + + queue_delayed_work(adapter->serv_wq, &adapter->serv_task, + msecs_to_jiffies(300)); +} + +/** + * idpf_restore_features - Restore feature configs + * @vport: virtual port structure + */ +static void idpf_restore_features(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + + if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) + idpf_restore_mac_filters(vport); +} + +/** + * idpf_set_real_num_queues - set number of queues for netdev + * @vport: virtual port structure + * + * Returns 0 on success, negative on failure. + */ +static int idpf_set_real_num_queues(struct idpf_vport *vport) +{ + int err; + + err = netif_set_real_num_rx_queues(vport->netdev, vport->num_rxq); + if (err) + return err; + + return netif_set_real_num_tx_queues(vport->netdev, vport->num_txq); +} + +/** + * idpf_up_complete - Complete interface up sequence + * @vport: virtual port structure + * + * Returns 0 on success, negative on failure. + */ +static int idpf_up_complete(struct idpf_vport *vport) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + + if (vport->link_up && !netif_carrier_ok(vport->netdev)) { + netif_carrier_on(vport->netdev); + netif_tx_start_all_queues(vport->netdev); + } + + np->state = __IDPF_VPORT_UP; + + return 0; +} + +/** + * idpf_rx_init_buf_tail - Write initial buffer ring tail value + * @vport: virtual port struct + */ +static void idpf_rx_init_buf_tail(struct idpf_vport *vport) +{ + int i, j; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *grp = &vport->rxq_grps[i]; + + if (idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_queue *q = + &grp->splitq.bufq_sets[j].bufq; + + writel(q->next_to_alloc, q->tail); + } + } else { + for (j = 0; j < grp->singleq.num_rxq; j++) { + struct idpf_queue *q = + grp->singleq.rxqs[j]; + + writel(q->next_to_alloc, q->tail); + } + } + } +} + +/** + * idpf_vport_open - Bring up a vport + * @vport: vport to bring up + * @alloc_res: allocate queue resources + */ +static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport_config *vport_config; + int err; + + if (np->state != __IDPF_VPORT_DOWN) + return -EBUSY; + + /* we do not allow interface up just yet */ + netif_carrier_off(vport->netdev); + + if (alloc_res) { + err = idpf_vport_queues_alloc(vport); + if (err) + return err; + } + + err = idpf_vport_intr_alloc(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", + vport->vport_id, err); + goto queues_rel; + } + + err = idpf_vport_queue_ids_init(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", + vport->vport_id, err); + goto intr_rel; + } + + err = idpf_vport_intr_init(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", + vport->vport_id, err); + goto intr_rel; + } + + err = idpf_rx_bufs_init_all(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", + vport->vport_id, err); + goto intr_rel; + } + + err = idpf_queue_reg_init(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", + vport->vport_id, err); + goto intr_rel; + } + + idpf_rx_init_buf_tail(vport); + + err = idpf_send_config_queues_msg(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to configure queues for vport %u, %d\n", + vport->vport_id, err); + goto intr_deinit; + } + + err = idpf_send_map_unmap_queue_vector_msg(vport, true); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to map queue vectors for vport %u: %d\n", + vport->vport_id, err); + goto intr_deinit; + } + + err = idpf_send_enable_queues_msg(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to enable queues for vport %u: %d\n", + vport->vport_id, err); + goto unmap_queue_vectors; + } + + err = idpf_send_enable_vport_msg(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to enable vport %u: %d\n", + vport->vport_id, err); + err = -EAGAIN; + goto disable_queues; + } + + idpf_restore_features(vport); + + vport_config = adapter->vport_config[vport->idx]; + if (vport_config->user_config.rss_data.rss_lut) + err = idpf_config_rss(vport); + else + err = idpf_init_rss(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to initialize RSS for vport %u: %d\n", + vport->vport_id, err); + goto disable_vport; + } + + err = idpf_up_complete(vport); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to complete interface up for vport %u: %d\n", + vport->vport_id, err); + goto deinit_rss; + } + + return 0; + +deinit_rss: + idpf_deinit_rss(vport); +disable_vport: + idpf_send_disable_vport_msg(vport); +disable_queues: + idpf_send_disable_queues_msg(vport); +unmap_queue_vectors: + idpf_send_map_unmap_queue_vector_msg(vport, false); +intr_deinit: + idpf_vport_intr_deinit(vport); +intr_rel: + idpf_vport_intr_rel(vport); +queues_rel: + idpf_vport_queues_rel(vport); + + return err; +} + +/** + * idpf_init_task - Delayed initialization task + * @work: work_struct handle to our data + * + * Init task finishes up pending work started in probe. Due to the asynchronous + * nature in which the device communicates with hardware, we may have to wait + * several milliseconds to get a response. Instead of busy polling in probe, + * pulling it out into a delayed work task prevents us from bogging down the + * whole system waiting for a response from hardware. + */ +void idpf_init_task(struct work_struct *work) +{ + struct idpf_vport_config *vport_config; + struct idpf_vport_max_q max_q; + struct idpf_adapter *adapter; + struct idpf_netdev_priv *np; + struct idpf_vport *vport; + u16 num_default_vports; + struct pci_dev *pdev; + bool default_vport; + int index, err; + + adapter = container_of(work, struct idpf_adapter, init_task.work); + + num_default_vports = idpf_get_default_vports(adapter); + if (adapter->num_alloc_vports < num_default_vports) + default_vport = true; + else + default_vport = false; + + err = idpf_vport_alloc_max_qs(adapter, &max_q); + if (err) + goto unwind_vports; + + err = idpf_send_create_vport_msg(adapter, &max_q); + if (err) { + idpf_vport_dealloc_max_qs(adapter, &max_q); + goto unwind_vports; + } + + pdev = adapter->pdev; + vport = idpf_vport_alloc(adapter, &max_q); + if (!vport) { + err = -EFAULT; + dev_err(&pdev->dev, "failed to allocate vport: %d\n", + err); + idpf_vport_dealloc_max_qs(adapter, &max_q); + goto unwind_vports; + } + + index = vport->idx; + vport_config = adapter->vport_config[index]; + + init_waitqueue_head(&vport->sw_marker_wq); + + spin_lock_init(&vport_config->mac_filter_list_lock); + + INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list); + + err = idpf_check_supported_desc_ids(vport); + if (err) { + dev_err(&pdev->dev, "failed to get required descriptor ids\n"); + goto cfg_netdev_err; + } + + if (idpf_cfg_netdev(vport)) + goto cfg_netdev_err; + + err = idpf_send_get_rx_ptype_msg(vport); + if (err) + goto handle_err; + + /* Once state is put into DOWN, driver is ready for dev_open */ + np = netdev_priv(vport->netdev); + np->state = __IDPF_VPORT_DOWN; + if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) + idpf_vport_open(vport, true); + + /* Spawn and return 'idpf_init_task' work queue until all the + * default vports are created + */ + if (adapter->num_alloc_vports < num_default_vports) { + queue_delayed_work(adapter->init_wq, &adapter->init_task, + msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07))); + + return; + } + + for (index = 0; index < adapter->max_vports; index++) { + if (adapter->netdevs[index] && + !test_bit(IDPF_VPORT_REG_NETDEV, + adapter->vport_config[index]->flags)) { + register_netdev(adapter->netdevs[index]); + set_bit(IDPF_VPORT_REG_NETDEV, + adapter->vport_config[index]->flags); + } + } + + /* As all the required vports are created, clear the reset flag + * unconditionally here in case we were in reset and the link was down. + */ + clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + /* Start the statistics task now */ + queue_delayed_work(adapter->stats_wq, &adapter->stats_task, + msecs_to_jiffies(10 * (pdev->devfn & 0x07))); + + return; + +handle_err: + idpf_decfg_netdev(vport); +cfg_netdev_err: + idpf_vport_rel(vport); + adapter->vports[index] = NULL; +unwind_vports: + if (default_vport) { + for (index = 0; index < adapter->max_vports; index++) { + if (adapter->vports[index]) + idpf_vport_dealloc(adapter->vports[index]); + } + } + clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); +} + +/** + * idpf_sriov_ena - Enable or change number of VFs + * @adapter: private data struct + * @num_vfs: number of VFs to allocate + */ +static int idpf_sriov_ena(struct idpf_adapter *adapter, int num_vfs) +{ + struct device *dev = &adapter->pdev->dev; + int err; + + err = idpf_send_set_sriov_vfs_msg(adapter, num_vfs); + if (err) { + dev_err(dev, "Failed to allocate VFs: %d\n", err); + + return err; + } + + err = pci_enable_sriov(adapter->pdev, num_vfs); + if (err) { + idpf_send_set_sriov_vfs_msg(adapter, 0); + dev_err(dev, "Failed to enable SR-IOV: %d\n", err); + + return err; + } + + adapter->num_vfs = num_vfs; + + return num_vfs; +} + +/** + * idpf_sriov_configure - Configure the requested VFs + * @pdev: pointer to a pci_dev structure + * @num_vfs: number of vfs to allocate + * + * Enable or change the number of VFs. Called when the user updates the number + * of VFs in sysfs. + **/ +int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct idpf_adapter *adapter = pci_get_drvdata(pdev); + + if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_SRIOV)) { + dev_info(&pdev->dev, "SR-IOV is not supported on this device\n"); + + return -EOPNOTSUPP; + } + + if (num_vfs) + return idpf_sriov_ena(adapter, num_vfs); + + if (pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs\n"); + + return -EBUSY; + } + + pci_disable_sriov(adapter->pdev); + idpf_send_set_sriov_vfs_msg(adapter, 0); + adapter->num_vfs = 0; + + return 0; +} + +/** + * idpf_deinit_task - Device deinit routine + * @adapter: Driver specific private structure + * + * Extended remove logic which will be used for + * hard reset as well + */ +void idpf_deinit_task(struct idpf_adapter *adapter) +{ + unsigned int i; + + /* Wait until the init_task is done else this thread might release + * the resources first and the other thread might end up in a bad state + */ + cancel_delayed_work_sync(&adapter->init_task); + + if (!adapter->vports) + return; + + cancel_delayed_work_sync(&adapter->stats_task); + + for (i = 0; i < adapter->max_vports; i++) { + if (adapter->vports[i]) + idpf_vport_dealloc(adapter->vports[i]); + } +} + +/** + * idpf_check_reset_complete - check that reset is complete + * @hw: pointer to hw struct + * @reset_reg: struct with reset registers + * + * Returns 0 if device is ready to use, or -EBUSY if it's in reset. + **/ +static int idpf_check_reset_complete(struct idpf_hw *hw, + struct idpf_reset_reg *reset_reg) +{ + struct idpf_adapter *adapter = hw->back; + int i; + + for (i = 0; i < 2000; i++) { + u32 reg_val = readl(reset_reg->rstat); + + /* 0xFFFFFFFF might be read if other side hasn't cleared the + * register for us yet and 0xFFFFFFFF is not a valid value for + * the register, so treat that as invalid. + */ + if (reg_val != 0xFFFFFFFF && (reg_val & reset_reg->rstat_m)) + return 0; + + usleep_range(5000, 10000); + } + + dev_warn(&adapter->pdev->dev, "Device reset timeout!\n"); + /* Clear the reset flag unconditionally here since the reset + * technically isn't in progress anymore from the driver's perspective + */ + clear_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + + return -EBUSY; +} + +/** + * idpf_set_vport_state - Set the vport state to be after the reset + * @adapter: Driver specific private structure + */ +static void idpf_set_vport_state(struct idpf_adapter *adapter) +{ + u16 i; + + for (i = 0; i < adapter->max_vports; i++) { + struct idpf_netdev_priv *np; + + if (!adapter->netdevs[i]) + continue; + + np = netdev_priv(adapter->netdevs[i]); + if (np->state == __IDPF_VPORT_UP) + set_bit(IDPF_VPORT_UP_REQUESTED, + adapter->vport_config[i]->flags); + } +} + +/** + * idpf_init_hard_reset - Initiate a hardware reset + * @adapter: Driver specific private structure + * + * Deallocate the vports and all the resources associated with them and + * reallocate. Also reinitialize the mailbox. Return 0 on success, + * negative on failure. + */ +static int idpf_init_hard_reset(struct idpf_adapter *adapter) +{ + struct idpf_reg_ops *reg_ops = &adapter->dev_ops.reg_ops; + struct device *dev = &adapter->pdev->dev; + struct net_device *netdev; + int err; + u16 i; + + mutex_lock(&adapter->vport_ctrl_lock); + + dev_info(dev, "Device HW Reset initiated\n"); + + /* Avoid TX hangs on reset */ + for (i = 0; i < adapter->max_vports; i++) { + netdev = adapter->netdevs[i]; + if (!netdev) + continue; + + netif_carrier_off(netdev); + netif_tx_disable(netdev); + } + + /* Prepare for reset */ + if (test_and_clear_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { + reg_ops->trigger_reset(adapter, IDPF_HR_DRV_LOAD); + } else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) { + bool is_reset = idpf_is_reset_detected(adapter); + + idpf_set_vport_state(adapter); + idpf_vc_core_deinit(adapter); + if (!is_reset) + reg_ops->trigger_reset(adapter, IDPF_HR_FUNC_RESET); + idpf_deinit_dflt_mbx(adapter); + } else { + dev_err(dev, "Unhandled hard reset cause\n"); + err = -EBADRQC; + goto unlock_mutex; + } + + /* Wait for reset to complete */ + err = idpf_check_reset_complete(&adapter->hw, &adapter->reset_reg); + if (err) { + dev_err(dev, "The driver was unable to contact the device's firmware. Check that the FW is running. Driver state= 0x%x\n", + adapter->state); + goto unlock_mutex; + } + + /* Reset is complete and so start building the driver resources again */ + err = idpf_init_dflt_mbx(adapter); + if (err) { + dev_err(dev, "Failed to initialize default mailbox: %d\n", err); + goto unlock_mutex; + } + + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); + + /* Initialize the state machine, also allocate memory and request + * resources + */ + err = idpf_vc_core_init(adapter); + if (err) { + idpf_deinit_dflt_mbx(adapter); + goto unlock_mutex; + } + + /* Wait till all the vports are initialized to release the reset lock, + * else user space callbacks may access uninitialized vports + */ + while (test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) + msleep(100); + +unlock_mutex: + mutex_unlock(&adapter->vport_ctrl_lock); + + return err; +} + +/** + * idpf_vc_event_task - Handle virtchannel event logic + * @work: work queue struct + */ +void idpf_vc_event_task(struct work_struct *work) +{ + struct idpf_adapter *adapter; + + adapter = container_of(work, struct idpf_adapter, vc_event_task.work); + + if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + return; + + if (test_bit(IDPF_HR_FUNC_RESET, adapter->flags) || + test_bit(IDPF_HR_DRV_LOAD, adapter->flags)) { + set_bit(IDPF_HR_RESET_IN_PROG, adapter->flags); + idpf_init_hard_reset(adapter); + } +} + +/** + * idpf_initiate_soft_reset - Initiate a software reset + * @vport: virtual port data struct + * @reset_cause: reason for the soft reset + * + * Soft reset only reallocs vport queue resources. Returns 0 on success, + * negative on failure. + */ +int idpf_initiate_soft_reset(struct idpf_vport *vport, + enum idpf_vport_reset_cause reset_cause) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + enum idpf_vport_state current_state = np->state; + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vport *new_vport; + int err, i; + + /* If the system is low on memory, we can end up in bad state if we + * free all the memory for queue resources and try to allocate them + * again. Instead, we can pre-allocate the new resources before doing + * anything and bailing if the alloc fails. + * + * Make a clone of the existing vport to mimic its current + * configuration, then modify the new structure with any requested + * changes. Once the allocation of the new resources is done, stop the + * existing vport and copy the configuration to the main vport. If an + * error occurred, the existing vport will be untouched. + * + */ + new_vport = kzalloc(sizeof(*vport), GFP_KERNEL); + if (!new_vport) + return -ENOMEM; + + /* This purposely avoids copying the end of the struct because it + * contains wait_queues and mutexes and other stuff we don't want to + * mess with. Nothing below should use those variables from new_vport + * and should instead always refer to them in vport if they need to. + */ + memcpy(new_vport, vport, offsetof(struct idpf_vport, link_speed_mbps)); + + /* Adjust resource parameters prior to reallocating resources */ + switch (reset_cause) { + case IDPF_SR_Q_CHANGE: + err = idpf_vport_adjust_qs(new_vport); + if (err) + goto free_vport; + break; + case IDPF_SR_Q_DESC_CHANGE: + /* Update queue parameters before allocating resources */ + idpf_vport_calc_num_q_desc(new_vport); + break; + case IDPF_SR_MTU_CHANGE: + case IDPF_SR_RSC_CHANGE: + break; + default: + dev_err(&adapter->pdev->dev, "Unhandled soft reset cause\n"); + err = -EINVAL; + goto free_vport; + } + + err = idpf_vport_queues_alloc(new_vport); + if (err) + goto free_vport; + if (current_state <= __IDPF_VPORT_DOWN) { + idpf_send_delete_queues_msg(vport); + } else { + set_bit(IDPF_VPORT_DEL_QUEUES, vport->flags); + idpf_vport_stop(vport); + } + + idpf_deinit_rss(vport); + /* We're passing in vport here because we need its wait_queue + * to send a message and it should be getting all the vport + * config data out of the adapter but we need to be careful not + * to add code to add_queues to change the vport config within + * vport itself as it will be wiped with a memcpy later. + */ + err = idpf_send_add_queues_msg(vport, new_vport->num_txq, + new_vport->num_complq, + new_vport->num_rxq, + new_vport->num_bufq); + if (err) + goto err_reset; + + /* Same comment as above regarding avoiding copying the wait_queues and + * mutexes applies here. We do not want to mess with those if possible. + */ + memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps)); + + /* Since idpf_vport_queues_alloc was called with new_port, the queue + * back pointers are currently pointing to the local new_vport. Reset + * the backpointers to the original vport here + */ + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + int j; + + tx_qgrp->vport = vport; + for (j = 0; j < tx_qgrp->num_txq; j++) + tx_qgrp->txqs[j]->vport = vport; + + if (idpf_is_queue_model_split(vport->txq_model)) + tx_qgrp->complq->vport = vport; + } + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + struct idpf_queue *q; + u16 num_rxq; + int j; + + rx_qgrp->vport = vport; + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) + rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + q->vport = vport; + } + } + + if (reset_cause == IDPF_SR_Q_CHANGE) + idpf_vport_alloc_vec_indexes(vport); + + err = idpf_set_real_num_queues(vport); + if (err) + goto err_reset; + + if (current_state == __IDPF_VPORT_UP) + err = idpf_vport_open(vport, false); + + kfree(new_vport); + + return err; + +err_reset: + idpf_vport_queues_rel(new_vport); +free_vport: + kfree(new_vport); + + return err; +} + +/** + * idpf_addr_sync - Callback for dev_(mc|uc)_sync to add address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be added. We call + * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock + * meaning we cannot sleep in this context. Due to this, we have to add the + * filter and send the virtchnl message asynchronously without waiting for the + * response from the other side. We won't know whether or not the operation + * actually succeeded until we get the message back. Returns 0 on success, + * negative on failure. + */ +static int idpf_addr_sync(struct net_device *netdev, const u8 *addr) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + return idpf_add_mac_filter(np->vport, np, addr, true); +} + +/** + * idpf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be added. We call + * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock + * meaning we cannot sleep in this context. Due to this we have to delete the + * filter and send the virtchnl message asynchronously without waiting for the + * return from the other side. We won't know whether or not the operation + * actually succeeded until we get the message back. Returns 0 on success, + * negative on failure. + */ +static int idpf_addr_unsync(struct net_device *netdev, const u8 *addr) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + + idpf_del_mac_filter(np->vport, np, addr, true); + + return 0; +} + +/** + * idpf_set_rx_mode - NDO callback to set the netdev filters + * @netdev: network interface device structure + * + * Stack takes addr_list_lock spinlock before calling our .set_rx_mode. We + * cannot sleep in this context. + */ +static void idpf_set_rx_mode(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_user_config_data *config_data; + struct idpf_adapter *adapter; + bool changed = false; + struct device *dev; + int err; + + adapter = np->adapter; + dev = &adapter->pdev->dev; + + if (idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) { + __dev_uc_sync(netdev, idpf_addr_sync, idpf_addr_unsync); + __dev_mc_sync(netdev, idpf_addr_sync, idpf_addr_unsync); + } + + if (!idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_PROMISC)) + return; + + config_data = &adapter->vport_config[np->vport_idx]->user_config; + /* IFF_PROMISC enables both unicast and multicast promiscuous, + * while IFF_ALLMULTI only enables multicast such that: + * + * promisc + allmulti = unicast | multicast + * promisc + !allmulti = unicast | multicast + * !promisc + allmulti = multicast + */ + if ((netdev->flags & IFF_PROMISC) && + !test_and_set_bit(__IDPF_PROMISC_UC, config_data->user_flags)) { + changed = true; + dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n"); + if (!test_and_set_bit(__IDPF_PROMISC_MC, adapter->flags)) + dev_info(dev, "Entering multicast promiscuous mode\n"); + } + + if (!(netdev->flags & IFF_PROMISC) && + test_and_clear_bit(__IDPF_PROMISC_UC, config_data->user_flags)) { + changed = true; + dev_info(dev, "Leaving promiscuous mode\n"); + } + + if (netdev->flags & IFF_ALLMULTI && + !test_and_set_bit(__IDPF_PROMISC_MC, config_data->user_flags)) { + changed = true; + dev_info(dev, "Entering multicast promiscuous mode\n"); + } + + if (!(netdev->flags & (IFF_ALLMULTI | IFF_PROMISC)) && + test_and_clear_bit(__IDPF_PROMISC_MC, config_data->user_flags)) { + changed = true; + dev_info(dev, "Leaving multicast promiscuous mode\n"); + } + + if (!changed) + return; + + err = idpf_set_promiscuous(adapter, config_data, np->vport_id); + if (err) + dev_err(dev, "Failed to set promiscuous mode: %d\n", err); +} + +/** + * idpf_vport_manage_rss_lut - disable/enable RSS + * @vport: the vport being changed + * + * In the event of disable request for RSS, this function will zero out RSS + * LUT, while in the event of enable request for RSS, it will reconfigure RSS + * LUT with the default LUT configuration. + */ +static int idpf_vport_manage_rss_lut(struct idpf_vport *vport) +{ + bool ena = idpf_is_feature_ena(vport, NETIF_F_RXHASH); + struct idpf_rss_data *rss_data; + u16 idx = vport->idx; + int lut_size; + + rss_data = &vport->adapter->vport_config[idx]->user_config.rss_data; + lut_size = rss_data->rss_lut_size * sizeof(u32); + + if (ena) { + /* This will contain the default or user configured LUT */ + memcpy(rss_data->rss_lut, rss_data->cached_lut, lut_size); + } else { + /* Save a copy of the current LUT to be restored later if + * requested. + */ + memcpy(rss_data->cached_lut, rss_data->rss_lut, lut_size); + + /* Zero out the current LUT to disable */ + memset(rss_data->rss_lut, 0, lut_size); + } + + return idpf_config_rss(vport); +} + +/** + * idpf_set_features - set the netdev feature flags + * @netdev: ptr to the netdev being adjusted + * @features: the feature set that the stack is suggesting + */ +static int idpf_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct idpf_adapter *adapter; + struct idpf_vport *vport; + int err = 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + adapter = vport->adapter; + + if (idpf_is_reset_in_prog(adapter)) { + dev_err(&adapter->pdev->dev, "Device is resetting, changing netdev features temporarily unavailable.\n"); + err = -EBUSY; + goto unlock_mutex; + } + + if (changed & NETIF_F_RXHASH) { + netdev->features ^= NETIF_F_RXHASH; + err = idpf_vport_manage_rss_lut(vport); + if (err) + goto unlock_mutex; + } + + if (changed & NETIF_F_GRO_HW) { + netdev->features ^= NETIF_F_GRO_HW; + err = idpf_initiate_soft_reset(vport, IDPF_SR_RSC_CHANGE); + if (err) + goto unlock_mutex; + } + + if (changed & NETIF_F_LOOPBACK) { + netdev->features ^= NETIF_F_LOOPBACK; + err = idpf_send_ena_dis_loopback_msg(vport); + } + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_open - Called when a network interface becomes active + * @netdev: network interface device structure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the netdev watchdog is enabled, + * and the stack is notified that the interface is ready. + * + * Returns 0 on success, negative value on failure + */ +static int idpf_open(struct net_device *netdev) +{ + struct idpf_vport *vport; + int err; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + err = idpf_vport_open(vport, true); + + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_change_mtu - NDO callback to change the MTU + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + */ +static int idpf_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct idpf_vport *vport; + int err; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + netdev->mtu = new_mtu; + + err = idpf_initiate_soft_reset(vport, IDPF_SR_MTU_CHANGE); + + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_features_check - Validate packet conforms to limits + * @skb: skb buffer + * @netdev: This port's netdev + * @features: Offload features that the stack believes apply + */ +static netdev_features_t idpf_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + struct idpf_adapter *adapter = vport->adapter; + size_t len; + + /* No point in doing any of this if neither checksum nor GSO are + * being requested for this frame. We can rule out both by just + * checking for CHECKSUM_PARTIAL + */ + if (skb->ip_summed != CHECKSUM_PARTIAL) + return features; + + /* We cannot support GSO if the MSS is going to be less than + * 88 bytes. If it is then we need to drop support for GSO. + */ + if (skb_is_gso(skb) && + (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS)) + features &= ~NETIF_F_GSO_MASK; + + /* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */ + len = skb_network_offset(skb); + if (unlikely(len & ~(126))) + goto unsupported; + + len = skb_network_header_len(skb); + if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + goto unsupported; + + if (!skb->encapsulation) + return features; + + /* L4TUNLEN can support 127 words */ + len = skb_inner_network_header(skb) - skb_transport_header(skb); + if (unlikely(len & ~(127 * 2))) + goto unsupported; + + /* IPLEN can support at most 127 dwords */ + len = skb_inner_network_header_len(skb); + if (unlikely(len > idpf_get_max_tx_hdr_size(adapter))) + goto unsupported; + + /* No need to validate L4LEN as TCP is the only protocol with a + * a flexible value and we support all possible values supported + * by TCP, which is at most 15 dwords + */ + + return features; + +unsupported: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + +/** + * idpf_set_mac - NDO callback to set port mac address + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +static int idpf_set_mac(struct net_device *netdev, void *p) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport_config *vport_config; + struct sockaddr *addr = p; + struct idpf_vport *vport; + int err = 0; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + + if (!idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS, + VIRTCHNL2_CAP_MACFILTER)) { + dev_info(&vport->adapter->pdev->dev, "Setting MAC address is not supported\n"); + err = -EOPNOTSUPP; + goto unlock_mutex; + } + + if (!is_valid_ether_addr(addr->sa_data)) { + dev_info(&vport->adapter->pdev->dev, "Invalid MAC address: %pM\n", + addr->sa_data); + err = -EADDRNOTAVAIL; + goto unlock_mutex; + } + + if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) + goto unlock_mutex; + + vport_config = vport->adapter->vport_config[vport->idx]; + err = idpf_add_mac_filter(vport, np, addr->sa_data, false); + if (err) { + __idpf_del_mac_filter(vport_config, addr->sa_data); + goto unlock_mutex; + } + + if (is_valid_ether_addr(vport->default_mac_addr)) + idpf_del_mac_filter(vport, np, vport->default_mac_addr, false); + + ether_addr_copy(vport->default_mac_addr, addr->sa_data); + eth_hw_addr_set(netdev, addr->sa_data); + +unlock_mutex: + idpf_vport_ctrl_unlock(netdev); + + return err; +} + +/** + * idpf_alloc_dma_mem - Allocate dma memory + * @hw: pointer to hw struct + * @mem: pointer to dma_mem struct + * @size: size of the memory to allocate + */ +void *idpf_alloc_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem, u64 size) +{ + struct idpf_adapter *adapter = hw->back; + size_t sz = ALIGN(size, 4096); + + mem->va = dma_alloc_coherent(&adapter->pdev->dev, sz, + &mem->pa, GFP_KERNEL); + mem->size = sz; + + return mem->va; +} + +/** + * idpf_free_dma_mem - Free the allocated dma memory + * @hw: pointer to hw struct + * @mem: pointer to dma_mem struct + */ +void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) +{ + struct idpf_adapter *adapter = hw->back; + + dma_free_coherent(&adapter->pdev->dev, mem->size, + mem->va, mem->pa); + mem->size = 0; + mem->va = NULL; + mem->pa = 0; +} + +static const struct net_device_ops idpf_netdev_ops_splitq = { + .ndo_open = idpf_open, + .ndo_stop = idpf_stop, + .ndo_start_xmit = idpf_tx_splitq_start, + .ndo_features_check = idpf_features_check, + .ndo_set_rx_mode = idpf_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = idpf_set_mac, + .ndo_change_mtu = idpf_change_mtu, + .ndo_get_stats64 = idpf_get_stats64, + .ndo_set_features = idpf_set_features, + .ndo_tx_timeout = idpf_tx_timeout, +}; + +static const struct net_device_ops idpf_netdev_ops_singleq = { + .ndo_open = idpf_open, + .ndo_stop = idpf_stop, + .ndo_start_xmit = idpf_tx_singleq_start, + .ndo_features_check = idpf_features_check, + .ndo_set_rx_mode = idpf_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = idpf_set_mac, + .ndo_change_mtu = idpf_change_mtu, + .ndo_get_stats64 = idpf_get_stats64, + .ndo_set_features = idpf_set_features, + .ndo_tx_timeout = idpf_tx_timeout, +}; diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c new file mode 100644 index 00000000000000..05115b6398489e --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_devids.h" +#include "idpf_virtchnl.h" + +#define DRV_SUMMARY "Intel(R) Infrastructure Data Path Function Linux Driver" + +MODULE_DESCRIPTION(DRV_SUMMARY); +MODULE_LICENSE("GPL"); + +/* RHEL-8-only */ +#ifndef PCI_VENDOR_ID_GOOGLE +#define PCI_VENDOR_ID_GOOGLE 0x1ae0 +#endif + +/** + * idpf_remove - Device removal routine + * @pdev: PCI device information struct + */ +static void idpf_remove(struct pci_dev *pdev) +{ + struct idpf_adapter *adapter = pci_get_drvdata(pdev); + int i; + + set_bit(IDPF_REMOVE_IN_PROG, adapter->flags); + + /* Wait until vc_event_task is done to consider if any hard reset is + * in progress else we may go ahead and release the resources but the + * thread doing the hard reset might continue the init path and + * end up in bad state. + */ + cancel_delayed_work_sync(&adapter->vc_event_task); + if (adapter->num_vfs) + idpf_sriov_configure(pdev, 0); + + idpf_vc_core_deinit(adapter); + + /* Be a good citizen and leave the device clean on exit */ + adapter->dev_ops.reg_ops.trigger_reset(adapter, IDPF_HR_FUNC_RESET); + idpf_deinit_dflt_mbx(adapter); + + if (!adapter->netdevs) + goto destroy_wqs; + + /* There are some cases where it's possible to still have netdevs + * registered with the stack at this point, e.g. if the driver detected + * a HW reset and rmmod is called before it fully recovers. Unregister + * any stale netdevs here. + */ + for (i = 0; i < adapter->max_vports; i++) { + if (!adapter->netdevs[i]) + continue; + if (adapter->netdevs[i]->reg_state != NETREG_UNINITIALIZED) + unregister_netdev(adapter->netdevs[i]); + free_netdev(adapter->netdevs[i]); + adapter->netdevs[i] = NULL; + } + +destroy_wqs: + destroy_workqueue(adapter->init_wq); + destroy_workqueue(adapter->serv_wq); + destroy_workqueue(adapter->mbx_wq); + destroy_workqueue(adapter->stats_wq); + destroy_workqueue(adapter->vc_event_wq); + + for (i = 0; i < adapter->max_vports; i++) { + kfree(adapter->vport_config[i]); + adapter->vport_config[i] = NULL; + } + kfree(adapter->vport_config); + adapter->vport_config = NULL; + kfree(adapter->netdevs); + adapter->netdevs = NULL; + kfree(adapter->vcxn_mngr); + adapter->vcxn_mngr = NULL; + + mutex_destroy(&adapter->vport_ctrl_lock); + mutex_destroy(&adapter->vector_lock); + mutex_destroy(&adapter->queue_lock); + mutex_destroy(&adapter->vc_buf_lock); + + pci_set_drvdata(pdev, NULL); + kfree(adapter); +} + +/** + * idpf_shutdown - PCI callback for shutting down device + * @pdev: PCI device information struct + */ +static void idpf_shutdown(struct pci_dev *pdev) +{ + idpf_remove(pdev); + + if (system_state == SYSTEM_POWER_OFF) + pci_set_power_state(pdev, PCI_D3hot); +} + +/** + * idpf_cfg_hw - Initialize HW struct + * @adapter: adapter to setup hw struct for + * + * Returns 0 on success, negative on failure + */ +static int idpf_cfg_hw(struct idpf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct idpf_hw *hw = &adapter->hw; + + hw->hw_addr = pcim_iomap_table(pdev)[0]; + if (!hw->hw_addr) { + pci_err(pdev, "failed to allocate PCI iomap table\n"); + + return -ENOMEM; + } + + hw->back = adapter; + + return 0; +} + +/** + * idpf_probe - Device initialization routine + * @pdev: PCI device information struct + * @ent: entry in idpf_pci_tbl + * + * Returns 0 on success, negative on failure + */ +static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct idpf_adapter *adapter; + int err; + + /* RHEL-8: idpf support only for GCP */ + if (pdev->subsystem_vendor != PCI_VENDOR_ID_GOOGLE) + mark_tech_preview("idpf driver outside of Google Cloud", + THIS_MODULE); + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) + return -ENOMEM; + + adapter->req_tx_splitq = true; + adapter->req_rx_splitq = true; + + switch (ent->device) { + case IDPF_DEV_ID_PF: + idpf_dev_ops_init(adapter); + break; + case IDPF_DEV_ID_VF: + idpf_vf_dev_ops_init(adapter); + adapter->crc_enable = true; + break; + default: + err = -ENODEV; + dev_err(&pdev->dev, "Unexpected dev ID 0x%x in idpf probe\n", + ent->device); + goto err_free; + } + + adapter->pdev = pdev; + err = pcim_enable_device(pdev); + if (err) + goto err_free; + + err = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); + if (err) { + pci_err(pdev, "pcim_iomap_regions failed %pe\n", ERR_PTR(err)); + + goto err_free; + } + + /* set up for high or low dma */ + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (err) { + pci_err(pdev, "DMA configuration failed: %pe\n", ERR_PTR(err)); + + goto err_free; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, adapter); + + adapter->init_wq = alloc_workqueue("%s-%s-init", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->init_wq) { + dev_err(dev, "Failed to allocate init workqueue\n"); + err = -ENOMEM; + goto err_free; + } + + adapter->serv_wq = alloc_workqueue("%s-%s-service", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->serv_wq) { + dev_err(dev, "Failed to allocate service workqueue\n"); + err = -ENOMEM; + goto err_serv_wq_alloc; + } + + adapter->mbx_wq = alloc_workqueue("%s-%s-mbx", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->mbx_wq) { + dev_err(dev, "Failed to allocate mailbox workqueue\n"); + err = -ENOMEM; + goto err_mbx_wq_alloc; + } + + adapter->stats_wq = alloc_workqueue("%s-%s-stats", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->stats_wq) { + dev_err(dev, "Failed to allocate workqueue\n"); + err = -ENOMEM; + goto err_stats_wq_alloc; + } + + adapter->vc_event_wq = alloc_workqueue("%s-%s-vc_event", 0, 0, + dev_driver_string(dev), + dev_name(dev)); + if (!adapter->vc_event_wq) { + dev_err(dev, "Failed to allocate virtchnl event workqueue\n"); + err = -ENOMEM; + goto err_vc_event_wq_alloc; + } + + /* setup msglvl */ + adapter->msg_enable = netif_msg_init(-1, IDPF_AVAIL_NETIF_M); + + err = idpf_cfg_hw(adapter); + if (err) { + dev_err(dev, "Failed to configure HW structure for adapter: %d\n", + err); + goto err_cfg_hw; + } + + mutex_init(&adapter->vport_ctrl_lock); + mutex_init(&adapter->vector_lock); + mutex_init(&adapter->queue_lock); + mutex_init(&adapter->vc_buf_lock); + + INIT_DELAYED_WORK(&adapter->init_task, idpf_init_task); + INIT_DELAYED_WORK(&adapter->serv_task, idpf_service_task); + INIT_DELAYED_WORK(&adapter->mbx_task, idpf_mbx_task); + INIT_DELAYED_WORK(&adapter->stats_task, idpf_statistics_task); + INIT_DELAYED_WORK(&adapter->vc_event_task, idpf_vc_event_task); + + adapter->dev_ops.reg_ops.reset_reg_init(adapter); + set_bit(IDPF_HR_DRV_LOAD, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task, + msecs_to_jiffies(10 * (pdev->devfn & 0x07))); + + return 0; + +err_cfg_hw: + destroy_workqueue(adapter->vc_event_wq); +err_vc_event_wq_alloc: + destroy_workqueue(adapter->stats_wq); +err_stats_wq_alloc: + destroy_workqueue(adapter->mbx_wq); +err_mbx_wq_alloc: + destroy_workqueue(adapter->serv_wq); +err_serv_wq_alloc: + destroy_workqueue(adapter->init_wq); +err_free: + kfree(adapter); + return err; +} + +/* idpf_pci_tbl - PCI Dev idpf ID Table + */ +static const struct pci_device_id idpf_pci_tbl[] = { + { PCI_VDEVICE(INTEL, IDPF_DEV_ID_PF)}, + { PCI_VDEVICE(INTEL, IDPF_DEV_ID_VF)}, + { /* Sentinel */ } +}; +MODULE_DEVICE_TABLE(pci, idpf_pci_tbl); + +static struct pci_driver idpf_driver = { + .name = KBUILD_MODNAME, + .id_table = idpf_pci_tbl, + .probe = idpf_probe, + .sriov_configure = idpf_sriov_configure, + .remove = idpf_remove, + .shutdown = idpf_shutdown, +}; +module_pci_driver(idpf_driver); diff --git a/drivers/net/ethernet/intel/idpf/idpf_mem.h b/drivers/net/ethernet/intel/idpf/idpf_mem.h new file mode 100644 index 00000000000000..b21a04fccf0f0d --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_mem.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_MEM_H_ +#define _IDPF_MEM_H_ + +#include + +struct idpf_dma_mem { + void *va; + dma_addr_t pa; + size_t size; +}; + +#define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) +#define rd32(a, reg) readl((a)->hw_addr + (reg)) +#define wr64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) +#define rd64(a, reg) readq((a)->hw_addr + (reg)) + +#endif /* _IDPF_MEM_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c new file mode 100644 index 00000000000000..27b93592c4babb --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -0,0 +1,1181 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" + +/** + * idpf_tx_singleq_csum - Enable tx checksum offloads + * @skb: pointer to skb + * @off: pointer to struct that holds offload parameters + * + * Returns 0 or error (negative) if checksum offload cannot be executed, 1 + * otherwise. + */ +static int idpf_tx_singleq_csum(struct sk_buff *skb, + struct idpf_tx_offload_params *off) +{ + u32 l4_len, l3_len, l2_len; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + unsigned char *hdr; + } l4; + u32 offset, cmd = 0; + u8 l4_proto = 0; + __be16 frag_off; + bool is_tso; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + l2_len = ip.hdr - skb->data; + offset = FIELD_PREP(0x3F << IDPF_TX_DESC_LEN_MACLEN_S, l2_len / 2); + is_tso = !!(off->tx_flags & IDPF_TX_FLAGS_TSO); + if (skb->encapsulation) { + u32 tunnel = 0; + + /* define outer network header type */ + if (off->tx_flags & IDPF_TX_FLAGS_IPV4) { + /* The stack computes the IP header already, the only + * time we need the hardware to recompute it is in the + * case of TSO. + */ + tunnel |= is_tso ? + IDPF_TX_CTX_EXT_IP_IPV4 : + IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) { + tunnel |= IDPF_TX_CTX_EXT_IP_IPV6; + + l4_proto = ip.v6->nexthdr; + if (ipv6_ext_hdr(l4_proto)) + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(*ip.v6), + &l4_proto, &frag_off); + } + + /* define outer transport */ + switch (l4_proto) { + case IPPROTO_UDP: + tunnel |= IDPF_TXD_CTX_UDP_TUNNELING; + break; + case IPPROTO_GRE: + tunnel |= IDPF_TXD_CTX_GRE_TUNNELING; + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + l4.hdr = skb_inner_network_header(skb); + break; + default: + if (is_tso) + return -1; + + skb_checksum_help(skb); + + return 0; + } + off->tx_flags |= IDPF_TX_FLAGS_TUNNEL; + + /* compute outer L3 header size */ + tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M, + (l4.hdr - ip.hdr) / 4); + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* compute tunnel header size */ + tunnel |= FIELD_PREP(IDPF_TXD_CTX_QW0_TUNN_NATLEN_M, + (ip.hdr - l4.hdr) / 2); + + /* indicate if we need to offload outer UDP header */ + if (is_tso && + !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M; + + /* record tunnel offload values */ + off->cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + off->tx_flags &= ~(IDPF_TX_FLAGS_IPV4 | IDPF_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + off->tx_flags |= IDPF_TX_FLAGS_IPV4; + if (ip.v6->version == 6) + off->tx_flags |= IDPF_TX_FLAGS_IPV6; + } + + /* Enable IP checksum offloads */ + if (off->tx_flags & IDPF_TX_FLAGS_IPV4) { + l4_proto = ip.v4->protocol; + /* See comment above regarding need for HW to recompute IP + * header checksum in the case of TSO. + */ + if (is_tso) + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4_CSUM; + else + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV4; + + } else if (off->tx_flags & IDPF_TX_FLAGS_IPV6) { + cmd |= IDPF_TX_DESC_CMD_IIPT_IPV6; + l4_proto = ip.v6->nexthdr; + if (ipv6_ext_hdr(l4_proto)) + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(*ip.v6), &l4_proto, + &frag_off); + } else { + return -1; + } + + /* compute inner L3 header size */ + l3_len = l4.hdr - ip.hdr; + offset |= (l3_len / 4) << IDPF_TX_DESC_LEN_IPLEN_S; + + /* Enable L4 checksum offloads */ + switch (l4_proto) { + case IPPROTO_TCP: + /* enable checksum offloads */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_TCP; + l4_len = l4.tcp->doff; + break; + case IPPROTO_UDP: + /* enable UDP checksum offload */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_UDP; + l4_len = sizeof(struct udphdr) >> 2; + break; + case IPPROTO_SCTP: + /* enable SCTP checksum offload */ + cmd |= IDPF_TX_DESC_CMD_L4T_EOFT_SCTP; + l4_len = sizeof(struct sctphdr) >> 2; + break; + default: + if (is_tso) + return -1; + + skb_checksum_help(skb); + + return 0; + } + + offset |= l4_len << IDPF_TX_DESC_LEN_L4_LEN_S; + off->td_cmd |= cmd; + off->hdr_offsets |= offset; + + return 1; +} + +/** + * idpf_tx_singleq_map - Build the Tx base descriptor + * @tx_q: queue to send buffer on + * @first: first buffer info buffer to use + * @offloads: pointer to struct that holds offload parameters + * + * This function loops over the skb data pointed to by *first + * and gets a physical address for each memory location and programs + * it and the length into the transmit base mode descriptor. + */ +static void idpf_tx_singleq_map(struct idpf_queue *tx_q, + struct idpf_tx_buf *first, + struct idpf_tx_offload_params *offloads) +{ + u32 offsets = offloads->hdr_offsets; + struct idpf_tx_buf *tx_buf = first; + struct idpf_base_tx_desc *tx_desc; + struct sk_buff *skb = first->skb; + u64 td_cmd = offloads->td_cmd; + unsigned int data_len, size; + u16 i = tx_q->next_to_use; + struct netdev_queue *nq; + skb_frag_t *frag; + dma_addr_t dma; + u64 td_tag = 0; + + data_len = skb->data_len; + size = skb_headlen(skb); + + tx_desc = IDPF_BASE_TX_DESC(tx_q, i); + + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + + /* write each descriptor with CRC bit */ + if (tx_q->vport->crc_enable) + td_cmd |= IDPF_TX_DESC_CMD_ICRC; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + + if (dma_mapping_error(tx_q->dev, dma)) + return idpf_tx_dma_map_error(tx_q, skb, first, i); + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + /* align size to end of page */ + max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1); + tx_desc->buf_addr = cpu_to_le64(dma); + + /* account for data chunks larger than the hardware + * can handle + */ + while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) { + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, + offsets, + max_data, + td_tag); + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + i = 0; + } + + dma += max_data; + size -= max_data; + + max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + tx_desc->buf_addr = cpu_to_le64(dma); + } + + if (!data_len) + break; + + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets, + size, td_tag); + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + i = 0; + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, + DMA_TO_DEVICE); + + tx_buf = &tx_q->tx_buf[i]; + } + + skb_tx_timestamp(first->skb); + + /* write last descriptor with RS and EOP bits */ + td_cmd |= (u64)(IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS); + + tx_desc->qw1 = idpf_tx_singleq_build_ctob(td_cmd, offsets, + size, td_tag); + + IDPF_SINGLEQ_BUMP_RING_IDX(tx_q, i); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytecount); + + idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); +} + +/** + * idpf_tx_singleq_get_ctx_desc - grab next desc and update buffer ring + * @txq: queue to put context descriptor on + * + * Since the TX buffer rings mimics the descriptor ring, update the tx buffer + * ring entry to reflect that this index is a context descriptor + */ +static struct idpf_base_tx_ctx_desc * +idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq) +{ + struct idpf_base_tx_ctx_desc *ctx_desc; + int ntu = txq->next_to_use; + + memset(&txq->tx_buf[ntu], 0, sizeof(struct idpf_tx_buf)); + txq->tx_buf[ntu].ctx_entry = true; + + ctx_desc = IDPF_BASE_TX_CTX_DESC(txq, ntu); + + IDPF_SINGLEQ_BUMP_RING_IDX(txq, ntu); + txq->next_to_use = ntu; + + return ctx_desc; +} + +/** + * idpf_tx_singleq_build_ctx_desc - populate context descriptor + * @txq: queue to send buffer on + * @offload: offload parameter structure + **/ +static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq, + struct idpf_tx_offload_params *offload) +{ + struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq); + u64 qw1 = (u64)IDPF_TX_DESC_DTYPE_CTX; + + if (offload->tso_segs) { + qw1 |= IDPF_TX_CTX_DESC_TSO << IDPF_TXD_CTX_QW1_CMD_S; + qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_TSO_LEN_M, + offload->tso_len); + qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss); + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.tx.lso_pkts); + u64_stats_update_end(&txq->stats_sync); + } + + desc->qw0.tunneling_params = cpu_to_le32(offload->cd_tunneling); + + desc->qw0.l2tag2 = 0; + desc->qw0.rsvd1 = 0; + desc->qw1 = cpu_to_le64(qw1); +} + +/** + * idpf_tx_singleq_frame - Sends buffer on Tx ring using base descriptors + * @skb: send buffer + * @tx_q: queue to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + struct idpf_queue *tx_q) +{ + struct idpf_tx_offload_params offload = { }; + struct idpf_tx_buf *first; + unsigned int count; + __be16 protocol; + int csum, tso; + + count = idpf_tx_desc_count_required(tx_q, skb); + if (unlikely(!count)) + return idpf_tx_drop_skb(tx_q, skb); + + if (idpf_tx_maybe_stop_common(tx_q, + count + IDPF_TX_DESCS_PER_CACHE_LINE + + IDPF_TX_DESCS_FOR_CTX)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_BUSY; + } + + protocol = vlan_get_protocol(skb); + if (protocol == htons(ETH_P_IP)) + offload.tx_flags |= IDPF_TX_FLAGS_IPV4; + else if (protocol == htons(ETH_P_IPV6)) + offload.tx_flags |= IDPF_TX_FLAGS_IPV6; + + tso = idpf_tso(skb, &offload); + if (tso < 0) + goto out_drop; + + csum = idpf_tx_singleq_csum(skb, &offload); + if (csum < 0) + goto out_drop; + + if (tso || offload.cd_tunneling) + idpf_tx_singleq_build_ctx_desc(tx_q, &offload); + + /* record the location of the first descriptor for this packet */ + first = &tx_q->tx_buf[tx_q->next_to_use]; + first->skb = skb; + + if (tso) { + first->gso_segs = offload.tso_segs; + first->bytecount = skb->len + ((first->gso_segs - 1) * offload.tso_hdr_len); + } else { + first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + first->gso_segs = 1; + } + idpf_tx_singleq_map(tx_q, first, &offload); + + return NETDEV_TX_OK; + +out_drop: + return idpf_tx_drop_skb(tx_q, skb); +} + +/** + * idpf_tx_singleq_start - Selects the right Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + struct net_device *netdev) +{ + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + struct idpf_queue *tx_q; + + tx_q = vport->txqs[skb_get_queue_mapping(skb)]; + + /* hardware can't handle really short frames, hardware padding works + * beyond this point + */ + if (skb_put_padto(skb, IDPF_TX_MIN_PKT_LEN)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_OK; + } + + return idpf_tx_singleq_frame(skb, tx_q); +} + +/** + * idpf_tx_singleq_clean - Reclaim resources from queue + * @tx_q: Tx queue to clean + * @napi_budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + */ +static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget, + int *cleaned) +{ + unsigned int budget = tx_q->vport->compln_clean_budget; + unsigned int total_bytes = 0, total_pkts = 0; + struct idpf_base_tx_desc *tx_desc; + s16 ntc = tx_q->next_to_clean; + struct idpf_netdev_priv *np; + struct idpf_tx_buf *tx_buf; + struct idpf_vport *vport; + struct netdev_queue *nq; + bool dont_wake; + + tx_desc = IDPF_BASE_TX_DESC(tx_q, ntc); + tx_buf = &tx_q->tx_buf[ntc]; + ntc -= tx_q->desc_count; + + do { + struct idpf_base_tx_desc *eop_desc; + + /* If this entry in the ring was used as a context descriptor, + * it's corresponding entry in the buffer ring will indicate as + * such. We can skip this descriptor since there is no buffer + * to clean. + */ + if (tx_buf->ctx_entry) { + /* Clear this flag here to avoid stale flag values when + * this buffer is used for actual data in the future. + * There are cases where the tx_buf struct / the flags + * field will not be cleared before being reused. + */ + tx_buf->ctx_entry = false; + goto fetch_next_txq_desc; + } + + /* if next_to_watch is not set then no work pending */ + eop_desc = (struct idpf_base_tx_desc *)tx_buf->next_to_watch; + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + smp_rmb(); + + /* if the descriptor isn't done, no work yet to do */ + if (!(eop_desc->qw1 & + cpu_to_le64(IDPF_TX_DESC_DTYPE_DESC_DONE))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buf->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buf->bytecount; + total_pkts += tx_buf->gso_segs; + + napi_consume_skb(tx_buf->skb, napi_budget); + + /* unmap skb header data */ + dma_unmap_single(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + + /* clear tx_buf data */ + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buf++; + tx_desc++; + ntc++; + if (unlikely(!ntc)) { + ntc -= tx_q->desc_count; + tx_buf = tx_q->tx_buf; + tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + + /* update budget only if we did something */ + budget--; + +fetch_next_txq_desc: + tx_buf++; + tx_desc++; + ntc++; + if (unlikely(!ntc)) { + ntc -= tx_q->desc_count; + tx_buf = tx_q->tx_buf; + tx_desc = IDPF_BASE_TX_DESC(tx_q, 0); + } + } while (likely(budget)); + + ntc += tx_q->desc_count; + tx_q->next_to_clean = ntc; + + *cleaned += total_pkts; + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts); + u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes); + u64_stats_update_end(&tx_q->stats_sync); + + vport = tx_q->vport; + np = netdev_priv(vport->netdev); + nq = netdev_get_tx_queue(vport->netdev, tx_q->idx); + + dont_wake = np->state != __IDPF_VPORT_UP || + !netif_carrier_ok(vport->netdev); + __netif_txq_completed_wake(nq, total_pkts, total_bytes, + IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, + dont_wake); + + return !!budget; +} + +/** + * idpf_tx_singleq_clean_all - Clean all Tx queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + int *cleaned) +{ + u16 num_txq = q_vec->num_txq; + bool clean_complete = true; + int i, budget_per_q; + + budget_per_q = num_txq ? max(budget / num_txq, 1) : 0; + for (i = 0; i < num_txq; i++) { + struct idpf_queue *q; + + q = q_vec->tx[i]; + clean_complete &= idpf_tx_singleq_clean(q, budget_per_q, + cleaned); + } + + return clean_complete; +} + +/** + * idpf_rx_singleq_test_staterr - tests bits in Rx descriptor + * status and error fields + * @rx_desc: pointer to receive descriptor (in le64 format) + * @stat_err_bits: value to mask + * + * This function does some fast chicanery in order to return the + * value of the mask which is really only used for boolean tests. + * The status_error_ptype_len doesn't need to be shifted because it begins + * at offset zero. + */ +static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc, + const u64 stat_err_bits) +{ + return !!(rx_desc->base_wb.qword1.status_error_ptype_len & + cpu_to_le64(stat_err_bits)); +} + +/** + * idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers + * @rxq: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * @ntc: next to clean + */ +static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq, + union virtchnl2_rx_desc *rx_desc, + struct sk_buff *skb, u16 ntc) +{ + /* if we are the last buffer then there is nothing else to do */ + if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ))) + return false; + + return true; +} + +/** + * idpf_rx_singleq_csum - Indicate in skb if checksum is good + * @rxq: Rx ring being processed + * @skb: skb currently being received and modified + * @csum_bits: checksum bits from descriptor + * @ptype: the packet type decoded by hardware + * + * skb->protocol must be set before this function is called + */ +static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb, + struct idpf_rx_csum_decoded *csum_bits, + u16 ptype) +{ + struct idpf_rx_ptype_decoded decoded; + bool ipv4, ipv6; + + /* check if Rx checksum is enabled */ + if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM))) + return; + + /* check if HW has decoded the packet and checksum */ + if (unlikely(!(csum_bits->l3l4p))) + return; + + decoded = rxq->vport->rx_ptype_lkup[ptype]; + if (unlikely(!(decoded.known && decoded.outer_ip))) + return; + + ipv4 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV4); + ipv6 = IDPF_RX_PTYPE_TO_IPV(&decoded, IDPF_RX_PTYPE_OUTER_IPV6); + + /* Check if there were any checksum errors */ + if (unlikely(ipv4 && (csum_bits->ipe || csum_bits->eipe))) + goto checksum_fail; + + /* Device could not do any checksum offload for certain extension + * headers as indicated by setting IPV6EXADD bit + */ + if (unlikely(ipv6 && csum_bits->ipv6exadd)) + return; + + /* check for L4 errors and handle packets that were not able to be + * checksummed due to arrival speed + */ + if (unlikely(csum_bits->l4e)) + goto checksum_fail; + + if (unlikely(csum_bits->nat && csum_bits->eudpe)) + goto checksum_fail; + + /* Handle packets that were not able to be checksummed due to arrival + * speed, in this case the stack can compute the csum. + */ + if (unlikely(csum_bits->pprs)) + return; + + /* If there is an outer header present that might contain a checksum + * we need to bump the checksum level by 1 to reflect the fact that + * we are indicating we validated the inner checksum. + */ + if (decoded.tunnel_type >= IDPF_RX_PTYPE_TUNNEL_IP_GRENAT) + skb->csum_level = 1; + + /* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */ + switch (decoded.inner_prot) { + case IDPF_RX_PTYPE_INNER_PROT_ICMP: + case IDPF_RX_PTYPE_INNER_PROT_TCP: + case IDPF_RX_PTYPE_INNER_PROT_UDP: + case IDPF_RX_PTYPE_INNER_PROT_SCTP: + skb->ip_summed = CHECKSUM_UNNECESSARY; + return; + default: + return; + } + +checksum_fail: + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); + u64_stats_update_end(&rxq->stats_sync); +} + +/** + * idpf_rx_singleq_base_csum - Indicate in skb if hw indicated a good cksum + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: Rx packet type + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + **/ +static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + u16 ptype) +{ + struct idpf_rx_csum_decoded csum_bits; + u32 rx_error, rx_status; + u64 qword; + + qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + rx_status = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M, qword); + rx_error = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, qword); + + csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M, rx_error); + csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M, + rx_error); + csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M, rx_error); + csum_bits.pprs = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M, + rx_error); + csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M, + rx_status); + csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M, + rx_status); + csum_bits.nat = 0; + csum_bits.eudpe = 0; + + idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype); +} + +/** + * idpf_rx_singleq_flex_csum - Indicate in skb if hw indicated a good cksum + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: Rx packet type + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + u16 ptype) +{ + struct idpf_rx_csum_decoded csum_bits; + u16 rx_status0, rx_status1; + + rx_status0 = le16_to_cpu(rx_desc->flex_nic_wb.status_error0); + rx_status1 = le16_to_cpu(rx_desc->flex_nic_wb.status_error1); + + csum_bits.ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M, + rx_status0); + csum_bits.eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M, + rx_status0); + csum_bits.l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M, + rx_status0); + csum_bits.eudpe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M, + rx_status0); + csum_bits.l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M, + rx_status0); + csum_bits.ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M, + rx_status0); + csum_bits.nat = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M, + rx_status1); + csum_bits.pprs = 0; + + idpf_rx_singleq_csum(rx_q, skb, &csum_bits, ptype); +} + +/** + * idpf_rx_singleq_base_hash - set the hash value in the skb + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: specific descriptor + * @decoded: Decoded Rx packet type related fields + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + **/ +static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_ptype_decoded *decoded) +{ + u64 mask, qw1; + + if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) + return; + + mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M; + qw1 = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + if (FIELD_GET(mask, qw1) == mask) { + u32 hash = le32_to_cpu(rx_desc->base_wb.qword0.hi_dword.rss); + + skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded)); + } +} + +/** + * idpf_rx_singleq_flex_hash - set the hash value in the skb + * @rx_q: Rx completion queue + * @skb: skb currently being received and modified + * @rx_desc: specific descriptor + * @decoded: Decoded Rx packet type related fields + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + **/ +static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_ptype_decoded *decoded) +{ + if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH))) + return; + + if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M, + le16_to_cpu(rx_desc->flex_nic_wb.status_error0))) + skb_set_hash(skb, le32_to_cpu(rx_desc->flex_nic_wb.rss_hash), + idpf_ptype_to_htype(decoded)); +} + +/** + * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx + * descriptor + * @rx_q: Rx ring being processed + * @skb: pointer to current skb being populated + * @rx_desc: descriptor for skb + * @ptype: packet type + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, protocol, and + * other fields within the skb. + */ +static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q, + struct sk_buff *skb, + union virtchnl2_rx_desc *rx_desc, + u16 ptype) +{ + struct idpf_rx_ptype_decoded decoded = + rx_q->vport->rx_ptype_lkup[ptype]; + + /* modifies the skb - consumes the enet header */ + skb->protocol = eth_type_trans(skb, rx_q->vport->netdev); + + /* Check if we're using base mode descriptor IDs */ + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) { + idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, &decoded); + idpf_rx_singleq_base_csum(rx_q, skb, rx_desc, ptype); + } else { + idpf_rx_singleq_flex_hash(rx_q, skb, rx_desc, &decoded); + idpf_rx_singleq_flex_csum(rx_q, skb, rx_desc, ptype); + } +} + +/** + * idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers + * @rx_q: queue for which the hw buffers are allocated + * @cleaned_count: number of buffers to replace + * + * Returns false if all allocations were successful, true if any fail + */ +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q, + u16 cleaned_count) +{ + struct virtchnl2_singleq_rx_buf_desc *desc; + u16 nta = rx_q->next_to_alloc; + struct idpf_rx_buf *buf; + + if (!cleaned_count) + return false; + + desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, nta); + buf = &rx_q->rx_buf.buf[nta]; + + do { + dma_addr_t addr; + + addr = idpf_alloc_page(rx_q->pp, buf, rx_q->rx_buf_size); + if (unlikely(addr == DMA_MAPPING_ERROR)) + break; + + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + desc->pkt_addr = cpu_to_le64(addr); + desc->hdr_addr = 0; + desc++; + + buf++; + nta++; + if (unlikely(nta == rx_q->desc_count)) { + desc = IDPF_SINGLEQ_RX_BUF_DESC(rx_q, 0); + buf = rx_q->rx_buf.buf; + nta = 0; + } + + cleaned_count--; + } while (cleaned_count); + + if (rx_q->next_to_alloc != nta) { + idpf_rx_buf_hw_update(rx_q, nta); + rx_q->next_to_alloc = nta; + } + + return !!cleaned_count; +} + +/** + * idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor + * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + * Decode the Rx descriptor and extract relevant information including the + * size and Rx packet type. + * + * This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte + * descriptor writeback format. + */ +static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) +{ + u64 qword; + + qword = le64_to_cpu(rx_desc->base_wb.qword1.status_error_ptype_len); + + fields->size = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M, qword); + fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M, qword); +} + +/** + * idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor + * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + * Decode the Rx descriptor and extract relevant information including the + * size and Rx packet type. + * + * This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible + * descriptor writeback format. + */ +static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) +{ + fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M, + le16_to_cpu(rx_desc->flex_nic_wb.pkt_len)); + fields->rx_ptype = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PTYPE_M, + le16_to_cpu(rx_desc->flex_nic_wb.ptype_flex_flags0)); +} + +/** + * idpf_rx_singleq_extract_fields - Extract fields from the Rx descriptor + * @rx_q: Rx descriptor queue + * @rx_desc: the descriptor to process + * @fields: storage for extracted values + * + */ +static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q, + union virtchnl2_rx_desc *rx_desc, + struct idpf_rx_extracted *fields) +{ + if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) + idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields); + else + idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields); +} + +/** + * idpf_rx_singleq_clean - Reclaim resources after receive completes + * @rx_q: rx queue to clean + * @budget: Total limit on number of packets to process + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_pkts = 0; + struct sk_buff *skb = rx_q->skb; + u16 ntc = rx_q->next_to_clean; + u16 cleaned_count = 0; + bool failure = false; + + /* Process Rx packets bounded by budget */ + while (likely(total_rx_pkts < (unsigned int)budget)) { + struct idpf_rx_extracted fields = { }; + union virtchnl2_rx_desc *rx_desc; + struct idpf_rx_buf *rx_buf; + + /* get the Rx desc from Rx queue based on 'next_to_clean' */ + rx_desc = IDPF_RX_DESC(rx_q, ntc); + + /* status_error_ptype_len will always be zero for unused + * descriptors because it's cleared in cleanup, and overlaps + * with hdr_addr which is always zero because packet split + * isn't used, if the hardware wrote DD then the length will be + * non-zero + */ +#define IDPF_RXD_DD VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M + if (!idpf_rx_singleq_test_staterr(rx_desc, + IDPF_RXD_DD)) + break; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc + */ + dma_rmb(); + + idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); + + rx_buf = &rx_q->rx_buf.buf[ntc]; + if (!fields.size) { + idpf_rx_put_page(rx_buf); + goto skip_data; + } + + idpf_rx_sync_for_cpu(rx_buf, fields.size); + if (skb) + idpf_rx_add_frag(rx_buf, skb, fields.size); + else + skb = idpf_rx_construct_skb(rx_q, rx_buf, fields.size); + + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; + +skip_data: + IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc); + + cleaned_count++; + + /* skip if it is non EOP desc */ + if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc)) + continue; + +#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \ + VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M) + if (unlikely(idpf_rx_singleq_test_staterr(rx_desc, + IDPF_RXD_ERR_S))) { + dev_kfree_skb_any(skb); + skb = NULL; + continue; + } + + /* pad skb if needed (to make valid ethernet frame) */ + if (eth_skb_pad(skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + + /* protocol */ + idpf_rx_singleq_process_skb_fields(rx_q, skb, + rx_desc, fields.rx_ptype); + + /* send completed skb up the stack */ + napi_gro_receive(&rx_q->q_vector->napi, skb); + skb = NULL; + + /* update budget accounting */ + total_rx_pkts++; + } + + rx_q->skb = skb; + + rx_q->next_to_clean = ntc; + + if (cleaned_count) + failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); + + u64_stats_update_begin(&rx_q->stats_sync); + u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts); + u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes); + u64_stats_update_end(&rx_q->stats_sync); + + /* guarantee a trip back through this routine if there was a failure */ + return failure ? budget : (int)total_rx_pkts; +} + +/** + * idpf_rx_singleq_clean_all - Clean all Rx queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget, + int *cleaned) +{ + u16 num_rxq = q_vec->num_rxq; + bool clean_complete = true; + int budget_per_q, i; + + /* We attempt to distribute budget to each Rx queue fairly, but don't + * allow the budget to go below 1 because that would exit polling early. + */ + budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0; + for (i = 0; i < num_rxq; i++) { + struct idpf_queue *rxq = q_vec->rx[i]; + int pkts_cleaned_per_q; + + pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q); + + /* if we clean as many as budgeted, we must not be done */ + if (pkts_cleaned_per_q >= budget_per_q) + clean_complete = false; + *cleaned += pkts_cleaned_per_q; + } + + return clean_complete; +} + +/** + * idpf_vport_singleq_napi_poll - NAPI handler + * @napi: struct from which you get q_vector + * @budget: budget provided by stack + */ +int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget) +{ + struct idpf_q_vector *q_vector = + container_of(napi, struct idpf_q_vector, napi); + bool clean_complete; + int work_done = 0; + + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) { + idpf_tx_singleq_clean_all(q_vector, budget, &work_done); + + return budget; + } + + clean_complete = idpf_rx_singleq_clean_all(q_vector, budget, + &work_done); + clean_complete &= idpf_tx_singleq_clean_all(q_vector, budget, + &work_done); + + /* If work not completed, return budget and polling will return */ + if (!clean_complete) + return budget; + + work_done = min_t(int, work_done, budget - 1); + + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + idpf_vport_intr_update_itr_ena_irq(q_vector); + + return work_done; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c new file mode 100644 index 00000000000000..35677fdf6d7a89 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -0,0 +1,4294 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_virtchnl.h" + +/** + * idpf_buf_lifo_push - push a buffer pointer onto stack + * @stack: pointer to stack struct + * @buf: pointer to buf to push + * + * Returns 0 on success, negative on failure + **/ +static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack, + struct idpf_tx_stash *buf) +{ + if (unlikely(stack->top == stack->size)) + return -ENOSPC; + + stack->bufs[stack->top++] = buf; + + return 0; +} + +/** + * idpf_buf_lifo_pop - pop a buffer pointer from stack + * @stack: pointer to stack struct + **/ +static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack) +{ + if (unlikely(!stack->top)) + return NULL; + + return stack->bufs[--stack->top]; +} + +/** + * idpf_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + * @txqueue: TX queue + */ +void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) +{ + struct idpf_adapter *adapter = idpf_netdev_to_adapter(netdev); + + adapter->tx_timeout_count++; + + netdev_err(netdev, "Detected Tx timeout: Count %d, Queue %d\n", + adapter->tx_timeout_count, txqueue); + if (!idpf_is_reset_in_prog(adapter)) { + set_bit(IDPF_HR_FUNC_RESET, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, + &adapter->vc_event_task, + msecs_to_jiffies(10)); + } +} + +/** + * idpf_tx_buf_rel - Release a Tx buffer + * @tx_q: the queue that owns the buffer + * @tx_buf: the buffer to free + */ +static void idpf_tx_buf_rel(struct idpf_queue *tx_q, struct idpf_tx_buf *tx_buf) +{ + if (tx_buf->skb) { + if (dma_unmap_len(tx_buf, len)) + dma_unmap_single(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dev_kfree_skb_any(tx_buf->skb); + } else if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + } + + tx_buf->next_to_watch = NULL; + tx_buf->skb = NULL; + tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + dma_unmap_len_set(tx_buf, len, 0); +} + +/** + * idpf_tx_buf_rel_all - Free any empty Tx buffers + * @txq: queue to be cleaned + */ +static void idpf_tx_buf_rel_all(struct idpf_queue *txq) +{ + u16 i; + + /* Buffers already cleared, nothing to do */ + if (!txq->tx_buf) + return; + + /* Free all the Tx buffer sk_buffs */ + for (i = 0; i < txq->desc_count; i++) + idpf_tx_buf_rel(txq, &txq->tx_buf[i]); + + kfree(txq->tx_buf); + txq->tx_buf = NULL; + + if (!txq->buf_stack.bufs) + return; + + for (i = 0; i < txq->buf_stack.size; i++) + kfree(txq->buf_stack.bufs[i]); + + kfree(txq->buf_stack.bufs); + txq->buf_stack.bufs = NULL; +} + +/** + * idpf_tx_desc_rel - Free Tx resources per queue + * @txq: Tx descriptor ring for a specific queue + * @bufq: buffer q or completion q + * + * Free all transmit software resources + */ +static void idpf_tx_desc_rel(struct idpf_queue *txq, bool bufq) +{ + if (bufq) + idpf_tx_buf_rel_all(txq); + + if (!txq->desc_ring) + return; + + dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); + txq->desc_ring = NULL; + txq->next_to_alloc = 0; + txq->next_to_use = 0; + txq->next_to_clean = 0; +} + +/** + * idpf_tx_desc_rel_all - Free Tx Resources for All Queues + * @vport: virtual port structure + * + * Free all transmit software resources + */ +static void idpf_tx_desc_rel_all(struct idpf_vport *vport) +{ + int i, j; + + if (!vport->txq_grps) + return; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (j = 0; j < txq_grp->num_txq; j++) + idpf_tx_desc_rel(txq_grp->txqs[j], true); + + if (idpf_is_queue_model_split(vport->txq_model)) + idpf_tx_desc_rel(txq_grp->complq, false); + } +} + +/** + * idpf_tx_buf_alloc_all - Allocate memory for all buffer resources + * @tx_q: queue for which the buffers are allocated + * + * Returns 0 on success, negative on failure + */ +static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q) +{ + int buf_size; + int i; + + /* Allocate book keeping buffers only. Buffers to be supplied to HW + * are allocated by kernel network stack and received as part of skb + */ + buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count; + tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL); + if (!tx_q->tx_buf) + return -ENOMEM; + + /* Initialize tx_bufs with invalid completion tags */ + for (i = 0; i < tx_q->desc_count; i++) + tx_q->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + /* Initialize tx buf stack for out-of-order completions if + * flow scheduling offload is enabled + */ + tx_q->buf_stack.bufs = + kcalloc(tx_q->desc_count, sizeof(struct idpf_tx_stash *), + GFP_KERNEL); + if (!tx_q->buf_stack.bufs) + return -ENOMEM; + + tx_q->buf_stack.size = tx_q->desc_count; + tx_q->buf_stack.top = tx_q->desc_count; + + for (i = 0; i < tx_q->desc_count; i++) { + tx_q->buf_stack.bufs[i] = kzalloc(sizeof(*tx_q->buf_stack.bufs[i]), + GFP_KERNEL); + if (!tx_q->buf_stack.bufs[i]) + return -ENOMEM; + } + + return 0; +} + +/** + * idpf_tx_desc_alloc - Allocate the Tx descriptors + * @tx_q: the tx ring to set up + * @bufq: buffer or completion queue + * + * Returns 0 on success, negative on failure + */ +static int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq) +{ + struct device *dev = tx_q->dev; + u32 desc_sz; + int err; + + if (bufq) { + err = idpf_tx_buf_alloc_all(tx_q); + if (err) + goto err_alloc; + + desc_sz = sizeof(struct idpf_base_tx_desc); + } else { + desc_sz = sizeof(struct idpf_splitq_tx_compl_desc); + } + + tx_q->size = tx_q->desc_count * desc_sz; + + /* Allocate descriptors also round up to nearest 4K */ + tx_q->size = ALIGN(tx_q->size, 4096); + tx_q->desc_ring = dmam_alloc_coherent(dev, tx_q->size, &tx_q->dma, + GFP_KERNEL); + if (!tx_q->desc_ring) { + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", + tx_q->size); + err = -ENOMEM; + goto err_alloc; + } + + tx_q->next_to_alloc = 0; + tx_q->next_to_use = 0; + tx_q->next_to_clean = 0; + set_bit(__IDPF_Q_GEN_CHK, tx_q->flags); + + return 0; + +err_alloc: + idpf_tx_desc_rel(tx_q, bufq); + + return err; +} + +/** + * idpf_tx_desc_alloc_all - allocate all queues Tx resources + * @vport: virtual port private structure + * + * Returns 0 on success, negative on failure + */ +static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) +{ + struct device *dev = &vport->adapter->pdev->dev; + int err = 0; + int i, j; + + /* Setup buffer queues. In single queue model buffer queues and + * completion queues will be same + */ + for (i = 0; i < vport->num_txq_grp; i++) { + for (j = 0; j < vport->txq_grps[i].num_txq; j++) { + struct idpf_queue *txq = vport->txq_grps[i].txqs[j]; + u8 gen_bits = 0; + u16 bufidx_mask; + + err = idpf_tx_desc_alloc(txq, true); + if (err) { + dev_err(dev, "Allocation for Tx Queue %u failed\n", + i); + goto err_out; + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + continue; + + txq->compl_tag_cur_gen = 0; + + /* Determine the number of bits in the bufid + * mask and add one to get the start of the + * generation bits + */ + bufidx_mask = txq->desc_count - 1; + while (bufidx_mask >> 1) { + txq->compl_tag_gen_s++; + bufidx_mask = bufidx_mask >> 1; + } + txq->compl_tag_gen_s++; + + gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH - + txq->compl_tag_gen_s; + txq->compl_tag_gen_max = GETMAXVAL(gen_bits); + + /* Set bufid mask based on location of first + * gen bit; it cannot simply be the descriptor + * ring size-1 since we can have size values + * where not all of those bits are set. + */ + txq->compl_tag_bufid_m = + GETMAXVAL(txq->compl_tag_gen_s); + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + continue; + + /* Setup completion queues */ + err = idpf_tx_desc_alloc(vport->txq_grps[i].complq, false); + if (err) { + dev_err(dev, "Allocation for Tx Completion Queue %u failed\n", + i); + goto err_out; + } + } + +err_out: + if (err) + idpf_tx_desc_rel_all(vport); + + return err; +} + +/** + * idpf_rx_page_rel - Release an rx buffer page + * @rxq: the queue that owns the buffer + * @rx_buf: the buffer to free + */ +static void idpf_rx_page_rel(struct idpf_queue *rxq, struct idpf_rx_buf *rx_buf) +{ + if (unlikely(!rx_buf->page)) + return; + + page_pool_put_full_page(rxq->pp, rx_buf->page, false); + + rx_buf->page = NULL; + rx_buf->page_offset = 0; +} + +/** + * idpf_rx_hdr_buf_rel_all - Release header buffer memory + * @rxq: queue to use + */ +static void idpf_rx_hdr_buf_rel_all(struct idpf_queue *rxq) +{ + struct idpf_adapter *adapter = rxq->vport->adapter; + + dma_free_coherent(&adapter->pdev->dev, + rxq->desc_count * IDPF_HDR_BUF_SIZE, + rxq->rx_buf.hdr_buf_va, + rxq->rx_buf.hdr_buf_pa); + rxq->rx_buf.hdr_buf_va = NULL; +} + +/** + * idpf_rx_buf_rel_all - Free all Rx buffer resources for a queue + * @rxq: queue to be cleaned + */ +static void idpf_rx_buf_rel_all(struct idpf_queue *rxq) +{ + u16 i; + + /* queue already cleared, nothing to do */ + if (!rxq->rx_buf.buf) + return; + + /* Free all the bufs allocated and given to hw on Rx queue */ + for (i = 0; i < rxq->desc_count; i++) + idpf_rx_page_rel(rxq, &rxq->rx_buf.buf[i]); + + if (rxq->rx_hsplit_en) + idpf_rx_hdr_buf_rel_all(rxq); + + page_pool_destroy(rxq->pp); + rxq->pp = NULL; + + kfree(rxq->rx_buf.buf); + rxq->rx_buf.buf = NULL; +} + +/** + * idpf_rx_desc_rel - Free a specific Rx q resources + * @rxq: queue to clean the resources from + * @bufq: buffer q or completion q + * @q_model: single or split q model + * + * Free a specific rx queue resources + */ +static void idpf_rx_desc_rel(struct idpf_queue *rxq, bool bufq, s32 q_model) +{ + if (!rxq) + return; + + if (rxq->skb) { + dev_kfree_skb_any(rxq->skb); + rxq->skb = NULL; + } + + if (bufq || !idpf_is_queue_model_split(q_model)) + idpf_rx_buf_rel_all(rxq); + + rxq->next_to_alloc = 0; + rxq->next_to_clean = 0; + rxq->next_to_use = 0; + if (!rxq->desc_ring) + return; + + dmam_free_coherent(rxq->dev, rxq->size, rxq->desc_ring, rxq->dma); + rxq->desc_ring = NULL; +} + +/** + * idpf_rx_desc_rel_all - Free Rx Resources for All Queues + * @vport: virtual port structure + * + * Free all rx queues resources + */ +static void idpf_rx_desc_rel_all(struct idpf_vport *vport) +{ + struct idpf_rxq_group *rx_qgrp; + u16 num_rxq; + int i, j; + + if (!vport->rxq_grps) + return; + + for (i = 0; i < vport->num_rxq_grp; i++) { + rx_qgrp = &vport->rxq_grps[i]; + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) + idpf_rx_desc_rel(rx_qgrp->singleq.rxqs[j], + false, vport->rxq_model); + continue; + } + + num_rxq = rx_qgrp->splitq.num_rxq_sets; + for (j = 0; j < num_rxq; j++) + idpf_rx_desc_rel(&rx_qgrp->splitq.rxq_sets[j]->rxq, + false, vport->rxq_model); + + if (!rx_qgrp->splitq.bufq_sets) + continue; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_bufq_set *bufq_set = + &rx_qgrp->splitq.bufq_sets[j]; + + idpf_rx_desc_rel(&bufq_set->bufq, true, + vport->rxq_model); + } + } +} + +/** + * idpf_rx_buf_hw_update - Store the new tail and head values + * @rxq: queue to bump + * @val: new head index + */ +void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val) +{ + rxq->next_to_use = val; + + if (unlikely(!rxq->tail)) + return; + + /* writel has an implicit memory barrier */ + writel(val, rxq->tail); +} + +/** + * idpf_rx_hdr_buf_alloc_all - Allocate memory for header buffers + * @rxq: ring to use + * + * Returns 0 on success, negative on failure. + */ +static int idpf_rx_hdr_buf_alloc_all(struct idpf_queue *rxq) +{ + struct idpf_adapter *adapter = rxq->vport->adapter; + + rxq->rx_buf.hdr_buf_va = + dma_alloc_coherent(&adapter->pdev->dev, + IDPF_HDR_BUF_SIZE * rxq->desc_count, + &rxq->rx_buf.hdr_buf_pa, + GFP_KERNEL); + if (!rxq->rx_buf.hdr_buf_va) + return -ENOMEM; + + return 0; +} + +/** + * idpf_rx_post_buf_refill - Post buffer id to refill queue + * @refillq: refill queue to post to + * @buf_id: buffer id to post + */ +static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) +{ + u16 nta = refillq->next_to_alloc; + + /* store the buffer ID and the SW maintained GEN bit to the refillq */ + refillq->ring[nta] = + FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | + FIELD_PREP(IDPF_RX_BI_GEN_M, + test_bit(__IDPF_Q_GEN_CHK, refillq->flags)); + + if (unlikely(++nta == refillq->desc_count)) { + nta = 0; + change_bit(__IDPF_Q_GEN_CHK, refillq->flags); + } + refillq->next_to_alloc = nta; +} + +/** + * idpf_rx_post_buf_desc - Post buffer to bufq descriptor ring + * @bufq: buffer queue to post to + * @buf_id: buffer id to post + * + * Returns false if buffer could not be allocated, true otherwise. + */ +static bool idpf_rx_post_buf_desc(struct idpf_queue *bufq, u16 buf_id) +{ + struct virtchnl2_splitq_rx_buf_desc *splitq_rx_desc = NULL; + u16 nta = bufq->next_to_alloc; + struct idpf_rx_buf *buf; + dma_addr_t addr; + + splitq_rx_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, nta); + buf = &bufq->rx_buf.buf[buf_id]; + + if (bufq->rx_hsplit_en) { + splitq_rx_desc->hdr_addr = + cpu_to_le64(bufq->rx_buf.hdr_buf_pa + + (u32)buf_id * IDPF_HDR_BUF_SIZE); + } + + addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size); + if (unlikely(addr == DMA_MAPPING_ERROR)) + return false; + + splitq_rx_desc->pkt_addr = cpu_to_le64(addr); + splitq_rx_desc->qword0.buf_id = cpu_to_le16(buf_id); + + nta++; + if (unlikely(nta == bufq->desc_count)) + nta = 0; + bufq->next_to_alloc = nta; + + return true; +} + +/** + * idpf_rx_post_init_bufs - Post initial buffers to bufq + * @bufq: buffer queue to post working set to + * @working_set: number of buffers to put in working set + * + * Returns true if @working_set bufs were posted successfully, false otherwise. + */ +static bool idpf_rx_post_init_bufs(struct idpf_queue *bufq, u16 working_set) +{ + int i; + + for (i = 0; i < working_set; i++) { + if (!idpf_rx_post_buf_desc(bufq, i)) + return false; + } + + idpf_rx_buf_hw_update(bufq, + bufq->next_to_alloc & ~(bufq->rx_buf_stride - 1)); + + return true; +} + +/** + * idpf_rx_create_page_pool - Create a page pool + * @rxbufq: RX queue to create page pool for + * + * Returns &page_pool on success, casted -errno on failure + */ +static struct page_pool *idpf_rx_create_page_pool(struct idpf_queue *rxbufq) +{ + struct page_pool_params pp = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, + .pool_size = rxbufq->desc_count, + .nid = NUMA_NO_NODE, + .dev = rxbufq->vport->netdev->dev.parent, + .max_len = PAGE_SIZE, + .dma_dir = DMA_FROM_DEVICE, + .offset = 0, + }; + + WARN_ON_ONCE(rxbufq->rx_buf_size != IDPF_RX_BUF_4096); + + return page_pool_create(&pp); +} + +/** + * idpf_rx_buf_alloc_all - Allocate memory for all buffer resources + * @rxbufq: queue for which the buffers are allocated; equivalent to + * rxq when operating in singleq mode + * + * Returns 0 on success, negative on failure + */ +static int idpf_rx_buf_alloc_all(struct idpf_queue *rxbufq) +{ + int err = 0; + + /* Allocate book keeping buffers */ + rxbufq->rx_buf.buf = kcalloc(rxbufq->desc_count, + sizeof(struct idpf_rx_buf), GFP_KERNEL); + if (!rxbufq->rx_buf.buf) { + err = -ENOMEM; + goto rx_buf_alloc_all_out; + } + + if (rxbufq->rx_hsplit_en) { + err = idpf_rx_hdr_buf_alloc_all(rxbufq); + if (err) + goto rx_buf_alloc_all_out; + } + + /* Allocate buffers to be given to HW. */ + if (idpf_is_queue_model_split(rxbufq->vport->rxq_model)) { + int working_set = IDPF_RX_BUFQ_WORKING_SET(rxbufq); + + if (!idpf_rx_post_init_bufs(rxbufq, working_set)) + err = -ENOMEM; + } else { + if (idpf_rx_singleq_buf_hw_alloc_all(rxbufq, + rxbufq->desc_count - 1)) + err = -ENOMEM; + } + +rx_buf_alloc_all_out: + if (err) + idpf_rx_buf_rel_all(rxbufq); + + return err; +} + +/** + * idpf_rx_bufs_init - Initialize page pool, allocate rx bufs, and post to HW + * @rxbufq: RX queue to create page pool for + * + * Returns 0 on success, negative on failure + */ +static int idpf_rx_bufs_init(struct idpf_queue *rxbufq) +{ + struct page_pool *pool; + + pool = idpf_rx_create_page_pool(rxbufq); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + rxbufq->pp = pool; + + return idpf_rx_buf_alloc_all(rxbufq); +} + +/** + * idpf_rx_bufs_init_all - Initialize all RX bufs + * @vport: virtual port struct + * + * Returns 0 on success, negative on failure + */ +int idpf_rx_bufs_init_all(struct idpf_vport *vport) +{ + struct idpf_rxq_group *rx_qgrp; + struct idpf_queue *q; + int i, j, err; + + for (i = 0; i < vport->num_rxq_grp; i++) { + rx_qgrp = &vport->rxq_grps[i]; + + /* Allocate bufs for the rxq itself in singleq */ + if (!idpf_is_queue_model_split(vport->rxq_model)) { + int num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + q = rx_qgrp->singleq.rxqs[j]; + err = idpf_rx_bufs_init(q); + if (err) + return err; + } + + continue; + } + + /* Otherwise, allocate bufs for the buffer queues */ + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + err = idpf_rx_bufs_init(q); + if (err) + return err; + } + } + + return 0; +} + +/** + * idpf_rx_desc_alloc - Allocate queue Rx resources + * @rxq: Rx queue for which the resources are setup + * @bufq: buffer or completion queue + * @q_model: single or split queue model + * + * Returns 0 on success, negative on failure + */ +static int idpf_rx_desc_alloc(struct idpf_queue *rxq, bool bufq, s32 q_model) +{ + struct device *dev = rxq->dev; + + if (bufq) + rxq->size = rxq->desc_count * + sizeof(struct virtchnl2_splitq_rx_buf_desc); + else + rxq->size = rxq->desc_count * + sizeof(union virtchnl2_rx_desc); + + /* Allocate descriptors and also round up to nearest 4K */ + rxq->size = ALIGN(rxq->size, 4096); + rxq->desc_ring = dmam_alloc_coherent(dev, rxq->size, + &rxq->dma, GFP_KERNEL); + if (!rxq->desc_ring) { + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", + rxq->size); + return -ENOMEM; + } + + rxq->next_to_alloc = 0; + rxq->next_to_clean = 0; + rxq->next_to_use = 0; + set_bit(__IDPF_Q_GEN_CHK, rxq->flags); + + return 0; +} + +/** + * idpf_rx_desc_alloc_all - allocate all RX queues resources + * @vport: virtual port structure + * + * Returns 0 on success, negative on failure + */ +static int idpf_rx_desc_alloc_all(struct idpf_vport *vport) +{ + struct device *dev = &vport->adapter->pdev->dev; + struct idpf_rxq_group *rx_qgrp; + struct idpf_queue *q; + int i, j, err; + u16 num_rxq; + + for (i = 0; i < vport->num_rxq_grp; i++) { + rx_qgrp = &vport->rxq_grps[i]; + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + err = idpf_rx_desc_alloc(q, false, vport->rxq_model); + if (err) { + dev_err(dev, "Memory allocation for Rx Queue %u failed\n", + i); + goto err_out; + } + } + + if (!idpf_is_queue_model_split(vport->rxq_model)) + continue; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + err = idpf_rx_desc_alloc(q, true, vport->rxq_model); + if (err) { + dev_err(dev, "Memory allocation for Rx Buffer Queue %u failed\n", + i); + goto err_out; + } + } + } + + return 0; + +err_out: + idpf_rx_desc_rel_all(vport); + + return err; +} + +/** + * idpf_txq_group_rel - Release all resources for txq groups + * @vport: vport to release txq groups on + */ +static void idpf_txq_group_rel(struct idpf_vport *vport) +{ + int i, j; + + if (!vport->txq_grps) + return; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (j = 0; j < txq_grp->num_txq; j++) { + kfree(txq_grp->txqs[j]); + txq_grp->txqs[j] = NULL; + } + kfree(txq_grp->complq); + txq_grp->complq = NULL; + } + kfree(vport->txq_grps); + vport->txq_grps = NULL; +} + +/** + * idpf_rxq_sw_queue_rel - Release software queue resources + * @rx_qgrp: rx queue group with software queues + */ +static void idpf_rxq_sw_queue_rel(struct idpf_rxq_group *rx_qgrp) +{ + int i, j; + + for (i = 0; i < rx_qgrp->vport->num_bufqs_per_qgrp; i++) { + struct idpf_bufq_set *bufq_set = &rx_qgrp->splitq.bufq_sets[i]; + + for (j = 0; j < bufq_set->num_refillqs; j++) { + kfree(bufq_set->refillqs[j].ring); + bufq_set->refillqs[j].ring = NULL; + } + kfree(bufq_set->refillqs); + bufq_set->refillqs = NULL; + } +} + +/** + * idpf_rxq_group_rel - Release all resources for rxq groups + * @vport: vport to release rxq groups on + */ +static void idpf_rxq_group_rel(struct idpf_vport *vport) +{ + int i; + + if (!vport->rxq_grps) + return; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq; + int j; + + if (idpf_is_queue_model_split(vport->rxq_model)) { + num_rxq = rx_qgrp->splitq.num_rxq_sets; + for (j = 0; j < num_rxq; j++) { + kfree(rx_qgrp->splitq.rxq_sets[j]); + rx_qgrp->splitq.rxq_sets[j] = NULL; + } + + idpf_rxq_sw_queue_rel(rx_qgrp); + kfree(rx_qgrp->splitq.bufq_sets); + rx_qgrp->splitq.bufq_sets = NULL; + } else { + num_rxq = rx_qgrp->singleq.num_rxq; + for (j = 0; j < num_rxq; j++) { + kfree(rx_qgrp->singleq.rxqs[j]); + rx_qgrp->singleq.rxqs[j] = NULL; + } + } + } + kfree(vport->rxq_grps); + vport->rxq_grps = NULL; +} + +/** + * idpf_vport_queue_grp_rel_all - Release all queue groups + * @vport: vport to release queue groups for + */ +static void idpf_vport_queue_grp_rel_all(struct idpf_vport *vport) +{ + idpf_txq_group_rel(vport); + idpf_rxq_group_rel(vport); +} + +/** + * idpf_vport_queues_rel - Free memory for all queues + * @vport: virtual port + * + * Free the memory allocated for queues associated to a vport + */ +void idpf_vport_queues_rel(struct idpf_vport *vport) +{ + idpf_tx_desc_rel_all(vport); + idpf_rx_desc_rel_all(vport); + idpf_vport_queue_grp_rel_all(vport); + + kfree(vport->txqs); + vport->txqs = NULL; +} + +/** + * idpf_vport_init_fast_path_txqs - Initialize fast path txq array + * @vport: vport to init txqs on + * + * We get a queue index from skb->queue_mapping and we need a fast way to + * dereference the queue from queue groups. This allows us to quickly pull a + * txq based on a queue index. + * + * Returns 0 on success, negative on failure + */ +static int idpf_vport_init_fast_path_txqs(struct idpf_vport *vport) +{ + int i, j, k = 0; + + vport->txqs = kcalloc(vport->num_txq, sizeof(struct idpf_queue *), + GFP_KERNEL); + + if (!vport->txqs) + return -ENOMEM; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_grp = &vport->txq_grps[i]; + + for (j = 0; j < tx_grp->num_txq; j++, k++) { + vport->txqs[k] = tx_grp->txqs[j]; + vport->txqs[k]->idx = k; + } + } + + return 0; +} + +/** + * idpf_vport_init_num_qs - Initialize number of queues + * @vport: vport to initialize queues + * @vport_msg: data to be filled into vport + */ +void idpf_vport_init_num_qs(struct idpf_vport *vport, + struct virtchnl2_create_vport *vport_msg) +{ + struct idpf_vport_user_config_data *config_data; + u16 idx = vport->idx; + + config_data = &vport->adapter->vport_config[idx]->user_config; + vport->num_txq = le16_to_cpu(vport_msg->num_tx_q); + vport->num_rxq = le16_to_cpu(vport_msg->num_rx_q); + /* number of txqs and rxqs in config data will be zeros only in the + * driver load path and we dont update them there after + */ + if (!config_data->num_req_tx_qs && !config_data->num_req_rx_qs) { + config_data->num_req_tx_qs = le16_to_cpu(vport_msg->num_tx_q); + config_data->num_req_rx_qs = le16_to_cpu(vport_msg->num_rx_q); + } + + if (idpf_is_queue_model_split(vport->txq_model)) + vport->num_complq = le16_to_cpu(vport_msg->num_tx_complq); + if (idpf_is_queue_model_split(vport->rxq_model)) + vport->num_bufq = le16_to_cpu(vport_msg->num_rx_bufq); + + /* Adjust number of buffer queues per Rx queue group. */ + if (!idpf_is_queue_model_split(vport->rxq_model)) { + vport->num_bufqs_per_qgrp = 0; + vport->bufq_size[0] = IDPF_RX_BUF_4096; + + return; + } + + vport->num_bufqs_per_qgrp = IDPF_MAX_BUFQS_PER_RXQ_GRP; + /* Bufq[0] default buffer size is 4K + * Bufq[1] default buffer size is 4K + */ + vport->bufq_size[0] = IDPF_RX_BUF_4096; + vport->bufq_size[1] = IDPF_RX_BUF_4096; +} + +/** + * idpf_vport_calc_num_q_desc - Calculate number of queue groups + * @vport: vport to calculate q groups for + */ +void idpf_vport_calc_num_q_desc(struct idpf_vport *vport) +{ + struct idpf_vport_user_config_data *config_data; + int num_bufqs = vport->num_bufqs_per_qgrp; + u32 num_req_txq_desc, num_req_rxq_desc; + u16 idx = vport->idx; + int i; + + config_data = &vport->adapter->vport_config[idx]->user_config; + num_req_txq_desc = config_data->num_req_txq_desc; + num_req_rxq_desc = config_data->num_req_rxq_desc; + + vport->complq_desc_count = 0; + if (num_req_txq_desc) { + vport->txq_desc_count = num_req_txq_desc; + if (idpf_is_queue_model_split(vport->txq_model)) { + vport->complq_desc_count = num_req_txq_desc; + if (vport->complq_desc_count < IDPF_MIN_TXQ_COMPLQ_DESC) + vport->complq_desc_count = + IDPF_MIN_TXQ_COMPLQ_DESC; + } + } else { + vport->txq_desc_count = IDPF_DFLT_TX_Q_DESC_COUNT; + if (idpf_is_queue_model_split(vport->txq_model)) + vport->complq_desc_count = + IDPF_DFLT_TX_COMPLQ_DESC_COUNT; + } + + if (num_req_rxq_desc) + vport->rxq_desc_count = num_req_rxq_desc; + else + vport->rxq_desc_count = IDPF_DFLT_RX_Q_DESC_COUNT; + + for (i = 0; i < num_bufqs; i++) { + if (!vport->bufq_desc_count[i]) + vport->bufq_desc_count[i] = + IDPF_RX_BUFQ_DESC_COUNT(vport->rxq_desc_count, + num_bufqs); + } +} + +/** + * idpf_vport_calc_total_qs - Calculate total number of queues + * @adapter: private data struct + * @vport_idx: vport idx to retrieve vport pointer + * @vport_msg: message to fill with data + * @max_q: vport max queue info + * + * Return 0 on success, error value on failure. + */ +int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx, + struct virtchnl2_create_vport *vport_msg, + struct idpf_vport_max_q *max_q) +{ + int dflt_splitq_txq_grps = 0, dflt_singleq_txqs = 0; + int dflt_splitq_rxq_grps = 0, dflt_singleq_rxqs = 0; + u16 num_req_tx_qs = 0, num_req_rx_qs = 0; + struct idpf_vport_config *vport_config; + u16 num_txq_grps, num_rxq_grps; + u32 num_qs; + + vport_config = adapter->vport_config[vport_idx]; + if (vport_config) { + num_req_tx_qs = vport_config->user_config.num_req_tx_qs; + num_req_rx_qs = vport_config->user_config.num_req_rx_qs; + } else { + int num_cpus; + + /* Restrict num of queues to cpus online as a default + * configuration to give best performance. User can always + * override to a max number of queues via ethtool. + */ + num_cpus = num_online_cpus(); + + dflt_splitq_txq_grps = min_t(int, max_q->max_txq, num_cpus); + dflt_singleq_txqs = min_t(int, max_q->max_txq, num_cpus); + dflt_splitq_rxq_grps = min_t(int, max_q->max_rxq, num_cpus); + dflt_singleq_rxqs = min_t(int, max_q->max_rxq, num_cpus); + } + + if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->txq_model))) { + num_txq_grps = num_req_tx_qs ? num_req_tx_qs : dflt_splitq_txq_grps; + vport_msg->num_tx_complq = cpu_to_le16(num_txq_grps * + IDPF_COMPLQ_PER_GROUP); + vport_msg->num_tx_q = cpu_to_le16(num_txq_grps * + IDPF_DFLT_SPLITQ_TXQ_PER_GROUP); + } else { + num_txq_grps = IDPF_DFLT_SINGLEQ_TX_Q_GROUPS; + num_qs = num_txq_grps * (num_req_tx_qs ? num_req_tx_qs : + dflt_singleq_txqs); + vport_msg->num_tx_q = cpu_to_le16(num_qs); + vport_msg->num_tx_complq = 0; + } + if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->rxq_model))) { + num_rxq_grps = num_req_rx_qs ? num_req_rx_qs : dflt_splitq_rxq_grps; + vport_msg->num_rx_bufq = cpu_to_le16(num_rxq_grps * + IDPF_MAX_BUFQS_PER_RXQ_GRP); + vport_msg->num_rx_q = cpu_to_le16(num_rxq_grps * + IDPF_DFLT_SPLITQ_RXQ_PER_GROUP); + } else { + num_rxq_grps = IDPF_DFLT_SINGLEQ_RX_Q_GROUPS; + num_qs = num_rxq_grps * (num_req_rx_qs ? num_req_rx_qs : + dflt_singleq_rxqs); + vport_msg->num_rx_q = cpu_to_le16(num_qs); + vport_msg->num_rx_bufq = 0; + } + + return 0; +} + +/** + * idpf_vport_calc_num_q_groups - Calculate number of queue groups + * @vport: vport to calculate q groups for + */ +void idpf_vport_calc_num_q_groups(struct idpf_vport *vport) +{ + if (idpf_is_queue_model_split(vport->txq_model)) + vport->num_txq_grp = vport->num_txq; + else + vport->num_txq_grp = IDPF_DFLT_SINGLEQ_TX_Q_GROUPS; + + if (idpf_is_queue_model_split(vport->rxq_model)) + vport->num_rxq_grp = vport->num_rxq; + else + vport->num_rxq_grp = IDPF_DFLT_SINGLEQ_RX_Q_GROUPS; +} + +/** + * idpf_vport_calc_numq_per_grp - Calculate number of queues per group + * @vport: vport to calculate queues for + * @num_txq: return parameter for number of TX queues + * @num_rxq: return parameter for number of RX queues + */ +static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport, + u16 *num_txq, u16 *num_rxq) +{ + if (idpf_is_queue_model_split(vport->txq_model)) + *num_txq = IDPF_DFLT_SPLITQ_TXQ_PER_GROUP; + else + *num_txq = vport->num_txq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + *num_rxq = IDPF_DFLT_SPLITQ_RXQ_PER_GROUP; + else + *num_rxq = vport->num_rxq; +} + +/** + * idpf_rxq_set_descids - set the descids supported by this queue + * @vport: virtual port data structure + * @q: rx queue for which descids are set + * + */ +static void idpf_rxq_set_descids(struct idpf_vport *vport, struct idpf_queue *q) +{ + if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M; + } else { + if (vport->base_rxd) + q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M; + else + q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M; + } +} + +/** + * idpf_txq_group_alloc - Allocate all txq group resources + * @vport: vport to allocate txq groups for + * @num_txq: number of txqs to allocate for each group + * + * Returns 0 on success, negative on failure + */ +static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) +{ + bool flow_sch_en; + int err, i; + + vport->txq_grps = kcalloc(vport->num_txq_grp, + sizeof(*vport->txq_grps), GFP_KERNEL); + if (!vport->txq_grps) + return -ENOMEM; + + flow_sch_en = !idpf_is_cap_ena(vport->adapter, IDPF_OTHER_CAPS, + VIRTCHNL2_CAP_SPLITQ_QSCHED); + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + struct idpf_adapter *adapter = vport->adapter; + int j; + + tx_qgrp->vport = vport; + tx_qgrp->num_txq = num_txq; + + for (j = 0; j < tx_qgrp->num_txq; j++) { + tx_qgrp->txqs[j] = kzalloc(sizeof(*tx_qgrp->txqs[j]), + GFP_KERNEL); + if (!tx_qgrp->txqs[j]) { + err = -ENOMEM; + goto err_alloc; + } + } + + for (j = 0; j < tx_qgrp->num_txq; j++) { + struct idpf_queue *q = tx_qgrp->txqs[j]; + + q->dev = &adapter->pdev->dev; + q->desc_count = vport->txq_desc_count; + q->tx_max_bufs = idpf_get_max_tx_bufs(adapter); + q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter); + q->vport = vport; + q->txq_grp = tx_qgrp; + hash_init(q->sched_buf_hash); + + if (flow_sch_en) + set_bit(__IDPF_Q_FLOW_SCH_EN, q->flags); + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + continue; + + tx_qgrp->complq = kcalloc(IDPF_COMPLQ_PER_GROUP, + sizeof(*tx_qgrp->complq), + GFP_KERNEL); + if (!tx_qgrp->complq) { + err = -ENOMEM; + goto err_alloc; + } + + tx_qgrp->complq->dev = &adapter->pdev->dev; + tx_qgrp->complq->desc_count = vport->complq_desc_count; + tx_qgrp->complq->vport = vport; + tx_qgrp->complq->txq_grp = tx_qgrp; + + if (flow_sch_en) + __set_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags); + } + + return 0; + +err_alloc: + idpf_txq_group_rel(vport); + + return err; +} + +/** + * idpf_rxq_group_alloc - Allocate all rxq group resources + * @vport: vport to allocate rxq groups for + * @num_rxq: number of rxqs to allocate for each group + * + * Returns 0 on success, negative on failure + */ +static int idpf_rxq_group_alloc(struct idpf_vport *vport, u16 num_rxq) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_queue *q; + int i, k, err = 0; + bool hs; + + vport->rxq_grps = kcalloc(vport->num_rxq_grp, + sizeof(struct idpf_rxq_group), GFP_KERNEL); + if (!vport->rxq_grps) + return -ENOMEM; + + hs = idpf_vport_get_hsplit(vport) == ETHTOOL_TCP_DATA_SPLIT_ENABLED; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + int j; + + rx_qgrp->vport = vport; + if (!idpf_is_queue_model_split(vport->rxq_model)) { + rx_qgrp->singleq.num_rxq = num_rxq; + for (j = 0; j < num_rxq; j++) { + rx_qgrp->singleq.rxqs[j] = + kzalloc(sizeof(*rx_qgrp->singleq.rxqs[j]), + GFP_KERNEL); + if (!rx_qgrp->singleq.rxqs[j]) { + err = -ENOMEM; + goto err_alloc; + } + } + goto skip_splitq_rx_init; + } + rx_qgrp->splitq.num_rxq_sets = num_rxq; + + for (j = 0; j < num_rxq; j++) { + rx_qgrp->splitq.rxq_sets[j] = + kzalloc(sizeof(struct idpf_rxq_set), + GFP_KERNEL); + if (!rx_qgrp->splitq.rxq_sets[j]) { + err = -ENOMEM; + goto err_alloc; + } + } + + rx_qgrp->splitq.bufq_sets = kcalloc(vport->num_bufqs_per_qgrp, + sizeof(struct idpf_bufq_set), + GFP_KERNEL); + if (!rx_qgrp->splitq.bufq_sets) { + err = -ENOMEM; + goto err_alloc; + } + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + struct idpf_bufq_set *bufq_set = + &rx_qgrp->splitq.bufq_sets[j]; + int swq_size = sizeof(struct idpf_sw_queue); + + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->dev = &adapter->pdev->dev; + q->desc_count = vport->bufq_desc_count[j]; + q->vport = vport; + q->rxq_grp = rx_qgrp; + q->idx = j; + q->rx_buf_size = vport->bufq_size[j]; + q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK; + q->rx_buf_stride = IDPF_RX_BUF_STRIDE; + + if (hs) { + q->rx_hsplit_en = true; + q->rx_hbuf_size = IDPF_HDR_BUF_SIZE; + } + + bufq_set->num_refillqs = num_rxq; + bufq_set->refillqs = kcalloc(num_rxq, swq_size, + GFP_KERNEL); + if (!bufq_set->refillqs) { + err = -ENOMEM; + goto err_alloc; + } + for (k = 0; k < bufq_set->num_refillqs; k++) { + struct idpf_sw_queue *refillq = + &bufq_set->refillqs[k]; + + refillq->dev = &vport->adapter->pdev->dev; + refillq->desc_count = + vport->bufq_desc_count[j]; + set_bit(__IDPF_Q_GEN_CHK, refillq->flags); + set_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags); + refillq->ring = kcalloc(refillq->desc_count, + sizeof(u16), + GFP_KERNEL); + if (!refillq->ring) { + err = -ENOMEM; + goto err_alloc; + } + } + } + +skip_splitq_rx_init: + for (j = 0; j < num_rxq; j++) { + if (!idpf_is_queue_model_split(vport->rxq_model)) { + q = rx_qgrp->singleq.rxqs[j]; + goto setup_rxq; + } + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + rx_qgrp->splitq.rxq_sets[j]->refillq0 = + &rx_qgrp->splitq.bufq_sets[0].refillqs[j]; + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) + rx_qgrp->splitq.rxq_sets[j]->refillq1 = + &rx_qgrp->splitq.bufq_sets[1].refillqs[j]; + + if (hs) { + q->rx_hsplit_en = true; + q->rx_hbuf_size = IDPF_HDR_BUF_SIZE; + } + +setup_rxq: + q->dev = &adapter->pdev->dev; + q->desc_count = vport->rxq_desc_count; + q->vport = vport; + q->rxq_grp = rx_qgrp; + q->idx = (i * num_rxq) + j; + /* In splitq mode, RXQ buffer size should be + * set to that of the first buffer queue + * associated with this RXQ + */ + q->rx_buf_size = vport->bufq_size[0]; + q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK; + q->rx_max_pkt_size = vport->netdev->mtu + + IDPF_PACKET_HDR_PAD; + idpf_rxq_set_descids(vport, q); + } + } + +err_alloc: + if (err) + idpf_rxq_group_rel(vport); + + return err; +} + +/** + * idpf_vport_queue_grp_alloc_all - Allocate all queue groups/resources + * @vport: vport with qgrps to allocate + * + * Returns 0 on success, negative on failure + */ +static int idpf_vport_queue_grp_alloc_all(struct idpf_vport *vport) +{ + u16 num_txq, num_rxq; + int err; + + idpf_vport_calc_numq_per_grp(vport, &num_txq, &num_rxq); + + err = idpf_txq_group_alloc(vport, num_txq); + if (err) + goto err_out; + + err = idpf_rxq_group_alloc(vport, num_rxq); + if (err) + goto err_out; + + return 0; + +err_out: + idpf_vport_queue_grp_rel_all(vport); + + return err; +} + +/** + * idpf_vport_queues_alloc - Allocate memory for all queues + * @vport: virtual port + * + * Allocate memory for queues associated with a vport. Returns 0 on success, + * negative on failure. + */ +int idpf_vport_queues_alloc(struct idpf_vport *vport) +{ + int err; + + err = idpf_vport_queue_grp_alloc_all(vport); + if (err) + goto err_out; + + err = idpf_tx_desc_alloc_all(vport); + if (err) + goto err_out; + + err = idpf_rx_desc_alloc_all(vport); + if (err) + goto err_out; + + err = idpf_vport_init_fast_path_txqs(vport); + if (err) + goto err_out; + + return 0; + +err_out: + idpf_vport_queues_rel(vport); + + return err; +} + +/** + * idpf_tx_handle_sw_marker - Handle queue marker packet + * @tx_q: tx queue to handle software marker + */ +static void idpf_tx_handle_sw_marker(struct idpf_queue *tx_q) +{ + struct idpf_vport *vport = tx_q->vport; + int i; + + clear_bit(__IDPF_Q_SW_MARKER, tx_q->flags); + /* Hardware must write marker packets to all queues associated with + * completion queues. So check if all queues received marker packets + */ + for (i = 0; i < vport->num_txq; i++) + /* If we're still waiting on any other TXQ marker completions, + * just return now since we cannot wake up the marker_wq yet. + */ + if (test_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags)) + return; + + /* Drain complete */ + set_bit(IDPF_VPORT_SW_MARKER, vport->flags); + wake_up(&vport->sw_marker_wq); +} + +/** + * idpf_tx_splitq_clean_hdr - Clean TX buffer resources for header portion of + * packet + * @tx_q: tx queue to clean buffer from + * @tx_buf: buffer to be cleaned + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @napi_budget: Used to determine if we are in netpoll + */ +static void idpf_tx_splitq_clean_hdr(struct idpf_queue *tx_q, + struct idpf_tx_buf *tx_buf, + struct idpf_cleaned_stats *cleaned, + int napi_budget) +{ + napi_consume_skb(tx_buf->skb, napi_budget); + + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_single(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + + dma_unmap_len_set(tx_buf, len, 0); + } + + /* clear tx_buf data */ + tx_buf->skb = NULL; + + cleaned->bytes += tx_buf->bytecount; + cleaned->packets += tx_buf->gso_segs; +} + +/** + * idpf_tx_clean_stashed_bufs - clean bufs that were stored for + * out of order completions + * @txq: queue to clean + * @compl_tag: completion tag of packet to clean (from completion descriptor) + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @budget: Used to determine if we are in netpoll + */ +static void idpf_tx_clean_stashed_bufs(struct idpf_queue *txq, u16 compl_tag, + struct idpf_cleaned_stats *cleaned, + int budget) +{ + struct idpf_tx_stash *stash; + struct hlist_node *tmp_buf; + + /* Buffer completion */ + hash_for_each_possible_safe(txq->sched_buf_hash, stash, tmp_buf, + hlist, compl_tag) { + if (unlikely(stash->buf.compl_tag != (int)compl_tag)) + continue; + + if (stash->buf.skb) { + idpf_tx_splitq_clean_hdr(txq, &stash->buf, cleaned, + budget); + } else if (dma_unmap_len(&stash->buf, len)) { + dma_unmap_page(txq->dev, + dma_unmap_addr(&stash->buf, dma), + dma_unmap_len(&stash->buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(&stash->buf, len, 0); + } + + /* Push shadow buf back onto stack */ + idpf_buf_lifo_push(&txq->buf_stack, stash); + + hash_del(&stash->hlist); + } +} + +/** + * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a + * later time (only relevant for flow scheduling mode) + * @txq: Tx queue to clean + * @tx_buf: buffer to store + */ +static int idpf_stash_flow_sch_buffers(struct idpf_queue *txq, + struct idpf_tx_buf *tx_buf) +{ + struct idpf_tx_stash *stash; + + if (unlikely(!dma_unmap_addr(tx_buf, dma) && + !dma_unmap_len(tx_buf, len))) + return 0; + + stash = idpf_buf_lifo_pop(&txq->buf_stack); + if (unlikely(!stash)) { + net_err_ratelimited("%s: No out-of-order TX buffers left!\n", + txq->vport->netdev->name); + + return -ENOMEM; + } + + /* Store buffer params in shadow buffer */ + stash->buf.skb = tx_buf->skb; + stash->buf.bytecount = tx_buf->bytecount; + stash->buf.gso_segs = tx_buf->gso_segs; + dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); + dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); + stash->buf.compl_tag = tx_buf->compl_tag; + + /* Add buffer to buf_hash table to be freed later */ + hash_add(txq->sched_buf_hash, &stash->hlist, stash->buf.compl_tag); + + memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); + + /* Reinitialize buf_id portion of tag */ + tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + return 0; +} + +#define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ +do { \ + (ntc)++; \ + if (unlikely(!(ntc))) { \ + ntc -= (txq)->desc_count; \ + buf = (txq)->tx_buf; \ + desc = IDPF_FLEX_TX_DESC(txq, 0); \ + } else { \ + (buf)++; \ + (desc)++; \ + } \ +} while (0) + +/** + * idpf_tx_splitq_clean - Reclaim resources from buffer queue + * @tx_q: Tx queue to clean + * @end: queue index until which it should be cleaned + * @napi_budget: Used to determine if we are in netpoll + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @descs_only: true if queue is using flow-based scheduling and should + * not clean buffers at this time + * + * Cleans the queue descriptor ring. If the queue is using queue-based + * scheduling, the buffers will be cleaned as well. If the queue is using + * flow-based scheduling, only the descriptors are cleaned at this time. + * Separate packet completion events will be reported on the completion queue, + * and the buffers will be cleaned separately. The stats are not updated from + * this function when using flow-based scheduling. + */ +static void idpf_tx_splitq_clean(struct idpf_queue *tx_q, u16 end, + int napi_budget, + struct idpf_cleaned_stats *cleaned, + bool descs_only) +{ + union idpf_tx_flex_desc *next_pending_desc = NULL; + union idpf_tx_flex_desc *tx_desc; + s16 ntc = tx_q->next_to_clean; + struct idpf_tx_buf *tx_buf; + + tx_desc = IDPF_FLEX_TX_DESC(tx_q, ntc); + next_pending_desc = IDPF_FLEX_TX_DESC(tx_q, end); + tx_buf = &tx_q->tx_buf[ntc]; + ntc -= tx_q->desc_count; + + while (tx_desc != next_pending_desc) { + union idpf_tx_flex_desc *eop_desc; + + /* If this entry in the ring was used as a context descriptor, + * it's corresponding entry in the buffer ring will have an + * invalid completion tag since no buffer was used. We can + * skip this descriptor since there is no buffer to clean. + */ + if (unlikely(tx_buf->compl_tag == IDPF_SPLITQ_TX_INVAL_COMPL_TAG)) + goto fetch_next_txq_desc; + + eop_desc = (union idpf_tx_flex_desc *)tx_buf->next_to_watch; + + /* clear next_to_watch to prevent false hangs */ + tx_buf->next_to_watch = NULL; + + if (descs_only) { + if (idpf_stash_flow_sch_buffers(tx_q, tx_buf)) + goto tx_splitq_clean_out; + + while (tx_desc != eop_desc) { + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, + tx_desc, tx_buf); + + if (dma_unmap_len(tx_buf, len)) { + if (idpf_stash_flow_sch_buffers(tx_q, + tx_buf)) + goto tx_splitq_clean_out; + } + } + } else { + idpf_tx_splitq_clean_hdr(tx_q, tx_buf, cleaned, + napi_budget); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, + tx_desc, tx_buf); + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_q->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + } + +fetch_next_txq_desc: + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); + } + +tx_splitq_clean_out: + ntc += tx_q->desc_count; + tx_q->next_to_clean = ntc; +} + +#define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ +do { \ + (buf)++; \ + (ntc)++; \ + if (unlikely((ntc) == (txq)->desc_count)) { \ + buf = (txq)->tx_buf; \ + ntc = 0; \ + } \ +} while (0) + +/** + * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers + * @txq: queue to clean + * @compl_tag: completion tag of packet to clean (from completion descriptor) + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @budget: Used to determine if we are in netpoll + * + * Cleans all buffers associated with the input completion tag either from the + * TX buffer ring or from the hash table if the buffers were previously + * stashed. Returns the byte/segment count for the cleaned packet associated + * this completion tag. + */ +static bool idpf_tx_clean_buf_ring(struct idpf_queue *txq, u16 compl_tag, + struct idpf_cleaned_stats *cleaned, + int budget) +{ + u16 idx = compl_tag & txq->compl_tag_bufid_m; + struct idpf_tx_buf *tx_buf = NULL; + u16 ntc = txq->next_to_clean; + u16 num_descs_cleaned = 0; + u16 orig_idx = idx; + + tx_buf = &txq->tx_buf[idx]; + + while (tx_buf->compl_tag == (int)compl_tag) { + if (tx_buf->skb) { + idpf_tx_splitq_clean_hdr(txq, tx_buf, cleaned, budget); + } else if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(txq->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + + memset(tx_buf, 0, sizeof(struct idpf_tx_buf)); + tx_buf->compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + num_descs_cleaned++; + idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + } + + /* If we didn't clean anything on the ring for this completion, there's + * nothing more to do. + */ + if (unlikely(!num_descs_cleaned)) + return false; + + /* Otherwise, if we did clean a packet on the ring directly, it's safe + * to assume that the descriptors starting from the original + * next_to_clean up until the previously cleaned packet can be reused. + * Therefore, we will go back in the ring and stash any buffers still + * in the ring into the hash table to be cleaned later. + */ + tx_buf = &txq->tx_buf[ntc]; + while (tx_buf != &txq->tx_buf[orig_idx]) { + idpf_stash_flow_sch_buffers(txq, tx_buf); + idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); + } + + /* Finally, update next_to_clean to reflect the work that was just done + * on the ring, if any. If the packet was only cleaned from the hash + * table, the ring will not be impacted, therefore we should not touch + * next_to_clean. The updated idx is used here + */ + txq->next_to_clean = idx; + + return true; +} + +/** + * idpf_tx_handle_rs_completion - clean a single packet and all of its buffers + * whether on the buffer ring or in the hash table + * @txq: Tx ring to clean + * @desc: pointer to completion queue descriptor to extract completion + * information from + * @cleaned: pointer to stats struct to track cleaned packets/bytes + * @budget: Used to determine if we are in netpoll + * + * Returns bytes/packets cleaned + */ +static void idpf_tx_handle_rs_completion(struct idpf_queue *txq, + struct idpf_splitq_tx_compl_desc *desc, + struct idpf_cleaned_stats *cleaned, + int budget) +{ + u16 compl_tag; + + if (!test_bit(__IDPF_Q_FLOW_SCH_EN, txq->flags)) { + u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); + + return idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + } + + compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); + + /* If we didn't clean anything on the ring, this packet must be + * in the hash table. Go clean it there. + */ + if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget)) + idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); +} + +/** + * idpf_tx_clean_complq - Reclaim resources on completion queue + * @complq: Tx ring to clean + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns true if there's any budget left (e.g. the clean is finished) + */ +static bool idpf_tx_clean_complq(struct idpf_queue *complq, int budget, + int *cleaned) +{ + struct idpf_splitq_tx_compl_desc *tx_desc; + struct idpf_vport *vport = complq->vport; + s16 ntc = complq->next_to_clean; + struct idpf_netdev_priv *np; + unsigned int complq_budget; + bool complq_ok = true; + int i; + + complq_budget = vport->compln_clean_budget; + tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, ntc); + ntc -= complq->desc_count; + + do { + struct idpf_cleaned_stats cleaned_stats = { }; + struct idpf_queue *tx_q; + int rel_tx_qid; + u16 hw_head; + u8 ctype; /* completion type */ + u16 gen; + + /* if the descriptor isn't done, no work yet to do */ + gen = le16_get_bits(tx_desc->qid_comptype_gen, + IDPF_TXD_COMPLQ_GEN_M); + if (test_bit(__IDPF_Q_GEN_CHK, complq->flags) != gen) + break; + + /* Find necessary info of TX queue to clean buffers */ + rel_tx_qid = le16_get_bits(tx_desc->qid_comptype_gen, + IDPF_TXD_COMPLQ_QID_M); + if (rel_tx_qid >= complq->txq_grp->num_txq || + !complq->txq_grp->txqs[rel_tx_qid]) { + dev_err(&complq->vport->adapter->pdev->dev, + "TxQ not found\n"); + goto fetch_next_desc; + } + tx_q = complq->txq_grp->txqs[rel_tx_qid]; + + /* Determine completion type */ + ctype = le16_get_bits(tx_desc->qid_comptype_gen, + IDPF_TXD_COMPLQ_COMPL_TYPE_M); + switch (ctype) { + case IDPF_TXD_COMPLT_RE: + hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head); + + idpf_tx_splitq_clean(tx_q, hw_head, budget, + &cleaned_stats, true); + break; + case IDPF_TXD_COMPLT_RS: + idpf_tx_handle_rs_completion(tx_q, tx_desc, + &cleaned_stats, budget); + break; + case IDPF_TXD_COMPLT_SW_MARKER: + idpf_tx_handle_sw_marker(tx_q); + break; + default: + dev_err(&tx_q->vport->adapter->pdev->dev, + "Unknown TX completion type: %d\n", + ctype); + goto fetch_next_desc; + } + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_add(&tx_q->q_stats.tx.packets, cleaned_stats.packets); + u64_stats_add(&tx_q->q_stats.tx.bytes, cleaned_stats.bytes); + tx_q->cleaned_pkts += cleaned_stats.packets; + tx_q->cleaned_bytes += cleaned_stats.bytes; + complq->num_completions++; + u64_stats_update_end(&tx_q->stats_sync); + +fetch_next_desc: + tx_desc++; + ntc++; + if (unlikely(!ntc)) { + ntc -= complq->desc_count; + tx_desc = IDPF_SPLITQ_TX_COMPLQ_DESC(complq, 0); + change_bit(__IDPF_Q_GEN_CHK, complq->flags); + } + + prefetch(tx_desc); + + /* update budget accounting */ + complq_budget--; + } while (likely(complq_budget)); + + /* Store the state of the complq to be used later in deciding if a + * TXQ can be started again + */ + if (unlikely(IDPF_TX_COMPLQ_PENDING(complq->txq_grp) > + IDPF_TX_COMPLQ_OVERFLOW_THRESH(complq))) + complq_ok = false; + + np = netdev_priv(complq->vport->netdev); + for (i = 0; i < complq->txq_grp->num_txq; ++i) { + struct idpf_queue *tx_q = complq->txq_grp->txqs[i]; + struct netdev_queue *nq; + bool dont_wake; + + /* We didn't clean anything on this queue, move along */ + if (!tx_q->cleaned_bytes) + continue; + + *cleaned += tx_q->cleaned_pkts; + + /* Update BQL */ + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + + dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || + np->state != __IDPF_VPORT_UP || + !netif_carrier_ok(tx_q->vport->netdev); + /* Check if the TXQ needs to and can be restarted */ + __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, + IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH, + dont_wake); + + /* Reset cleaned stats for the next time this queue is + * cleaned + */ + tx_q->cleaned_bytes = 0; + tx_q->cleaned_pkts = 0; + } + + ntc += complq->desc_count; + complq->next_to_clean = ntc; + + return !!complq_budget; +} + +/** + * idpf_tx_splitq_build_ctb - populate command tag and size for queue + * based scheduling descriptors + * @desc: descriptor to populate + * @params: pointer to tx params struct + * @td_cmd: command to be filled in desc + * @size: size of buffer + */ +void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size) +{ + desc->q.qw1.cmd_dtype = + le16_encode_bits(params->dtype, IDPF_FLEX_TXD_QW1_DTYPE_M); + desc->q.qw1.cmd_dtype |= + le16_encode_bits(td_cmd, IDPF_FLEX_TXD_QW1_CMD_M); + desc->q.qw1.buf_size = cpu_to_le16(size); + desc->q.qw1.l2tags.l2tag1 = cpu_to_le16(params->td_tag); +} + +/** + * idpf_tx_splitq_build_flow_desc - populate command tag and size for flow + * scheduling descriptors + * @desc: descriptor to populate + * @params: pointer to tx params struct + * @td_cmd: command to be filled in desc + * @size: size of buffer + */ +void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size) +{ + desc->flow.qw1.cmd_dtype = (u16)params->dtype | td_cmd; + desc->flow.qw1.rxr_bufsize = cpu_to_le16((u16)size); + desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); +} + +/** + * idpf_tx_maybe_stop_common - 1st level check for common Tx stop conditions + * @tx_q: the queue to be checked + * @size: number of descriptors we want to assure is available + * + * Returns 0 if stop is not needed + */ +int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size) +{ + struct netdev_queue *nq; + + if (likely(IDPF_DESC_UNUSED(tx_q) >= size)) + return 0; + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.tx.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + + return netif_txq_maybe_stop(nq, IDPF_DESC_UNUSED(tx_q), size, size); +} + +/** + * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions + * @tx_q: the queue to be checked + * @descs_needed: number of descriptors required for this packet + * + * Returns 0 if stop is not needed + */ +static int idpf_tx_maybe_stop_splitq(struct idpf_queue *tx_q, + unsigned int descs_needed) +{ + if (idpf_tx_maybe_stop_common(tx_q, descs_needed)) + goto splitq_stop; + + /* If there are too many outstanding completions expected on the + * completion queue, stop the TX queue to give the device some time to + * catch up + */ + if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > + IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq))) + goto splitq_stop; + + /* Also check for available book keeping buffers; if we are low, stop + * the queue to wait for more completions + */ + if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q))) + goto splitq_stop; + + return 0; + +splitq_stop: + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.tx.q_busy); + u64_stats_update_end(&tx_q->stats_sync); + netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx); + + return -EBUSY; +} + +/** + * idpf_tx_buf_hw_update - Store the new tail value + * @tx_q: queue to bump + * @val: new tail index + * @xmit_more: more skb's pending + * + * The naming here is special in that 'hw' signals that this function is about + * to do a register write to update our queue status. We know this can only + * mean tail here as HW should be owning head for TX. + */ +void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, + bool xmit_more) +{ + struct netdev_queue *nq; + + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + tx_q->next_to_use = val; + + idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED); + + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + + /* notify HW of packet */ + if (netif_xmit_stopped(nq) || !xmit_more) + writel(val, tx_q->tail); +} + +/** + * idpf_tx_desc_count_required - calculate number of Tx descriptors needed + * @txq: queue to send buffer on + * @skb: send buffer + * + * Returns number of data descriptors needed for this skb. + */ +unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, + struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo; + unsigned int count = 0, i; + + count += !!skb_headlen(skb); + + if (!skb_is_nonlinear(skb)) + return count; + + shinfo = skb_shinfo(skb); + for (i = 0; i < shinfo->nr_frags; i++) { + unsigned int size; + + size = skb_frag_size(&shinfo->frags[i]); + + /* We only need to use the idpf_size_to_txd_count check if the + * fragment is going to span multiple descriptors, + * i.e. size >= 16K. + */ + if (size >= SZ_16K) + count += idpf_size_to_txd_count(size); + else + count++; + } + + if (idpf_chk_linearize(skb, txq->tx_max_bufs, count)) { + if (__skb_linearize(skb)) + return 0; + + count = idpf_size_to_txd_count(skb->len); + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.tx.linearize); + u64_stats_update_end(&txq->stats_sync); + } + + return count; +} + +/** + * idpf_tx_dma_map_error - handle TX DMA map errors + * @txq: queue to send buffer on + * @skb: send buffer + * @first: original first buffer info buffer for packet + * @idx: starting point on ring to unwind + */ +void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 idx) +{ + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.tx.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + /* clear dma mappings for failed tx_buf map */ + for (;;) { + struct idpf_tx_buf *tx_buf; + + tx_buf = &txq->tx_buf[idx]; + idpf_tx_buf_rel(txq, tx_buf); + if (tx_buf == first) + break; + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + if (skb_is_gso(skb)) { + union idpf_tx_flex_desc *tx_desc; + + /* If we failed a DMA mapping for a TSO packet, we will have + * used one additional descriptor for a context + * descriptor. Reset that here. + */ + tx_desc = IDPF_FLEX_TX_DESC(txq, idx); + memset(tx_desc, 0, sizeof(struct idpf_flex_tx_ctx_desc)); + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + /* Update tail in case netdev_xmit_more was previously true */ + idpf_tx_buf_hw_update(txq, idx, false); +} + +/** + * idpf_tx_splitq_bump_ntu - adjust NTU and generation + * @txq: the tx ring to wrap + * @ntu: ring index to bump + */ +static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_queue *txq, u16 ntu) +{ + ntu++; + + if (ntu == txq->desc_count) { + ntu = 0; + txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq); + } + + return ntu; +} + +/** + * idpf_tx_splitq_map - Build the Tx flex descriptor + * @tx_q: queue to send buffer on + * @params: pointer to splitq params struct + * @first: first buffer info buffer to use + * + * This function loops over the skb data pointed to by *first + * and gets a physical address for each memory location and programs + * it and the length into the transmit flex descriptor. + */ +static void idpf_tx_splitq_map(struct idpf_queue *tx_q, + struct idpf_tx_splitq_params *params, + struct idpf_tx_buf *first) +{ + union idpf_tx_flex_desc *tx_desc; + unsigned int data_len, size; + struct idpf_tx_buf *tx_buf; + u16 i = tx_q->next_to_use; + struct netdev_queue *nq; + struct sk_buff *skb; + skb_frag_t *frag; + u16 td_cmd = 0; + dma_addr_t dma; + + skb = first->skb; + + td_cmd = params->offload.td_cmd; + + data_len = skb->data_len; + size = skb_headlen(skb); + + tx_desc = IDPF_FLEX_TX_DESC(tx_q, i); + + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buf = first; + + params->compl_tag = + (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + + if (dma_mapping_error(tx_q->dev, dma)) + return idpf_tx_dma_map_error(tx_q, skb, first, i); + + tx_buf->compl_tag = params->compl_tag; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + /* buf_addr is in same location for both desc types */ + tx_desc->q.buf_addr = cpu_to_le64(dma); + + /* The stack can send us fragments that are too large for a + * single descriptor i.e. frag size > 16K-1. We will need to + * split the fragment across multiple descriptors in this case. + * To adhere to HW alignment restrictions, the fragment needs + * to be split such that the first chunk ends on a 4K boundary + * and all subsequent chunks start on a 4K boundary. We still + * want to send as much data as possible though, so our + * intermediate descriptor chunk size will be 12K. + * + * For example, consider a 32K fragment mapped to DMA addr 2600. + * ------------------------------------------------------------ + * | frag_size = 32K | + * ------------------------------------------------------------ + * |2600 |16384 |28672 + * + * 3 descriptors will be used for this fragment. The HW expects + * the descriptors to contain the following: + * ------------------------------------------------------------ + * | size = 13784 | size = 12K | size = 6696 | + * | dma = 2600 | dma = 16384 | dma = 28672 | + * ------------------------------------------------------------ + * + * We need to first adjust the max_data for the first chunk so + * that it ends on a 4K boundary. By negating the value of the + * DMA address and taking only the low order bits, we're + * effectively calculating + * 4K - (DMA addr lower order bits) = + * bytes to next boundary. + * + * Add that to our base aligned max_data (12K) and we have + * our first chunk size. In the example above, + * 13784 = 12K + (4096-2600) + * + * After guaranteeing the first chunk ends on a 4K boundary, we + * will give the intermediate descriptors 12K chunks and + * whatever is left to the final descriptor. This ensures that + * all descriptors used for the remaining chunks of the + * fragment start on a 4K boundary and we use as few + * descriptors as possible. + */ + max_data += -dma & (IDPF_TX_MAX_READ_REQ_SIZE - 1); + while (unlikely(size > IDPF_TX_MAX_DESC_DATA)) { + idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, + max_data); + + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); + i = 0; + tx_q->compl_tag_cur_gen = + IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } + + /* Since this packet has a buffer that is going to span + * multiple descriptors, it's going to leave holes in + * to the TX buffer ring. To ensure these holes do not + * cause issues in the cleaning routines, we will clear + * them of any stale data and assign them the same + * completion tag as the current packet. Then when the + * packet is being cleaned, the cleaning routines will + * simply pass over these holes and finish cleaning the + * rest of the packet. + */ + memset(&tx_q->tx_buf[i], 0, sizeof(struct idpf_tx_buf)); + tx_q->tx_buf[i].compl_tag = params->compl_tag; + + /* Adjust the DMA offset and the remaining size of the + * fragment. On the first iteration of this loop, + * max_data will be >= 12K and <= 16K-1. On any + * subsequent iteration of this loop, max_data will + * always be 12K. + */ + dma += max_data; + size -= max_data; + + /* Reset max_data since remaining chunks will be 12K + * at most + */ + max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; + + /* buf_addr is in same location for both desc types */ + tx_desc->q.buf_addr = cpu_to_le64(dma); + } + + if (!data_len) + break; + + idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IDPF_FLEX_TX_DESC(tx_q, 0); + i = 0; + tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, + DMA_TO_DEVICE); + + tx_buf = &tx_q->tx_buf[i]; + } + + /* record SW timestamp if HW timestamp is not available */ + skb_tx_timestamp(skb); + + /* write last descriptor with RS and EOP bits */ + td_cmd |= params->eop_cmd; + idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); + i = idpf_tx_splitq_bump_ntu(tx_q, i); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + tx_q->txq_grp->num_completions_pending++; + + /* record bytecount for BQL */ + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytecount); + + idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); +} + +/** + * idpf_tso - computes mss and TSO length to prepare for TSO + * @skb: pointer to skb + * @off: pointer to struct that holds offload parameters + * + * Returns error (negative) if TSO was requested but cannot be applied to the + * given skb, 0 if TSO does not apply to the given skb, or 1 otherwise. + */ +int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) +{ + const struct skb_shared_info *shinfo; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_start; + int err; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + shinfo = skb_shinfo(skb); + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else if (ip.v6->version == 6) { + ip.v6->payload_len = 0; + } + + l4_start = skb_transport_offset(skb); + + /* remove payload length from checksum */ + paylen = skb->len - l4_start; + + switch (shinfo->gso_type & ~SKB_GSO_DODGY) { + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + off->tso_hdr_len = __tcp_hdrlen(l4.tcp) + l4_start; + break; + case SKB_GSO_UDP_L4: + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + /* compute length of segmentation header */ + off->tso_hdr_len = sizeof(struct udphdr) + l4_start; + l4.udp->len = htons(shinfo->gso_size + sizeof(struct udphdr)); + break; + default: + return -EINVAL; + } + + off->tso_len = skb->len - off->tso_hdr_len; + off->mss = shinfo->gso_size; + off->tso_segs = shinfo->gso_segs; + + off->tx_flags |= IDPF_TX_FLAGS_TSO; + + return 1; +} + +/** + * __idpf_chk_linearize - Check skb is not using too many buffers + * @skb: send buffer + * @max_bufs: maximum number of buffers + * + * For TSO we need to count the TSO header and segment payload separately. As + * such we need to check cases where we have max_bufs-1 fragments or more as we + * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 + * for the segment payload in the first descriptor, and another max_buf-1 for + * the fragments. + */ +static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const skb_frag_t *frag, *stale; + int nr_frags, sum; + + /* no need to check if number of frags is less than max_bufs - 1 */ + nr_frags = shinfo->nr_frags; + if (nr_frags < (max_bufs - 1)) + return false; + + /* We need to walk through the list and validate that each group + * of max_bufs-2 fragments totals at least gso_size. + */ + nr_frags -= max_bufs - 2; + frag = &shinfo->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We use + * this as the worst case scenario in which the frag ahead of us only + * provides one byte which is why we are limited to max_bufs-2 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - shinfo->gso_size; + + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + for (stale = &shinfo->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + + sum += skb_frag_size(frag++); + + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > IDPF_TX_MAX_DESC_DATA) { + int align_pad = -(skb_frag_off(stale)) & + (IDPF_TX_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + } while (stale_size > IDPF_TX_MAX_DESC_DATA); + } + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + if (!nr_frags--) + break; + + sum -= stale_size; + } + + return false; +} + +/** + * idpf_chk_linearize - Check if skb exceeds max descriptors per packet + * @skb: send buffer + * @max_bufs: maximum scatter gather buffers for single packet + * @count: number of buffers this packet needs + * + * Make sure we don't exceed maximum scatter gather buffers for a single + * packet. We have to do some special checking around the boundary (max_bufs-1) + * if TSO is on since we need count the TSO header and payload separately. + * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO + * header, 1 for segment payload, and then 7 for the fragments. + */ +bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count) +{ + if (likely(count < max_bufs)) + return false; + if (skb_is_gso(skb)) + return __idpf_chk_linearize(skb, max_bufs); + + return count > max_bufs; +} + +/** + * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring + * @txq: queue to put context descriptor on + * + * Since the TX buffer rings mimics the descriptor ring, update the tx buffer + * ring entry to reflect that this index is a context descriptor + */ +static struct idpf_flex_tx_ctx_desc * +idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq) +{ + struct idpf_flex_tx_ctx_desc *desc; + int i = txq->next_to_use; + + memset(&txq->tx_buf[i], 0, sizeof(struct idpf_tx_buf)); + txq->tx_buf[i].compl_tag = IDPF_SPLITQ_TX_INVAL_COMPL_TAG; + + /* grab the next descriptor */ + desc = IDPF_FLEX_TX_CTX_DESC(txq, i); + txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); + + return desc; +} + +/** + * idpf_tx_drop_skb - free the SKB and bump tail if necessary + * @tx_q: queue to send buffer on + * @skb: pointer to skb + */ +netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb) +{ + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.tx.skb_drops); + u64_stats_update_end(&tx_q->stats_sync); + + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} + +/** + * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors + * @skb: send buffer + * @tx_q: queue to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, + struct idpf_queue *tx_q) +{ + struct idpf_tx_splitq_params tx_params = { }; + struct idpf_tx_buf *first; + unsigned int count; + int tso; + + count = idpf_tx_desc_count_required(tx_q, skb); + if (unlikely(!count)) + return idpf_tx_drop_skb(tx_q, skb); + + tso = idpf_tso(skb, &tx_params.offload); + if (unlikely(tso < 0)) + return idpf_tx_drop_skb(tx_q, skb); + + /* Check for splitq specific TX resources */ + count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso); + if (idpf_tx_maybe_stop_splitq(tx_q, count)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_BUSY; + } + + if (tso) { + /* If tso is needed, set up context desc */ + struct idpf_flex_tx_ctx_desc *ctx_desc = + idpf_tx_splitq_get_ctx_desc(tx_q); + + ctx_desc->tso.qw1.cmd_dtype = + cpu_to_le16(IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX | + IDPF_TX_FLEX_CTX_DESC_CMD_TSO); + ctx_desc->tso.qw0.flex_tlen = + cpu_to_le32(tx_params.offload.tso_len & + IDPF_TXD_FLEX_CTX_TLEN_M); + ctx_desc->tso.qw0.mss_rt = + cpu_to_le16(tx_params.offload.mss & + IDPF_TXD_FLEX_CTX_MSS_RT_M); + ctx_desc->tso.qw0.hdr_len = tx_params.offload.tso_hdr_len; + + u64_stats_update_begin(&tx_q->stats_sync); + u64_stats_inc(&tx_q->q_stats.tx.lso_pkts); + u64_stats_update_end(&tx_q->stats_sync); + } + + /* record the location of the first descriptor for this packet */ + first = &tx_q->tx_buf[tx_q->next_to_use]; + first->skb = skb; + + if (tso) { + first->gso_segs = tx_params.offload.tso_segs; + first->bytecount = skb->len + + ((first->gso_segs - 1) * tx_params.offload.tso_hdr_len); + } else { + first->gso_segs = 1; + first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + } + + if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_q->flags)) { + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; + tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; + /* Set the RE bit to catch any packets that may have not been + * stashed during RS completion cleaning. MIN_GAP is set to + * MIN_RING size to ensure it will be set at least once each + * time around the ring. + */ + if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) { + tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; + tx_q->txq_grp->num_completions_pending++; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) + tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; + + } else { + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; + tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; + } + + idpf_tx_splitq_map(tx_q, &tx_params, first); + + return NETDEV_TX_OK; +} + +/** + * idpf_tx_splitq_start - Selects the right Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + struct net_device *netdev) +{ + struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + struct idpf_queue *tx_q; + + if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) { + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; + } + + tx_q = vport->txqs[skb_get_queue_mapping(skb)]; + + /* hardware can't handle really short frames, hardware padding works + * beyond this point + */ + if (skb_put_padto(skb, tx_q->tx_min_pkt_len)) { + idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); + + return NETDEV_TX_OK; + } + + return idpf_tx_splitq_frame(skb, tx_q); +} + +/** + * idpf_ptype_to_htype - get a hash type + * @decoded: Decoded Rx packet type related fields + * + * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by + * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of + * Rx desc. + */ +enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded) +{ + if (!decoded->known) + return PKT_HASH_TYPE_NONE; + if (decoded->payload_layer == IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 && + decoded->inner_prot) + return PKT_HASH_TYPE_L4; + if (decoded->payload_layer == IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 && + decoded->outer_ip) + return PKT_HASH_TYPE_L3; + if (decoded->outer_ip == IDPF_RX_PTYPE_OUTER_L2) + return PKT_HASH_TYPE_L2; + + return PKT_HASH_TYPE_NONE; +} + +/** + * idpf_rx_hash - set the hash value in the skb + * @rxq: Rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being populated + * @rx_desc: Receive descriptor + * @decoded: Decoded Rx packet type related fields + */ +static void idpf_rx_hash(struct idpf_queue *rxq, struct sk_buff *skb, + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_ptype_decoded *decoded) +{ + u32 hash; + + if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXHASH))) + return; + + hash = le16_to_cpu(rx_desc->hash1) | + (rx_desc->ff2_mirrid_hash2.hash2 << 16) | + (rx_desc->hash3 << 24); + + skb_set_hash(skb, hash, idpf_ptype_to_htype(decoded)); +} + +/** + * idpf_rx_csum - Indicate in skb if checksum is good + * @rxq: Rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being populated + * @csum_bits: checksum fields extracted from the descriptor + * @decoded: Decoded Rx packet type related fields + * + * skb->protocol must be set before this function is called + */ +static void idpf_rx_csum(struct idpf_queue *rxq, struct sk_buff *skb, + struct idpf_rx_csum_decoded *csum_bits, + struct idpf_rx_ptype_decoded *decoded) +{ + bool ipv4, ipv6; + + /* check if Rx checksum is enabled */ + if (unlikely(!idpf_is_feature_ena(rxq->vport, NETIF_F_RXCSUM))) + return; + + /* check if HW has decoded the packet and checksum */ + if (!(csum_bits->l3l4p)) + return; + + ipv4 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV4); + ipv6 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV6); + + if (ipv4 && (csum_bits->ipe || csum_bits->eipe)) + goto checksum_fail; + + if (ipv6 && csum_bits->ipv6exadd) + return; + + /* check for L4 errors and handle packets that were not able to be + * checksummed + */ + if (csum_bits->l4e) + goto checksum_fail; + + /* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */ + switch (decoded->inner_prot) { + case IDPF_RX_PTYPE_INNER_PROT_ICMP: + case IDPF_RX_PTYPE_INNER_PROT_TCP: + case IDPF_RX_PTYPE_INNER_PROT_UDP: + if (!csum_bits->raw_csum_inv) { + u16 csum = csum_bits->raw_csum; + + skb->csum = csum_unfold((__force __sum16)~swab16(csum)); + skb->ip_summed = CHECKSUM_COMPLETE; + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + break; + case IDPF_RX_PTYPE_INNER_PROT_SCTP: + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + default: + break; + } + + return; + +checksum_fail: + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.hw_csum_err); + u64_stats_update_end(&rxq->stats_sync); +} + +/** + * idpf_rx_splitq_extract_csum_bits - Extract checksum bits from descriptor + * @rx_desc: receive descriptor + * @csum: structure to extract checksum fields + * + **/ +static void idpf_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_csum_decoded *csum) +{ + u8 qword0, qword1; + + qword0 = rx_desc->status_err0_qw0; + qword1 = rx_desc->status_err0_qw1; + + csum->ipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_M, + qword1); + csum->eipe = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_M, + qword1); + csum->l4e = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_M, + qword1); + csum->l3l4p = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_M, + qword1); + csum->ipv6exadd = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M, + qword0); + csum->raw_csum_inv = + le16_get_bits(rx_desc->ptype_err_fflags0, + VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M); + csum->raw_csum = le16_to_cpu(rx_desc->misc.raw_cs); +} + +/** + * idpf_rx_rsc - Set the RSC fields in the skb + * @rxq : Rx descriptor ring packet is being transacted on + * @skb : pointer to current skb being populated + * @rx_desc: Receive descriptor + * @decoded: Decoded Rx packet type related fields + * + * Return 0 on success and error code on failure + * + * Populate the skb fields with the total number of RSC segments, RSC payload + * length and packet type. + */ +static int idpf_rx_rsc(struct idpf_queue *rxq, struct sk_buff *skb, + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, + struct idpf_rx_ptype_decoded *decoded) +{ + u16 rsc_segments, rsc_seg_len; + bool ipv4, ipv6; + int len; + + if (unlikely(!decoded->outer_ip)) + return -EINVAL; + + rsc_seg_len = le16_to_cpu(rx_desc->misc.rscseglen); + if (unlikely(!rsc_seg_len)) + return -EINVAL; + + ipv4 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV4); + ipv6 = IDPF_RX_PTYPE_TO_IPV(decoded, IDPF_RX_PTYPE_OUTER_IPV6); + + if (unlikely(!(ipv4 ^ ipv6))) + return -EINVAL; + + rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len); + if (unlikely(rsc_segments == 1)) + return 0; + + NAPI_GRO_CB(skb)->count = rsc_segments; + skb_shinfo(skb)->gso_size = rsc_seg_len; + + skb_reset_network_header(skb); + len = skb->len - skb_transport_offset(skb); + + if (ipv4) { + struct iphdr *ipv4h = ip_hdr(skb); + + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + /* Reset and set transport header offset in skb */ + skb_set_transport_header(skb, sizeof(struct iphdr)); + + /* Compute the TCP pseudo header checksum*/ + tcp_hdr(skb)->check = + ~tcp_v4_check(len, ipv4h->saddr, ipv4h->daddr, 0); + } else { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + tcp_hdr(skb)->check = + ~tcp_v6_check(len, &ipv6h->saddr, &ipv6h->daddr, 0); + } + + tcp_gro_complete(skb); + + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.rsc_pkts); + u64_stats_update_end(&rxq->stats_sync); + + return 0; +} + +/** + * idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor + * @rxq: Rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being populated + * @rx_desc: Receive descriptor + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, protocol, and + * other fields within the skb. + */ +static int idpf_rx_process_skb_fields(struct idpf_queue *rxq, + struct sk_buff *skb, + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) +{ + struct idpf_rx_csum_decoded csum_bits = { }; + struct idpf_rx_ptype_decoded decoded; + u16 rx_ptype; + + rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0, + VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M); + + skb->protocol = eth_type_trans(skb, rxq->vport->netdev); + + decoded = rxq->vport->rx_ptype_lkup[rx_ptype]; + /* If we don't know the ptype we can't do anything else with it. Just + * pass it up the stack as-is. + */ + if (!decoded.known) + return 0; + + /* process RSS/hash */ + idpf_rx_hash(rxq, skb, rx_desc, &decoded); + + if (le16_get_bits(rx_desc->hdrlen_flags, + VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) + return idpf_rx_rsc(rxq, skb, rx_desc, &decoded); + + idpf_rx_splitq_extract_csum_bits(rx_desc, &csum_bits); + idpf_rx_csum(rxq, skb, &csum_bits, &decoded); + + return 0; +} + +/** + * idpf_rx_add_frag - Add contents of Rx buffer to sk_buff as a frag + * @rx_buf: buffer containing page to add + * @skb: sk_buff to place the data into + * @size: packet length from rx_desc + * + * This function will add the data contained in rx_buf->page to the skb. + * It will just attach the page as a frag to the skb. + * The function will then update the page offset. + */ +void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size) +{ + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, + rx_buf->page_offset, size, rx_buf->truesize); + + rx_buf->page = NULL; +} + +/** + * idpf_rx_construct_skb - Allocate skb and populate it + * @rxq: Rx descriptor queue + * @rx_buf: Rx buffer to pull data from + * @size: the length of the packet + * + * This function allocates an skb. It then populates it with the page + * data from the current receive descriptor, taking care to set up the + * skb correctly. + */ +struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + struct idpf_rx_buf *rx_buf, + unsigned int size) +{ + unsigned int headlen; + struct sk_buff *skb; + void *va; + + va = page_address(rx_buf->page) + rx_buf->page_offset; + + /* prefetch first cache line of first page */ + net_prefetch(va); + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rxq->q_vector->napi, IDPF_RX_HDR_SIZE, + GFP_ATOMIC); + if (unlikely(!skb)) { + idpf_rx_put_page(rx_buf); + + return NULL; + } + + skb_record_rx_queue(skb, rxq->idx); + + /* Determine available headroom for copy */ + headlen = size; + if (headlen > IDPF_RX_HDR_SIZE) + headlen = eth_get_headlen(skb->dev, va, IDPF_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + + /* if we exhaust the linear part then add what is left as a frag */ + size -= headlen; + if (!size) { + idpf_rx_put_page(rx_buf); + + return skb; + } + + page_pool_release_page(rx_buf->pp, rx_buf->page); + skb_add_rx_frag(skb, 0, rx_buf->page, rx_buf->page_offset + headlen, + size, rx_buf->truesize); + + /* Since we're giving the page to the stack, clear our reference to it. + * We'll get a new one during buffer posting. + */ + rx_buf->page = NULL; + + return skb; +} + +/** + * idpf_rx_hdr_construct_skb - Allocate skb and populate it from header buffer + * @rxq: Rx descriptor queue + * @va: Rx buffer to pull data from + * @size: the length of the packet + * + * This function allocates an skb. It then populates it with the page data from + * the current receive descriptor, taking care to set up the skb correctly. + * This specifically uses a header buffer to start building the skb. + */ +static struct sk_buff *idpf_rx_hdr_construct_skb(struct idpf_queue *rxq, + const void *va, + unsigned int size) +{ + struct sk_buff *skb; + + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rxq->q_vector->napi, size, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + skb_record_rx_queue(skb, rxq->idx); + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* More than likely, a payload fragment, which will use a page from + * page_pool will be added to the SKB so mark it for recycle + * preemptively. And if not, it's inconsequential. + */ + + return skb; +} + +/** + * idpf_rx_splitq_test_staterr - tests bits in Rx descriptor + * status and error fields + * @stat_err_field: field from descriptor to test bits in + * @stat_err_bits: value to mask + * + */ +static bool idpf_rx_splitq_test_staterr(const u8 stat_err_field, + const u8 stat_err_bits) +{ + return !!(stat_err_field & stat_err_bits); +} + +/** + * idpf_rx_splitq_is_eop - process handling of EOP buffers + * @rx_desc: Rx descriptor for current buffer + * + * If the buffer is an EOP buffer, this function exits returning true, + * otherwise return false indicating that this is in fact a non-EOP buffer. + */ +static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) +{ + /* if we are the last buffer then there is nothing else to do */ + return likely(idpf_rx_splitq_test_staterr(rx_desc->status_err0_qw1, + IDPF_RXD_EOF_SPLITQ)); +} + +/** + * idpf_rx_splitq_clean - Clean completed descriptors from Rx queue + * @rxq: Rx descriptor queue to retrieve receive buffer queue + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the system. + * + * Returns amount of work completed + */ +static int idpf_rx_splitq_clean(struct idpf_queue *rxq, int budget) +{ + int total_rx_bytes = 0, total_rx_pkts = 0; + struct idpf_queue *rx_bufq = NULL; + struct sk_buff *skb = rxq->skb; + u16 ntc = rxq->next_to_clean; + + /* Process Rx packets bounded by budget */ + while (likely(total_rx_pkts < budget)) { + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; + struct idpf_sw_queue *refillq = NULL; + struct idpf_rxq_set *rxq_set = NULL; + struct idpf_rx_buf *rx_buf = NULL; + union virtchnl2_rx_desc *desc; + unsigned int pkt_len = 0; + unsigned int hdr_len = 0; + u16 gen_id, buf_id = 0; + /* Header buffer overflow only valid for header split */ + bool hbo = false; + int bufq_id; + u8 rxdid; + + /* get the Rx desc from Rx queue based on 'next_to_clean' */ + desc = IDPF_RX_DESC(rxq, ntc); + rx_desc = (struct virtchnl2_rx_flex_desc_adv_nic_3 *)desc; + + /* This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc + */ + dma_rmb(); + + /* if the descriptor isn't done, no work yet to do */ + gen_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id, + VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M); + + if (test_bit(__IDPF_Q_GEN_CHK, rxq->flags) != gen_id) + break; + + rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M, + rx_desc->rxdid_ucast); + if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) { + IDPF_RX_BUMP_NTC(rxq, ntc); + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.bad_descs); + u64_stats_update_end(&rxq->stats_sync); + continue; + } + + pkt_len = le16_get_bits(rx_desc->pktlen_gen_bufq_id, + VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M); + + hbo = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M, + rx_desc->status_err0_qw1); + + if (unlikely(hbo)) { + /* If a header buffer overflow, occurs, i.e. header is + * too large to fit in the header split buffer, HW will + * put the entire packet, including headers, in the + * data/payload buffer. + */ + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.hsplit_buf_ovf); + u64_stats_update_end(&rxq->stats_sync); + goto bypass_hsplit; + } + + hdr_len = le16_get_bits(rx_desc->hdrlen_flags, + VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M); + +bypass_hsplit: + bufq_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id, + VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M); + + rxq_set = container_of(rxq, struct idpf_rxq_set, rxq); + if (!bufq_id) + refillq = rxq_set->refillq0; + else + refillq = rxq_set->refillq1; + + /* retrieve buffer from the rxq */ + rx_bufq = &rxq->rxq_grp->splitq.bufq_sets[bufq_id].bufq; + + buf_id = le16_to_cpu(rx_desc->buf_id); + + rx_buf = &rx_bufq->rx_buf.buf[buf_id]; + + if (hdr_len) { + const void *va = (u8 *)rx_bufq->rx_buf.hdr_buf_va + + (u32)buf_id * IDPF_HDR_BUF_SIZE; + + skb = idpf_rx_hdr_construct_skb(rxq, va, hdr_len); + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_inc(&rxq->q_stats.rx.hsplit_pkts); + u64_stats_update_end(&rxq->stats_sync); + } + + if (pkt_len) { + idpf_rx_sync_for_cpu(rx_buf, pkt_len); + if (skb) { + page_pool_release_page(rx_buf->pp, rx_buf->page); + idpf_rx_add_frag(rx_buf, skb, pkt_len); + } else { + skb = idpf_rx_construct_skb(rxq, rx_buf, + pkt_len); + } + } else { + idpf_rx_put_page(rx_buf); + } + + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; + + idpf_rx_post_buf_refill(refillq, buf_id); + + IDPF_RX_BUMP_NTC(rxq, ntc); + /* skip if it is non EOP desc */ + if (!idpf_rx_splitq_is_eop(rx_desc)) + continue; + + /* pad skb if needed (to make valid ethernet frame) */ + if (eth_skb_pad(skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + + /* protocol */ + if (unlikely(idpf_rx_process_skb_fields(rxq, skb, rx_desc))) { + dev_kfree_skb_any(skb); + skb = NULL; + continue; + } + + /* send completed skb up the stack */ + napi_gro_receive(&rxq->q_vector->napi, skb); + skb = NULL; + + /* update budget accounting */ + total_rx_pkts++; + } + + rxq->next_to_clean = ntc; + + rxq->skb = skb; + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_add(&rxq->q_stats.rx.packets, total_rx_pkts); + u64_stats_add(&rxq->q_stats.rx.bytes, total_rx_bytes); + u64_stats_update_end(&rxq->stats_sync); + + /* guarantee a trip back through this routine if there was a failure */ + return total_rx_pkts; +} + +/** + * idpf_rx_update_bufq_desc - Update buffer queue descriptor + * @bufq: Pointer to the buffer queue + * @refill_desc: SW Refill queue descriptor containing buffer ID + * @buf_desc: Buffer queue descriptor + * + * Return 0 on success and negative on failure. + */ +static int idpf_rx_update_bufq_desc(struct idpf_queue *bufq, u16 refill_desc, + struct virtchnl2_splitq_rx_buf_desc *buf_desc) +{ + struct idpf_rx_buf *buf; + dma_addr_t addr; + u16 buf_id; + + buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); + + buf = &bufq->rx_buf.buf[buf_id]; + + addr = idpf_alloc_page(bufq->pp, buf, bufq->rx_buf_size); + if (unlikely(addr == DMA_MAPPING_ERROR)) + return -ENOMEM; + + buf_desc->pkt_addr = cpu_to_le64(addr); + buf_desc->qword0.buf_id = cpu_to_le16(buf_id); + + if (!bufq->rx_hsplit_en) + return 0; + + buf_desc->hdr_addr = cpu_to_le64(bufq->rx_buf.hdr_buf_pa + + (u32)buf_id * IDPF_HDR_BUF_SIZE); + + return 0; +} + +/** + * idpf_rx_clean_refillq - Clean refill queue buffers + * @bufq: buffer queue to post buffers back to + * @refillq: refill queue to clean + * + * This function takes care of the buffer refill management + */ +static void idpf_rx_clean_refillq(struct idpf_queue *bufq, + struct idpf_sw_queue *refillq) +{ + struct virtchnl2_splitq_rx_buf_desc *buf_desc; + u16 bufq_nta = bufq->next_to_alloc; + u16 ntc = refillq->next_to_clean; + int cleaned = 0; + u16 gen; + + buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, bufq_nta); + + /* make sure we stop at ring wrap in the unlikely case ring is full */ + while (likely(cleaned < refillq->desc_count)) { + u16 refill_desc = IDPF_SPLITQ_RX_BI_DESC(refillq, ntc); + bool failure; + + gen = FIELD_GET(IDPF_RX_BI_GEN_M, refill_desc); + if (test_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags) != gen) + break; + + failure = idpf_rx_update_bufq_desc(bufq, refill_desc, + buf_desc); + if (failure) + break; + + if (unlikely(++ntc == refillq->desc_count)) { + change_bit(__IDPF_RFLQ_GEN_CHK, refillq->flags); + ntc = 0; + } + + if (unlikely(++bufq_nta == bufq->desc_count)) { + buf_desc = IDPF_SPLITQ_RX_BUF_DESC(bufq, 0); + bufq_nta = 0; + } else { + buf_desc++; + } + + cleaned++; + } + + if (!cleaned) + return; + + /* We want to limit how many transactions on the bus we trigger with + * tail writes so we only do it in strides. It's also important we + * align the write to a multiple of 8 as required by HW. + */ + if (((bufq->next_to_use <= bufq_nta ? 0 : bufq->desc_count) + + bufq_nta - bufq->next_to_use) >= IDPF_RX_BUF_POST_STRIDE) + idpf_rx_buf_hw_update(bufq, ALIGN_DOWN(bufq_nta, + IDPF_RX_BUF_POST_STRIDE)); + + /* update next to alloc since we have filled the ring */ + refillq->next_to_clean = ntc; + bufq->next_to_alloc = bufq_nta; +} + +/** + * idpf_rx_clean_refillq_all - Clean all refill queues + * @bufq: buffer queue with refill queues + * + * Iterates through all refill queues assigned to the buffer queue assigned to + * this vector. Returns true if clean is complete within budget, false + * otherwise. + */ +static void idpf_rx_clean_refillq_all(struct idpf_queue *bufq) +{ + struct idpf_bufq_set *bufq_set; + int i; + + bufq_set = container_of(bufq, struct idpf_bufq_set, bufq); + for (i = 0; i < bufq_set->num_refillqs; i++) + idpf_rx_clean_refillq(bufq, &bufq_set->refillqs[i]); +} + +/** + * idpf_vport_intr_clean_queues - MSIX mode Interrupt Handler + * @irq: interrupt number + * @data: pointer to a q_vector + * + */ +static irqreturn_t idpf_vport_intr_clean_queues(int __always_unused irq, + void *data) +{ + struct idpf_q_vector *q_vector = (struct idpf_q_vector *)data; + + q_vector->total_events++; + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * idpf_vport_intr_napi_del_all - Unregister napi for all q_vectors in vport + * @vport: virtual port structure + * + */ +static void idpf_vport_intr_napi_del_all(struct idpf_vport *vport) +{ + u16 v_idx; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) + netif_napi_del(&vport->q_vectors[v_idx].napi); +} + +/** + * idpf_vport_intr_napi_dis_all - Disable NAPI for all q_vectors in the vport + * @vport: main vport structure + */ +static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) +{ + int v_idx; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) + napi_disable(&vport->q_vectors[v_idx].napi); +} + +/** + * idpf_vport_intr_rel - Free memory allocated for interrupt vectors + * @vport: virtual port + * + * Free the memory allocated for interrupt vectors associated to a vport + */ +void idpf_vport_intr_rel(struct idpf_vport *vport) +{ + int i, j, v_idx; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; + + kfree(q_vector->bufq); + q_vector->bufq = NULL; + kfree(q_vector->tx); + q_vector->tx = NULL; + kfree(q_vector->rx); + q_vector->rx = NULL; + } + + /* Clean up the mapping of queues to vectors */ + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + + if (idpf_is_queue_model_split(vport->rxq_model)) + for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) + rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; + else + for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) + rx_qgrp->singleq.rxqs[j]->q_vector = NULL; + } + + if (idpf_is_queue_model_split(vport->txq_model)) + for (i = 0; i < vport->num_txq_grp; i++) + vport->txq_grps[i].complq->q_vector = NULL; + else + for (i = 0; i < vport->num_txq_grp; i++) + for (j = 0; j < vport->txq_grps[i].num_txq; j++) + vport->txq_grps[i].txqs[j]->q_vector = NULL; + + kfree(vport->q_vectors); + vport->q_vectors = NULL; +} + +/** + * idpf_vport_intr_rel_irq - Free the IRQ association with the OS + * @vport: main vport structure + */ +static void idpf_vport_intr_rel_irq(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + int vector; + + for (vector = 0; vector < vport->num_q_vectors; vector++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; + int irq_num, vidx; + + /* free only the irqs that were actually requested */ + if (!q_vector) + continue; + + vidx = vport->q_vector_idxs[vector]; + irq_num = adapter->msix_entries[vidx].vector; + + /* clear the affinity_mask in the IRQ descriptor */ + irq_set_affinity_hint(irq_num, NULL); + free_irq(irq_num, q_vector); + } +} + +/** + * idpf_vport_intr_dis_irq_all - Disable all interrupt + * @vport: main vport structure + */ +static void idpf_vport_intr_dis_irq_all(struct idpf_vport *vport) +{ + struct idpf_q_vector *q_vector = vport->q_vectors; + int q_idx; + + for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) + writel(0, q_vector[q_idx].intr_reg.dyn_ctl); +} + +/** + * idpf_vport_intr_buildreg_itr - Enable default interrupt generation settings + * @q_vector: pointer to q_vector + * @type: itr index + * @itr: itr value + */ +static u32 idpf_vport_intr_buildreg_itr(struct idpf_q_vector *q_vector, + const int type, u16 itr) +{ + u32 itr_val; + + itr &= IDPF_ITR_MASK; + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ + itr_val = q_vector->intr_reg.dyn_ctl_intena_m | + (type << q_vector->intr_reg.dyn_ctl_itridx_s) | + (itr << (q_vector->intr_reg.dyn_ctl_intrvl_s - 1)); + + return itr_val; +} + +/** + * idpf_update_dim_sample - Update dim sample with packets and bytes + * @q_vector: the vector associated with the interrupt + * @dim_sample: dim sample to update + * @dim: dim instance structure + * @packets: total packets + * @bytes: total bytes + * + * Update the dim sample with the packets and bytes which are passed to this + * function. Set the dim state appropriately if the dim settings gets stale. + */ +static void idpf_update_dim_sample(struct idpf_q_vector *q_vector, + struct dim_sample *dim_sample, + struct dim *dim, u64 packets, u64 bytes) +{ + dim_update_sample(q_vector->total_events, packets, bytes, dim_sample); + dim_sample->comp_ctr = 0; + + /* if dim settings get stale, like when not updated for 1 second or + * longer, force it to start again. This addresses the frequent case + * of an idle queue being switched to by the scheduler. + */ + if (ktime_ms_delta(dim_sample->time, dim->start_sample.time) >= HZ) + dim->state = DIM_START_MEASURE; +} + +/** + * idpf_net_dim - Update net DIM algorithm + * @q_vector: the vector associated with the interrupt + * + * Create a DIM sample and notify net_dim() so that it can possibly decide + * a new ITR value based on incoming packets, bytes, and interrupts. + * + * This function is a no-op if the queue is not configured to dynamic ITR. + */ +static void idpf_net_dim(struct idpf_q_vector *q_vector) +{ + struct dim_sample dim_sample = { }; + u64 packets, bytes; + u32 i; + + if (!IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode)) + goto check_rx_itr; + + for (i = 0, packets = 0, bytes = 0; i < q_vector->num_txq; i++) { + struct idpf_queue *txq = q_vector->tx[i]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&txq->stats_sync); + packets += u64_stats_read(&txq->q_stats.tx.packets); + bytes += u64_stats_read(&txq->q_stats.tx.bytes); + } while (u64_stats_fetch_retry(&txq->stats_sync, start)); + } + + idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->tx_dim, + packets, bytes); + net_dim(&q_vector->tx_dim, dim_sample); + +check_rx_itr: + if (!IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode)) + return; + + for (i = 0, packets = 0, bytes = 0; i < q_vector->num_rxq; i++) { + struct idpf_queue *rxq = q_vector->rx[i]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&rxq->stats_sync); + packets += u64_stats_read(&rxq->q_stats.rx.packets); + bytes += u64_stats_read(&rxq->q_stats.rx.bytes); + } while (u64_stats_fetch_retry(&rxq->stats_sync, start)); + } + + idpf_update_dim_sample(q_vector, &dim_sample, &q_vector->rx_dim, + packets, bytes); + net_dim(&q_vector->rx_dim, dim_sample); +} + +/** + * idpf_vport_intr_update_itr_ena_irq - Update itr and re-enable MSIX interrupt + * @q_vector: q_vector for which itr is being updated and interrupt enabled + * + * Update the net_dim() algorithm and re-enable the interrupt associated with + * this vector. + */ +void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) +{ + u32 intval; + + /* net_dim() updates ITR out-of-band using a work item */ + idpf_net_dim(q_vector); + + intval = idpf_vport_intr_buildreg_itr(q_vector, + IDPF_NO_ITR_UPDATE_IDX, 0); + + writel(intval, q_vector->intr_reg.dyn_ctl); +} + +/** + * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport + * @vport: main vport structure + * @basename: name for the vector + */ +static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +{ + struct idpf_adapter *adapter = vport->adapter; + int vector, err, irq_num, vidx; + const char *vec_name; + + for (vector = 0; vector < vport->num_q_vectors; vector++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; + + vidx = vport->q_vector_idxs[vector]; + irq_num = adapter->msix_entries[vidx].vector; + + if (q_vector->num_rxq && q_vector->num_txq) + vec_name = "TxRx"; + else if (q_vector->num_rxq) + vec_name = "Rx"; + else if (q_vector->num_txq) + vec_name = "Tx"; + else + continue; + + q_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", + basename, vec_name, vidx); + + err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, + q_vector->name, q_vector); + if (err) { + netdev_err(vport->netdev, + "Request_irq failed, error: %d\n", err); + goto free_q_irqs; + } + /* assign the mask for this irq */ + irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); + } + + return 0; + +free_q_irqs: + while (--vector >= 0) { + vidx = vport->q_vector_idxs[vector]; + irq_num = adapter->msix_entries[vidx].vector; + free_irq(irq_num, &vport->q_vectors[vector]); + } + + return err; +} + +/** + * idpf_vport_intr_write_itr - Write ITR value to the ITR register + * @q_vector: q_vector structure + * @itr: Interrupt throttling rate + * @tx: Tx or Rx ITR + */ +void idpf_vport_intr_write_itr(struct idpf_q_vector *q_vector, u16 itr, bool tx) +{ + struct idpf_intr_reg *intr_reg; + + if (tx && !q_vector->tx) + return; + else if (!tx && !q_vector->rx) + return; + + intr_reg = &q_vector->intr_reg; + writel(ITR_REG_ALIGN(itr) >> IDPF_ITR_GRAN_S, + tx ? intr_reg->tx_itr : intr_reg->rx_itr); +} + +/** + * idpf_vport_intr_ena_irq_all - Enable IRQ for the given vport + * @vport: main vport structure + */ +static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport) +{ + bool dynamic; + int q_idx; + u16 itr; + + for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) { + struct idpf_q_vector *qv = &vport->q_vectors[q_idx]; + + /* Set the initial ITR values */ + if (qv->num_txq) { + dynamic = IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode); + itr = vport->tx_itr_profile[qv->tx_dim.profile_ix]; + idpf_vport_intr_write_itr(qv, dynamic ? + itr : qv->tx_itr_value, + true); + } + + if (qv->num_rxq) { + dynamic = IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode); + itr = vport->rx_itr_profile[qv->rx_dim.profile_ix]; + idpf_vport_intr_write_itr(qv, dynamic ? + itr : qv->rx_itr_value, + false); + } + + if (qv->num_txq || qv->num_rxq) + idpf_vport_intr_update_itr_ena_irq(qv); + } +} + +/** + * idpf_vport_intr_deinit - Release all vector associations for the vport + * @vport: main vport structure + */ +void idpf_vport_intr_deinit(struct idpf_vport *vport) +{ + idpf_vport_intr_napi_dis_all(vport); + idpf_vport_intr_napi_del_all(vport); + idpf_vport_intr_dis_irq_all(vport); + idpf_vport_intr_rel_irq(vport); +} + +/** + * idpf_tx_dim_work - Call back from the stack + * @work: work queue structure + */ +static void idpf_tx_dim_work(struct work_struct *work) +{ + struct idpf_q_vector *q_vector; + struct idpf_vport *vport; + struct dim *dim; + u16 itr; + + dim = container_of(work, struct dim, work); + q_vector = container_of(dim, struct idpf_q_vector, tx_dim); + vport = q_vector->vport; + + if (dim->profile_ix >= ARRAY_SIZE(vport->tx_itr_profile)) + dim->profile_ix = ARRAY_SIZE(vport->tx_itr_profile) - 1; + + /* look up the values in our local table */ + itr = vport->tx_itr_profile[dim->profile_ix]; + + idpf_vport_intr_write_itr(q_vector, itr, true); + + dim->state = DIM_START_MEASURE; +} + +/** + * idpf_rx_dim_work - Call back from the stack + * @work: work queue structure + */ +static void idpf_rx_dim_work(struct work_struct *work) +{ + struct idpf_q_vector *q_vector; + struct idpf_vport *vport; + struct dim *dim; + u16 itr; + + dim = container_of(work, struct dim, work); + q_vector = container_of(dim, struct idpf_q_vector, rx_dim); + vport = q_vector->vport; + + if (dim->profile_ix >= ARRAY_SIZE(vport->rx_itr_profile)) + dim->profile_ix = ARRAY_SIZE(vport->rx_itr_profile) - 1; + + /* look up the values in our local table */ + itr = vport->rx_itr_profile[dim->profile_ix]; + + idpf_vport_intr_write_itr(q_vector, itr, false); + + dim->state = DIM_START_MEASURE; +} + +/** + * idpf_init_dim - Set up dynamic interrupt moderation + * @qv: q_vector structure + */ +static void idpf_init_dim(struct idpf_q_vector *qv) +{ + INIT_WORK(&qv->tx_dim.work, idpf_tx_dim_work); + qv->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + qv->tx_dim.profile_ix = IDPF_DIM_DEFAULT_PROFILE_IX; + + INIT_WORK(&qv->rx_dim.work, idpf_rx_dim_work); + qv->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + qv->rx_dim.profile_ix = IDPF_DIM_DEFAULT_PROFILE_IX; +} + +/** + * idpf_vport_intr_napi_ena_all - Enable NAPI for all q_vectors in the vport + * @vport: main vport structure + */ +static void idpf_vport_intr_napi_ena_all(struct idpf_vport *vport) +{ + int q_idx; + + for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[q_idx]; + + idpf_init_dim(q_vector); + napi_enable(&q_vector->napi); + } +} + +/** + * idpf_tx_splitq_clean_all- Clean completion queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_tx_splitq_clean_all(struct idpf_q_vector *q_vec, + int budget, int *cleaned) +{ + u16 num_txq = q_vec->num_txq; + bool clean_complete = true; + int i, budget_per_q; + + if (unlikely(!num_txq)) + return true; + + budget_per_q = DIV_ROUND_UP(budget, num_txq); + for (i = 0; i < num_txq; i++) + clean_complete &= idpf_tx_clean_complq(q_vec->tx[i], + budget_per_q, cleaned); + + return clean_complete; +} + +/** + * idpf_rx_splitq_clean_all- Clean completion queues + * @q_vec: queue vector + * @budget: Used to determine if we are in netpoll + * @cleaned: returns number of packets cleaned + * + * Returns false if clean is not complete else returns true + */ +static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, + int *cleaned) +{ + u16 num_rxq = q_vec->num_rxq; + bool clean_complete = true; + int pkts_cleaned = 0; + int i, budget_per_q; + + /* We attempt to distribute budget to each Rx queue fairly, but don't + * allow the budget to go below 1 because that would exit polling early. + */ + budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0; + for (i = 0; i < num_rxq; i++) { + struct idpf_queue *rxq = q_vec->rx[i]; + int pkts_cleaned_per_q; + + pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q); + /* if we clean as many as budgeted, we must not be done */ + if (pkts_cleaned_per_q >= budget_per_q) + clean_complete = false; + pkts_cleaned += pkts_cleaned_per_q; + } + *cleaned = pkts_cleaned; + + for (i = 0; i < q_vec->num_bufq; i++) + idpf_rx_clean_refillq_all(q_vec->bufq[i]); + + return clean_complete; +} + +/** + * idpf_vport_splitq_napi_poll - NAPI handler + * @napi: struct from which you get q_vector + * @budget: budget provided by stack + */ +static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) +{ + struct idpf_q_vector *q_vector = + container_of(napi, struct idpf_q_vector, napi); + bool clean_complete; + int work_done = 0; + + /* Handle case where we are called by netpoll with a budget of 0 */ + if (unlikely(!budget)) { + idpf_tx_splitq_clean_all(q_vector, budget, &work_done); + + return 0; + } + + clean_complete = idpf_rx_splitq_clean_all(q_vector, budget, &work_done); + clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, &work_done); + + /* If work not completed, return budget and polling will return */ + if (!clean_complete) + return budget; + + work_done = min_t(int, work_done, budget - 1); + + /* Exit the polling mode, but don't re-enable interrupts if stack might + * poll us due to busy-polling + */ + if (likely(napi_complete_done(napi, work_done))) + idpf_vport_intr_update_itr_ena_irq(q_vector); + + /* Switch to poll mode in the tear-down path after sending disable + * queues virtchnl message, as the interrupts will be disabled after + * that + */ + if (unlikely(q_vector->num_txq && test_bit(__IDPF_Q_POLL_MODE, + q_vector->tx[0]->flags))) + return budget; + else + return work_done; +} + +/** + * idpf_vport_intr_map_vector_to_qs - Map vectors to queues + * @vport: virtual port + * + * Mapping for vectors to queues + */ +static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) +{ + u16 num_txq_grp = vport->num_txq_grp; + int i, j, qv_idx, bufq_vidx = 0; + struct idpf_rxq_group *rx_qgrp; + struct idpf_txq_group *tx_qgrp; + struct idpf_queue *q, *bufq; + u16 q_index; + + for (i = 0, qv_idx = 0; i < vport->num_rxq_grp; i++) { + u16 num_rxq; + + rx_qgrp = &vport->rxq_grps[i]; + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++) { + if (qv_idx >= vport->num_q_vectors) + qv_idx = 0; + + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + q->q_vector = &vport->q_vectors[qv_idx]; + q_index = q->q_vector->num_rxq; + q->q_vector->rx[q_index] = q; + q->q_vector->num_rxq++; + qv_idx++; + } + + if (idpf_is_queue_model_split(vport->rxq_model)) { + for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { + bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; + bufq->q_vector = &vport->q_vectors[bufq_vidx]; + q_index = bufq->q_vector->num_bufq; + bufq->q_vector->bufq[q_index] = bufq; + bufq->q_vector->num_bufq++; + } + if (++bufq_vidx >= vport->num_q_vectors) + bufq_vidx = 0; + } + } + + for (i = 0, qv_idx = 0; i < num_txq_grp; i++) { + u16 num_txq; + + tx_qgrp = &vport->txq_grps[i]; + num_txq = tx_qgrp->num_txq; + + if (idpf_is_queue_model_split(vport->txq_model)) { + if (qv_idx >= vport->num_q_vectors) + qv_idx = 0; + + q = tx_qgrp->complq; + q->q_vector = &vport->q_vectors[qv_idx]; + q_index = q->q_vector->num_txq; + q->q_vector->tx[q_index] = q; + q->q_vector->num_txq++; + qv_idx++; + } else { + for (j = 0; j < num_txq; j++) { + if (qv_idx >= vport->num_q_vectors) + qv_idx = 0; + + q = tx_qgrp->txqs[j]; + q->q_vector = &vport->q_vectors[qv_idx]; + q_index = q->q_vector->num_txq; + q->q_vector->tx[q_index] = q; + q->q_vector->num_txq++; + + qv_idx++; + } + } + } +} + +/** + * idpf_vport_intr_init_vec_idx - Initialize the vector indexes + * @vport: virtual port + * + * Initialize vector indexes with values returened over mailbox + */ +static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_alloc_vectors *ac; + u16 *vecids, total_vecs; + int i; + + ac = adapter->req_vec_chunks; + if (!ac) { + for (i = 0; i < vport->num_q_vectors; i++) + vport->q_vectors[i].v_idx = vport->q_vector_idxs[i]; + + return 0; + } + + total_vecs = idpf_get_reserved_vecs(adapter); + vecids = kcalloc(total_vecs, sizeof(u16), GFP_KERNEL); + if (!vecids) + return -ENOMEM; + + idpf_get_vec_ids(adapter, vecids, total_vecs, &ac->vchunks); + + for (i = 0; i < vport->num_q_vectors; i++) + vport->q_vectors[i].v_idx = vecids[vport->q_vector_idxs[i]]; + + kfree(vecids); + + return 0; +} + +/** + * idpf_vport_intr_napi_add_all- Register napi handler for all qvectors + * @vport: virtual port structure + */ +static void idpf_vport_intr_napi_add_all(struct idpf_vport *vport) +{ + int (*napi_poll)(struct napi_struct *napi, int budget); + u16 v_idx; + + if (idpf_is_queue_model_split(vport->txq_model)) + napi_poll = idpf_vport_splitq_napi_poll; + else + napi_poll = idpf_vport_singleq_napi_poll; + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; + + netif_napi_add(vport->netdev, &q_vector->napi, napi_poll); + + /* only set affinity_mask if the CPU is online */ + if (cpu_online(v_idx)) + cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + } +} + +/** + * idpf_vport_intr_alloc - Allocate memory for interrupt vectors + * @vport: virtual port + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + */ +int idpf_vport_intr_alloc(struct idpf_vport *vport) +{ + u16 txqs_per_vector, rxqs_per_vector, bufqs_per_vector; + struct idpf_q_vector *q_vector; + int v_idx, err; + + vport->q_vectors = kcalloc(vport->num_q_vectors, + sizeof(struct idpf_q_vector), GFP_KERNEL); + if (!vport->q_vectors) + return -ENOMEM; + + txqs_per_vector = DIV_ROUND_UP(vport->num_txq, vport->num_q_vectors); + rxqs_per_vector = DIV_ROUND_UP(vport->num_rxq, vport->num_q_vectors); + bufqs_per_vector = vport->num_bufqs_per_qgrp * + DIV_ROUND_UP(vport->num_rxq_grp, + vport->num_q_vectors); + + for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + q_vector = &vport->q_vectors[v_idx]; + q_vector->vport = vport; + + q_vector->tx_itr_value = IDPF_ITR_TX_DEF; + q_vector->tx_intr_mode = IDPF_ITR_DYNAMIC; + q_vector->tx_itr_idx = VIRTCHNL2_ITR_IDX_1; + + q_vector->rx_itr_value = IDPF_ITR_RX_DEF; + q_vector->rx_intr_mode = IDPF_ITR_DYNAMIC; + q_vector->rx_itr_idx = VIRTCHNL2_ITR_IDX_0; + + q_vector->tx = kcalloc(txqs_per_vector, + sizeof(struct idpf_queue *), + GFP_KERNEL); + if (!q_vector->tx) { + err = -ENOMEM; + goto error; + } + + q_vector->rx = kcalloc(rxqs_per_vector, + sizeof(struct idpf_queue *), + GFP_KERNEL); + if (!q_vector->rx) { + err = -ENOMEM; + goto error; + } + + if (!idpf_is_queue_model_split(vport->rxq_model)) + continue; + + q_vector->bufq = kcalloc(bufqs_per_vector, + sizeof(struct idpf_queue *), + GFP_KERNEL); + if (!q_vector->bufq) { + err = -ENOMEM; + goto error; + } + } + + return 0; + +error: + idpf_vport_intr_rel(vport); + + return err; +} + +/** + * idpf_vport_intr_init - Setup all vectors for the given vport + * @vport: virtual port + * + * Returns 0 on success or negative on failure + */ +int idpf_vport_intr_init(struct idpf_vport *vport) +{ + char *int_name; + int err; + + err = idpf_vport_intr_init_vec_idx(vport); + if (err) + return err; + + idpf_vport_intr_map_vector_to_qs(vport); + idpf_vport_intr_napi_add_all(vport); + idpf_vport_intr_napi_ena_all(vport); + + err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport); + if (err) + goto unroll_vectors_alloc; + + int_name = kasprintf(GFP_KERNEL, "%s-%s", + dev_driver_string(&vport->adapter->pdev->dev), + vport->netdev->name); + + err = idpf_vport_intr_req_irq(vport, int_name); + if (err) + goto unroll_vectors_alloc; + + idpf_vport_intr_ena_irq_all(vport); + + return 0; + +unroll_vectors_alloc: + idpf_vport_intr_napi_dis_all(vport); + idpf_vport_intr_napi_del_all(vport); + + return err; +} + +/** + * idpf_config_rss - Send virtchnl messages to configure RSS + * @vport: virtual port + * + * Return 0 on success, negative on failure + */ +int idpf_config_rss(struct idpf_vport *vport) +{ + int err; + + err = idpf_send_get_set_rss_key_msg(vport, false); + if (err) + return err; + + return idpf_send_get_set_rss_lut_msg(vport, false); +} + +/** + * idpf_fill_dflt_rss_lut - Fill the indirection table with the default values + * @vport: virtual port structure + */ +static void idpf_fill_dflt_rss_lut(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + u16 num_active_rxq = vport->num_rxq; + struct idpf_rss_data *rss_data; + int i; + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + + for (i = 0; i < rss_data->rss_lut_size; i++) { + rss_data->rss_lut[i] = i % num_active_rxq; + rss_data->cached_lut[i] = rss_data->rss_lut[i]; + } +} + +/** + * idpf_init_rss - Allocate and initialize RSS resources + * @vport: virtual port + * + * Return 0 on success, negative on failure + */ +int idpf_init_rss(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_rss_data *rss_data; + u32 lut_size; + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + + lut_size = rss_data->rss_lut_size * sizeof(u32); + rss_data->rss_lut = kzalloc(lut_size, GFP_KERNEL); + if (!rss_data->rss_lut) + return -ENOMEM; + + rss_data->cached_lut = kzalloc(lut_size, GFP_KERNEL); + if (!rss_data->cached_lut) { + kfree(rss_data->rss_lut); + rss_data->rss_lut = NULL; + + return -ENOMEM; + } + + /* Fill the default RSS lut values */ + idpf_fill_dflt_rss_lut(vport); + + return idpf_config_rss(vport); +} + +/** + * idpf_deinit_rss - Release RSS resources + * @vport: virtual port + */ +void idpf_deinit_rss(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_rss_data *rss_data; + + rss_data = &adapter->vport_config[vport->idx]->user_config.rss_data; + kfree(rss_data->cached_lut); + rss_data->cached_lut = NULL; + kfree(rss_data->rss_lut); + rss_data->rss_lut = NULL; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h new file mode 100644 index 00000000000000..387f1af9c33fb2 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -0,0 +1,1022 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _IDPF_TXRX_H_ +#define _IDPF_TXRX_H_ + +#include +#include +#include + +#define IDPF_LARGE_MAX_Q 256 +#define IDPF_MAX_Q 16 +#define IDPF_MIN_Q 2 +/* Mailbox Queue */ +#define IDPF_MAX_MBXQ 1 + +#define IDPF_MIN_TXQ_DESC 64 +#define IDPF_MIN_RXQ_DESC 64 +#define IDPF_MIN_TXQ_COMPLQ_DESC 256 +#define IDPF_MAX_QIDS 256 + +/* Number of descriptors in a queue should be a multiple of 32. RX queue + * descriptors alone should be a multiple of IDPF_REQ_RXQ_DESC_MULTIPLE + * to achieve BufQ descriptors aligned to 32 + */ +#define IDPF_REQ_DESC_MULTIPLE 32 +#define IDPF_REQ_RXQ_DESC_MULTIPLE (IDPF_MAX_BUFQS_PER_RXQ_GRP * 32) +#define IDPF_MIN_TX_DESC_NEEDED (MAX_SKB_FRAGS + 6) +#define IDPF_TX_WAKE_THRESH ((u16)IDPF_MIN_TX_DESC_NEEDED * 2) + +#define IDPF_MAX_DESCS 8160 +#define IDPF_MAX_TXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_DESC_MULTIPLE) +#define IDPF_MAX_RXQ_DESC ALIGN_DOWN(IDPF_MAX_DESCS, IDPF_REQ_RXQ_DESC_MULTIPLE) +#define MIN_SUPPORT_TXDID (\ + VIRTCHNL2_TXDID_FLEX_FLOW_SCHED |\ + VIRTCHNL2_TXDID_FLEX_TSO_CTX) + +#define IDPF_DFLT_SINGLEQ_TX_Q_GROUPS 1 +#define IDPF_DFLT_SINGLEQ_RX_Q_GROUPS 1 +#define IDPF_DFLT_SINGLEQ_TXQ_PER_GROUP 4 +#define IDPF_DFLT_SINGLEQ_RXQ_PER_GROUP 4 + +#define IDPF_COMPLQ_PER_GROUP 1 +#define IDPF_SINGLE_BUFQ_PER_RXQ_GRP 1 +#define IDPF_MAX_BUFQS_PER_RXQ_GRP 2 +#define IDPF_BUFQ2_ENA 1 +#define IDPF_NUMQ_PER_CHUNK 1 + +#define IDPF_DFLT_SPLITQ_TXQ_PER_GROUP 1 +#define IDPF_DFLT_SPLITQ_RXQ_PER_GROUP 1 + +/* Default vector sharing */ +#define IDPF_MBX_Q_VEC 1 +#define IDPF_MIN_Q_VEC 1 + +#define IDPF_DFLT_TX_Q_DESC_COUNT 512 +#define IDPF_DFLT_TX_COMPLQ_DESC_COUNT 512 +#define IDPF_DFLT_RX_Q_DESC_COUNT 512 + +/* IMPORTANT: We absolutely _cannot_ have more buffers in the system than a + * given RX completion queue has descriptors. This includes _ALL_ buffer + * queues. E.g.: If you have two buffer queues of 512 descriptors and buffers, + * you have a total of 1024 buffers so your RX queue _must_ have at least that + * many descriptors. This macro divides a given number of RX descriptors by + * number of buffer queues to calculate how many descriptors each buffer queue + * can have without overrunning the RX queue. + * + * If you give hardware more buffers than completion descriptors what will + * happen is that if hardware gets a chance to post more than ring wrap of + * descriptors before SW gets an interrupt and overwrites SW head, the gen bit + * in the descriptor will be wrong. Any overwritten descriptors' buffers will + * be gone forever and SW has no reasonable way to tell that this has happened. + * From SW perspective, when we finally get an interrupt, it looks like we're + * still waiting for descriptor to be done, stalling forever. + */ +#define IDPF_RX_BUFQ_DESC_COUNT(RXD, NUM_BUFQ) ((RXD) / (NUM_BUFQ)) + +#define IDPF_RX_BUFQ_WORKING_SET(rxq) ((rxq)->desc_count - 1) + +#define IDPF_RX_BUMP_NTC(rxq, ntc) \ +do { \ + if (unlikely(++(ntc) == (rxq)->desc_count)) { \ + ntc = 0; \ + change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags); \ + } \ +} while (0) + +#define IDPF_SINGLEQ_BUMP_RING_IDX(q, idx) \ +do { \ + if (unlikely(++(idx) == (q)->desc_count)) \ + idx = 0; \ +} while (0) + +#define IDPF_RX_HDR_SIZE 256 +#define IDPF_RX_BUF_2048 2048 +#define IDPF_RX_BUF_4096 4096 +#define IDPF_RX_BUF_STRIDE 32 +#define IDPF_RX_BUF_POST_STRIDE 16 +#define IDPF_LOW_WATERMARK 64 +/* Size of header buffer specifically for header split */ +#define IDPF_HDR_BUF_SIZE 256 +#define IDPF_PACKET_HDR_PAD \ + (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN * 2) +#define IDPF_TX_TSO_MIN_MSS 88 + +/* Minimum number of descriptors between 2 descriptors with the RE bit set; + * only relevant in flow scheduling mode + */ +#define IDPF_TX_SPLITQ_RE_MIN_GAP 64 + +#define IDPF_RX_BI_BUFID_S 0 +#define IDPF_RX_BI_BUFID_M GENMASK(14, 0) +#define IDPF_RX_BI_GEN_S 15 +#define IDPF_RX_BI_GEN_M BIT(IDPF_RX_BI_GEN_S) +#define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M +#define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M + +#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i) \ + (&(((struct virtchnl2_singleq_rx_buf_desc *)((rxq)->desc_ring))[i])) +#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i) \ + (&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i])) +#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) ((((rxq)->ring))[i]) + +#define IDPF_BASE_TX_DESC(txq, i) \ + (&(((struct idpf_base_tx_desc *)((txq)->desc_ring))[i])) +#define IDPF_BASE_TX_CTX_DESC(txq, i) \ + (&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i])) +#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i) \ + (&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i])) + +#define IDPF_FLEX_TX_DESC(txq, i) \ + (&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i])) +#define IDPF_FLEX_TX_CTX_DESC(txq, i) \ + (&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i])) + +#define IDPF_DESC_UNUSED(txq) \ + ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ + (txq)->next_to_clean - (txq)->next_to_use - 1) + +#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->buf_stack.top) +#define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ + (txq)->desc_count >> 2) + +#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) +/* Determine the absolute number of completions pending, i.e. the number of + * completions that are expected to arrive on the TX completion queue. + */ +#define IDPF_TX_COMPLQ_PENDING(txq) \ + (((txq)->num_completions_pending >= (txq)->complq->num_completions ? \ + 0 : U64_MAX) + \ + (txq)->num_completions_pending - (txq)->complq->num_completions) + +#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16 +#define IDPF_SPLITQ_TX_INVAL_COMPL_TAG -1 +/* Adjust the generation for the completion tag and wrap if necessary */ +#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \ + ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \ + 0 : (txq)->compl_tag_cur_gen) + +#define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) + +#define IDPF_TX_FLAGS_TSO BIT(0) +#define IDPF_TX_FLAGS_IPV4 BIT(1) +#define IDPF_TX_FLAGS_IPV6 BIT(2) +#define IDPF_TX_FLAGS_TUNNEL BIT(3) + +union idpf_tx_flex_desc { + struct idpf_flex_tx_desc q; /* queue based scheduling */ + struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */ +}; + +/** + * struct idpf_tx_buf + * @next_to_watch: Next descriptor to clean + * @skb: Pointer to the skb + * @dma: DMA address + * @len: DMA length + * @bytecount: Number of bytes + * @gso_segs: Number of GSO segments + * @compl_tag: Splitq only, unique identifier for a buffer. Used to compare + * with completion tag returned in buffer completion event. + * Because the completion tag is expected to be the same in all + * data descriptors for a given packet, and a single packet can + * span multiple buffers, we need this field to track all + * buffers associated with this completion tag independently of + * the buf_id. The tag consists of a N bit buf_id and M upper + * order "generation bits". See compl_tag_bufid_m and + * compl_tag_gen_s in struct idpf_queue. We'll use a value of -1 + * to indicate the tag is not valid. + * @ctx_entry: Singleq only. Used to indicate the corresponding entry + * in the descriptor ring was used for a context descriptor and + * this buffer entry should be skipped. + */ +struct idpf_tx_buf { + void *next_to_watch; + struct sk_buff *skb; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + unsigned int bytecount; + unsigned short gso_segs; + + union { + int compl_tag; + + bool ctx_entry; + }; +}; + +struct idpf_tx_stash { + struct hlist_node hlist; + struct idpf_tx_buf buf; +}; + +/** + * struct idpf_buf_lifo - LIFO for managing OOO completions + * @top: Used to know how many buffers are left + * @size: Total size of LIFO + * @bufs: Backing array + */ +struct idpf_buf_lifo { + u16 top; + u16 size; + struct idpf_tx_stash **bufs; +}; + +/** + * struct idpf_tx_offload_params - Offload parameters for a given packet + * @tx_flags: Feature flags enabled for this packet + * @hdr_offsets: Offset parameter for single queue model + * @cd_tunneling: Type of tunneling enabled for single queue model + * @tso_len: Total length of payload to segment + * @mss: Segment size + * @tso_segs: Number of segments to be sent + * @tso_hdr_len: Length of headers to be duplicated + * @td_cmd: Command field to be inserted into descriptor + */ +struct idpf_tx_offload_params { + u32 tx_flags; + + u32 hdr_offsets; + u32 cd_tunneling; + + u32 tso_len; + u16 mss; + u16 tso_segs; + u16 tso_hdr_len; + + u16 td_cmd; +}; + +/** + * struct idpf_tx_splitq_params + * @dtype: General descriptor info + * @eop_cmd: Type of EOP + * @compl_tag: Associated tag for completion + * @td_tag: Descriptor tunneling tag + * @offload: Offload parameters + */ +struct idpf_tx_splitq_params { + enum idpf_tx_desc_dtype_value dtype; + u16 eop_cmd; + union { + u16 compl_tag; + u16 td_tag; + }; + + struct idpf_tx_offload_params offload; +}; + +enum idpf_tx_ctx_desc_eipt_offload { + IDPF_TX_CTX_EXT_IP_NONE = 0x0, + IDPF_TX_CTX_EXT_IP_IPV6 = 0x1, + IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM = 0x2, + IDPF_TX_CTX_EXT_IP_IPV4 = 0x3 +}; + +/* Checksum offload bits decoded from the receive descriptor. */ +struct idpf_rx_csum_decoded { + u32 l3l4p : 1; + u32 ipe : 1; + u32 eipe : 1; + u32 eudpe : 1; + u32 ipv6exadd : 1; + u32 l4e : 1; + u32 pprs : 1; + u32 nat : 1; + u32 raw_csum_inv : 1; + u32 raw_csum : 16; +}; + +struct idpf_rx_extracted { + unsigned int size; + u16 rx_ptype; +}; + +#define IDPF_TX_COMPLQ_CLEAN_BUDGET 256 +#define IDPF_TX_MIN_PKT_LEN 17 +#define IDPF_TX_DESCS_FOR_SKB_DATA_PTR 1 +#define IDPF_TX_DESCS_PER_CACHE_LINE (L1_CACHE_BYTES / \ + sizeof(struct idpf_flex_tx_desc)) +#define IDPF_TX_DESCS_FOR_CTX 1 +/* TX descriptors needed, worst case */ +#define IDPF_TX_DESC_NEEDED (MAX_SKB_FRAGS + IDPF_TX_DESCS_FOR_CTX + \ + IDPF_TX_DESCS_PER_CACHE_LINE + \ + IDPF_TX_DESCS_FOR_SKB_DATA_PTR) + +/* The size limit for a transmit buffer in a descriptor is (16K - 1). + * In order to align with the read requests we will align the value to + * the nearest 4K which represents our maximum read request size. + */ +#define IDPF_TX_MAX_READ_REQ_SIZE SZ_4K +#define IDPF_TX_MAX_DESC_DATA (SZ_16K - 1) +#define IDPF_TX_MAX_DESC_DATA_ALIGNED \ + ALIGN_DOWN(IDPF_TX_MAX_DESC_DATA, IDPF_TX_MAX_READ_REQ_SIZE) + +#define IDPF_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +#define IDPF_RX_DESC(rxq, i) \ + (&(((union virtchnl2_rx_desc *)((rxq)->desc_ring))[i])) + +struct idpf_rx_buf { + struct page *page; + struct page_pool *pp; + unsigned int page_offset; + u16 truesize; +}; + +#define IDPF_RX_MAX_PTYPE_PROTO_IDS 32 +#define IDPF_RX_MAX_PTYPE_SZ (sizeof(struct virtchnl2_ptype) + \ + (sizeof(u16) * IDPF_RX_MAX_PTYPE_PROTO_IDS)) +#define IDPF_RX_PTYPE_HDR_SZ sizeof(struct virtchnl2_get_ptype_info) +#define IDPF_RX_MAX_PTYPES_PER_BUF \ + DIV_ROUND_DOWN_ULL((IDPF_CTLQ_MAX_BUF_LEN - IDPF_RX_PTYPE_HDR_SZ), \ + IDPF_RX_MAX_PTYPE_SZ) + +#define IDPF_GET_PTYPE_SIZE(p) struct_size((p), proto_id, (p)->proto_id_count) + +#define IDPF_TUN_IP_GRE (\ + IDPF_PTYPE_TUNNEL_IP |\ + IDPF_PTYPE_TUNNEL_IP_GRENAT) + +#define IDPF_TUN_IP_GRE_MAC (\ + IDPF_TUN_IP_GRE |\ + IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC) + +#define IDPF_RX_MAX_PTYPE 1024 +#define IDPF_RX_MAX_BASE_PTYPE 256 +#define IDPF_INVALID_PTYPE_ID 0xFFFF + +/* Packet type non-ip values */ +enum idpf_rx_ptype_l2 { + IDPF_RX_PTYPE_L2_RESERVED = 0, + IDPF_RX_PTYPE_L2_MAC_PAY2 = 1, + IDPF_RX_PTYPE_L2_TIMESYNC_PAY2 = 2, + IDPF_RX_PTYPE_L2_FIP_PAY2 = 3, + IDPF_RX_PTYPE_L2_OUI_PAY2 = 4, + IDPF_RX_PTYPE_L2_MACCNTRL_PAY2 = 5, + IDPF_RX_PTYPE_L2_LLDP_PAY2 = 6, + IDPF_RX_PTYPE_L2_ECP_PAY2 = 7, + IDPF_RX_PTYPE_L2_EVB_PAY2 = 8, + IDPF_RX_PTYPE_L2_QCN_PAY2 = 9, + IDPF_RX_PTYPE_L2_EAPOL_PAY2 = 10, + IDPF_RX_PTYPE_L2_ARP = 11, +}; + +enum idpf_rx_ptype_outer_ip { + IDPF_RX_PTYPE_OUTER_L2 = 0, + IDPF_RX_PTYPE_OUTER_IP = 1, +}; + +#define IDPF_RX_PTYPE_TO_IPV(ptype, ipv) \ + (((ptype)->outer_ip == IDPF_RX_PTYPE_OUTER_IP) && \ + ((ptype)->outer_ip_ver == (ipv))) + +enum idpf_rx_ptype_outer_ip_ver { + IDPF_RX_PTYPE_OUTER_NONE = 0, + IDPF_RX_PTYPE_OUTER_IPV4 = 1, + IDPF_RX_PTYPE_OUTER_IPV6 = 2, +}; + +enum idpf_rx_ptype_outer_fragmented { + IDPF_RX_PTYPE_NOT_FRAG = 0, + IDPF_RX_PTYPE_FRAG = 1, +}; + +enum idpf_rx_ptype_tunnel_type { + IDPF_RX_PTYPE_TUNNEL_NONE = 0, + IDPF_RX_PTYPE_TUNNEL_IP_IP = 1, + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT = 2, + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3, + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4, +}; + +enum idpf_rx_ptype_tunnel_end_prot { + IDPF_RX_PTYPE_TUNNEL_END_NONE = 0, + IDPF_RX_PTYPE_TUNNEL_END_IPV4 = 1, + IDPF_RX_PTYPE_TUNNEL_END_IPV6 = 2, +}; + +enum idpf_rx_ptype_inner_prot { + IDPF_RX_PTYPE_INNER_PROT_NONE = 0, + IDPF_RX_PTYPE_INNER_PROT_UDP = 1, + IDPF_RX_PTYPE_INNER_PROT_TCP = 2, + IDPF_RX_PTYPE_INNER_PROT_SCTP = 3, + IDPF_RX_PTYPE_INNER_PROT_ICMP = 4, + IDPF_RX_PTYPE_INNER_PROT_TIMESYNC = 5, +}; + +enum idpf_rx_ptype_payload_layer { + IDPF_RX_PTYPE_PAYLOAD_LAYER_NONE = 0, + IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1, + IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2, + IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3, +}; + +enum idpf_tunnel_state { + IDPF_PTYPE_TUNNEL_IP = BIT(0), + IDPF_PTYPE_TUNNEL_IP_GRENAT = BIT(1), + IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC = BIT(2), +}; + +struct idpf_ptype_state { + bool outer_ip; + bool outer_frag; + u8 tunnel_state; +}; + +struct idpf_rx_ptype_decoded { + u32 ptype:10; + u32 known:1; + u32 outer_ip:1; + u32 outer_ip_ver:2; + u32 outer_frag:1; + u32 tunnel_type:3; + u32 tunnel_end_prot:2; + u32 tunnel_end_frag:1; + u32 inner_prot:4; + u32 payload_layer:3; +}; + +/** + * enum idpf_queue_flags_t + * @__IDPF_Q_GEN_CHK: Queues operating in splitq mode use a generation bit to + * identify new descriptor writebacks on the ring. HW sets + * the gen bit to 1 on the first writeback of any given + * descriptor. After the ring wraps, HW sets the gen bit of + * those descriptors to 0, and continues flipping + * 0->1 or 1->0 on each ring wrap. SW maintains its own + * gen bit to know what value will indicate writebacks on + * the next pass around the ring. E.g. it is initialized + * to 1 and knows that reading a gen bit of 1 in any + * descriptor on the initial pass of the ring indicates a + * writeback. It also flips on every ring wrap. + * @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit + * and RFLGQ_GEN is the SW bit. + * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling + * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions + * @__IDPF_Q_POLL_MODE: Enable poll mode + * @__IDPF_Q_FLAGS_NBITS: Must be last + */ +enum idpf_queue_flags_t { + __IDPF_Q_GEN_CHK, + __IDPF_RFLQ_GEN_CHK, + __IDPF_Q_FLOW_SCH_EN, + __IDPF_Q_SW_MARKER, + __IDPF_Q_POLL_MODE, + + __IDPF_Q_FLAGS_NBITS, +}; + +/** + * struct idpf_vec_regs + * @dyn_ctl_reg: Dynamic control interrupt register offset + * @itrn_reg: Interrupt Throttling Rate register offset + * @itrn_index_spacing: Register spacing between ITR registers of the same + * vector + */ +struct idpf_vec_regs { + u32 dyn_ctl_reg; + u32 itrn_reg; + u32 itrn_index_spacing; +}; + +/** + * struct idpf_intr_reg + * @dyn_ctl: Dynamic control interrupt register + * @dyn_ctl_intena_m: Mask for dyn_ctl interrupt enable + * @dyn_ctl_itridx_s: Register bit offset for ITR index + * @dyn_ctl_itridx_m: Mask for ITR index + * @dyn_ctl_intrvl_s: Register bit offset for ITR interval + * @rx_itr: RX ITR register + * @tx_itr: TX ITR register + * @icr_ena: Interrupt cause register offset + * @icr_ena_ctlq_m: Mask for ICR + */ +struct idpf_intr_reg { + void __iomem *dyn_ctl; + u32 dyn_ctl_intena_m; + u32 dyn_ctl_itridx_s; + u32 dyn_ctl_itridx_m; + u32 dyn_ctl_intrvl_s; + void __iomem *rx_itr; + void __iomem *tx_itr; + void __iomem *icr_ena; + u32 icr_ena_ctlq_m; +}; + +/** + * struct idpf_q_vector + * @vport: Vport back pointer + * @affinity_mask: CPU affinity mask + * @napi: napi handler + * @v_idx: Vector index + * @intr_reg: See struct idpf_intr_reg + * @num_txq: Number of TX queues + * @tx: Array of TX queues to service + * @tx_dim: Data for TX net_dim algorithm + * @tx_itr_value: TX interrupt throttling rate + * @tx_intr_mode: Dynamic ITR or not + * @tx_itr_idx: TX ITR index + * @num_rxq: Number of RX queues + * @rx: Array of RX queues to service + * @rx_dim: Data for RX net_dim algorithm + * @rx_itr_value: RX interrupt throttling rate + * @rx_intr_mode: Dynamic ITR or not + * @rx_itr_idx: RX ITR index + * @num_bufq: Number of buffer queues + * @bufq: Array of buffer queues to service + * @total_events: Number of interrupts processed + * @name: Queue vector name + */ +struct idpf_q_vector { + struct idpf_vport *vport; + cpumask_t affinity_mask; + struct napi_struct napi; + u16 v_idx; + struct idpf_intr_reg intr_reg; + + u16 num_txq; + struct idpf_queue **tx; + struct dim tx_dim; + u16 tx_itr_value; + bool tx_intr_mode; + u32 tx_itr_idx; + + u16 num_rxq; + struct idpf_queue **rx; + struct dim rx_dim; + u16 rx_itr_value; + bool rx_intr_mode; + u32 rx_itr_idx; + + u16 num_bufq; + struct idpf_queue **bufq; + + u16 total_events; + char *name; +}; + +struct idpf_rx_queue_stats { + u64_stats_t packets; + u64_stats_t bytes; + u64_stats_t rsc_pkts; + u64_stats_t hw_csum_err; + u64_stats_t hsplit_pkts; + u64_stats_t hsplit_buf_ovf; + u64_stats_t bad_descs; +}; + +struct idpf_tx_queue_stats { + u64_stats_t packets; + u64_stats_t bytes; + u64_stats_t lso_pkts; + u64_stats_t linearize; + u64_stats_t q_busy; + u64_stats_t skb_drops; + u64_stats_t dma_map_errs; +}; + +struct idpf_cleaned_stats { + u32 packets; + u32 bytes; +}; + +union idpf_queue_stats { + struct idpf_rx_queue_stats rx; + struct idpf_tx_queue_stats tx; +}; + +#define IDPF_ITR_DYNAMIC 1 +#define IDPF_ITR_MAX 0x1FE0 +#define IDPF_ITR_20K 0x0032 +#define IDPF_ITR_GRAN_S 1 /* Assume ITR granularity is 2us */ +#define IDPF_ITR_MASK 0x1FFE /* ITR register value alignment mask */ +#define ITR_REG_ALIGN(setting) ((setting) & IDPF_ITR_MASK) +#define IDPF_ITR_IS_DYNAMIC(itr_mode) (itr_mode) +#define IDPF_ITR_TX_DEF IDPF_ITR_20K +#define IDPF_ITR_RX_DEF IDPF_ITR_20K +/* Index used for 'No ITR' update in DYN_CTL register */ +#define IDPF_NO_ITR_UPDATE_IDX 3 +#define IDPF_ITR_IDX_SPACING(spacing, dflt) (spacing ? spacing : dflt) +#define IDPF_DIM_DEFAULT_PROFILE_IX 1 + +/** + * struct idpf_queue + * @dev: Device back pointer for DMA mapping + * @vport: Back pointer to associated vport + * @txq_grp: See struct idpf_txq_group + * @rxq_grp: See struct idpf_rxq_group + * @idx: For buffer queue, it is used as group id, either 0 or 1. On clean, + * buffer queue uses this index to determine which group of refill queues + * to clean. + * For TX queue, it is used as index to map between TX queue group and + * hot path TX pointers stored in vport. Used in both singleq/splitq. + * For RX queue, it is used to index to total RX queue across groups and + * used for skb reporting. + * @tail: Tail offset. Used for both queue models single and split. In splitq + * model relevant only for TX queue and RX queue. + * @tx_buf: See struct idpf_tx_buf + * @rx_buf: Struct with RX buffer related members + * @rx_buf.buf: See struct idpf_rx_buf + * @rx_buf.hdr_buf_pa: DMA handle + * @rx_buf.hdr_buf_va: Virtual address + * @pp: Page pool pointer + * @skb: Pointer to the skb + * @q_type: Queue type (TX, RX, TX completion, RX buffer) + * @q_id: Queue id + * @desc_count: Number of descriptors + * @next_to_use: Next descriptor to use. Relevant in both split & single txq + * and bufq. + * @next_to_clean: Next descriptor to clean. In split queue model, only + * relevant to TX completion queue and RX queue. + * @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model + * only relevant to RX queue. + * @flags: See enum idpf_queue_flags_t + * @q_stats: See union idpf_queue_stats + * @stats_sync: See struct u64_stats_sync + * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on + * the TX completion queue, it can be for any TXQ associated + * with that completion queue. This means we can clean up to + * N TXQs during a single call to clean the completion queue. + * cleaned_bytes|pkts tracks the clean stats per TXQ during + * that single call to clean the completion queue. By doing so, + * we can update BQL with aggregate cleaned stats for each TXQ + * only once at the end of the cleaning routine. + * @cleaned_pkts: Number of packets cleaned for the above said case + * @rx_hsplit_en: RX headsplit enable + * @rx_hbuf_size: Header buffer size + * @rx_buf_size: Buffer size + * @rx_max_pkt_size: RX max packet size + * @rx_buf_stride: RX buffer stride + * @rx_buffer_low_watermark: RX buffer low watermark + * @rxdids: Supported RX descriptor ids + * @q_vector: Backreference to associated vector + * @size: Length of descriptor ring in bytes + * @dma: Physical address of ring + * @desc_ring: Descriptor ring memory + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather + * @tx_min_pkt_len: Min supported packet length + * @num_completions: Only relevant for TX completion queue. It tracks the + * number of completions received to compare against the + * number of completions pending, as accumulated by the + * TX queues. + * @buf_stack: Stack of empty buffers to store buffer info for out of order + * buffer completions. See struct idpf_buf_lifo. + * @compl_tag_bufid_m: Completion tag buffer id mask + * @compl_tag_gen_s: Completion tag generation bit + * The format of the completion tag will change based on the TXQ + * descriptor ring size so that we can maintain roughly the same level + * of "uniqueness" across all descriptor sizes. For example, if the + * TXQ descriptor ring size is 64 (the minimum size supported), the + * completion tag will be formatted as below: + * 15 6 5 0 + * -------------------------------- + * | GEN=0-1023 |IDX = 0-63| + * -------------------------------- + * + * This gives us 64*1024 = 65536 possible unique values. Similarly, if + * the TXQ descriptor ring size is 8160 (the maximum size supported), + * the completion tag will be formatted as below: + * 15 13 12 0 + * -------------------------------- + * |GEN | IDX = 0-8159 | + * -------------------------------- + * + * This gives us 8*8160 = 65280 possible unique values. + * @compl_tag_cur_gen: Used to keep track of current completion tag generation + * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset + * @sched_buf_hash: Hash table to stores buffers + */ +struct idpf_queue { + struct device *dev; + struct idpf_vport *vport; + union { + struct idpf_txq_group *txq_grp; + struct idpf_rxq_group *rxq_grp; + }; + u16 idx; + void __iomem *tail; + union { + struct idpf_tx_buf *tx_buf; + struct { + struct idpf_rx_buf *buf; + dma_addr_t hdr_buf_pa; + void *hdr_buf_va; + } rx_buf; + }; + struct page_pool *pp; + struct sk_buff *skb; + u16 q_type; + u32 q_id; + u16 desc_count; + + u16 next_to_use; + u16 next_to_clean; + u16 next_to_alloc; + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + + union idpf_queue_stats q_stats; + struct u64_stats_sync stats_sync; + + u32 cleaned_bytes; + u16 cleaned_pkts; + + bool rx_hsplit_en; + u16 rx_hbuf_size; + u16 rx_buf_size; + u16 rx_max_pkt_size; + u16 rx_buf_stride; + u8 rx_buffer_low_watermark; + u64 rxdids; + struct idpf_q_vector *q_vector; + unsigned int size; + dma_addr_t dma; + void *desc_ring; + + u16 tx_max_bufs; + u8 tx_min_pkt_len; + + u32 num_completions; + + struct idpf_buf_lifo buf_stack; + + u16 compl_tag_bufid_m; + u16 compl_tag_gen_s; + + u16 compl_tag_cur_gen; + u16 compl_tag_gen_max; + + DECLARE_HASHTABLE(sched_buf_hash, 12); +} ____cacheline_internodealigned_in_smp; + +/** + * struct idpf_sw_queue + * @next_to_clean: Next descriptor to clean + * @next_to_alloc: Buffer to allocate at + * @flags: See enum idpf_queue_flags_t + * @ring: Pointer to the ring + * @desc_count: Descriptor count + * @dev: Device back pointer for DMA mapping + * + * Software queues are used in splitq mode to manage buffers between rxq + * producer and the bufq consumer. These are required in order to maintain a + * lockless buffer management system and are strictly software only constructs. + */ +struct idpf_sw_queue { + u16 next_to_clean; + u16 next_to_alloc; + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + u16 *ring; + u16 desc_count; + struct device *dev; +} ____cacheline_internodealigned_in_smp; + +/** + * struct idpf_rxq_set + * @rxq: RX queue + * @refillq0: Pointer to refill queue 0 + * @refillq1: Pointer to refill queue 1 + * + * Splitq only. idpf_rxq_set associates an rxq with at an array of refillqs. + * Each rxq needs a refillq to return used buffers back to the respective bufq. + * Bufqs then clean these refillqs for buffers to give to hardware. + */ +struct idpf_rxq_set { + struct idpf_queue rxq; + struct idpf_sw_queue *refillq0; + struct idpf_sw_queue *refillq1; +}; + +/** + * struct idpf_bufq_set + * @bufq: Buffer queue + * @num_refillqs: Number of refill queues. This is always equal to num_rxq_sets + * in idpf_rxq_group. + * @refillqs: Pointer to refill queues array. + * + * Splitq only. idpf_bufq_set associates a bufq to an array of refillqs. + * In this bufq_set, there will be one refillq for each rxq in this rxq_group. + * Used buffers received by rxqs will be put on refillqs which bufqs will + * clean to return new buffers back to hardware. + * + * Buffers needed by some number of rxqs associated in this rxq_group are + * managed by at most two bufqs (depending on performance configuration). + */ +struct idpf_bufq_set { + struct idpf_queue bufq; + int num_refillqs; + struct idpf_sw_queue *refillqs; +}; + +/** + * struct idpf_rxq_group + * @vport: Vport back pointer + * @singleq: Struct with single queue related members + * @singleq.num_rxq: Number of RX queues associated + * @singleq.rxqs: Array of RX queue pointers + * @splitq: Struct with split queue related members + * @splitq.num_rxq_sets: Number of RX queue sets + * @splitq.rxq_sets: Array of RX queue sets + * @splitq.bufq_sets: Buffer queue set pointer + * + * In singleq mode, an rxq_group is simply an array of rxqs. In splitq, a + * rxq_group contains all the rxqs, bufqs and refillqs needed to + * manage buffers in splitq mode. + */ +struct idpf_rxq_group { + struct idpf_vport *vport; + + union { + struct { + u16 num_rxq; + struct idpf_queue *rxqs[IDPF_LARGE_MAX_Q]; + } singleq; + struct { + u16 num_rxq_sets; + struct idpf_rxq_set *rxq_sets[IDPF_LARGE_MAX_Q]; + struct idpf_bufq_set *bufq_sets; + } splitq; + }; +}; + +/** + * struct idpf_txq_group + * @vport: Vport back pointer + * @num_txq: Number of TX queues associated + * @txqs: Array of TX queue pointers + * @complq: Associated completion queue pointer, split queue only + * @num_completions_pending: Total number of completions pending for the + * completion queue, acculumated for all TX queues + * associated with that completion queue. + * + * Between singleq and splitq, a txq_group is largely the same except for the + * complq. In splitq a single complq is responsible for handling completions + * for some number of txqs associated in this txq_group. + */ +struct idpf_txq_group { + struct idpf_vport *vport; + + u16 num_txq; + struct idpf_queue *txqs[IDPF_LARGE_MAX_Q]; + + struct idpf_queue *complq; + + u32 num_completions_pending; +}; + +/** + * idpf_size_to_txd_count - Get number of descriptors needed for large Tx frag + * @size: transmit request size in bytes + * + * In the case where a large frag (>= 16K) needs to be split across multiple + * descriptors, we need to assume that we can have no more than 12K of data + * per descriptor due to hardware alignment restrictions (4K alignment). + */ +static inline u32 idpf_size_to_txd_count(unsigned int size) +{ + return DIV_ROUND_UP(size, IDPF_TX_MAX_DESC_DATA_ALIGNED); +} + +/** + * idpf_tx_singleq_build_ctob - populate command tag offset and size + * @td_cmd: Command to be filled in desc + * @td_offset: Offset to be filled in desc + * @size: Size of the buffer + * @td_tag: td tag to be filled + * + * Returns the 64 bit value populated with the input parameters + */ +static inline __le64 idpf_tx_singleq_build_ctob(u64 td_cmd, u64 td_offset, + unsigned int size, u64 td_tag) +{ + return cpu_to_le64(IDPF_TX_DESC_DTYPE_DATA | + (td_cmd << IDPF_TXD_QW1_CMD_S) | + (td_offset << IDPF_TXD_QW1_OFFSET_S) | + ((u64)size << IDPF_TXD_QW1_TX_BUF_SZ_S) | + (td_tag << IDPF_TXD_QW1_L2TAG1_S)); +} + +void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size); +void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size); +/** + * idpf_tx_splitq_build_desc - determine which type of data descriptor to build + * @desc: descriptor to populate + * @params: pointer to tx params struct + * @td_cmd: command to be filled in desc + * @size: size of buffer + */ +static inline void idpf_tx_splitq_build_desc(union idpf_tx_flex_desc *desc, + struct idpf_tx_splitq_params *params, + u16 td_cmd, u16 size) +{ + if (params->dtype == IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2) + idpf_tx_splitq_build_ctb(desc, params, td_cmd, size); + else + idpf_tx_splitq_build_flow_desc(desc, params, td_cmd, size); +} + +/** + * idpf_alloc_page - Allocate a new RX buffer from the page pool + * @pool: page_pool to allocate from + * @buf: metadata struct to populate with page info + * @buf_size: 2K or 4K + * + * Returns &dma_addr_t to be passed to HW for Rx, %DMA_MAPPING_ERROR otherwise. + */ +static inline dma_addr_t idpf_alloc_page(struct page_pool *pool, + struct idpf_rx_buf *buf, + unsigned int buf_size) +{ + WARN_ON_ONCE(buf_size != IDPF_RX_BUF_4096); + buf->page = page_pool_dev_alloc_pages(pool); + buf->pp = pool; + + if (!buf->page) + return DMA_MAPPING_ERROR; + + buf->truesize = buf_size; + + return page_pool_get_dma_addr(buf->page) + buf->page_offset + + pool->p.offset; +} + +/** + * idpf_rx_put_page - Return RX buffer page to pool + * @rx_buf: RX buffer metadata struct + */ +static inline void idpf_rx_put_page(struct idpf_rx_buf *rx_buf) +{ + page_pool_put_page(rx_buf->pp, rx_buf->page, + rx_buf->truesize, true); + rx_buf->page = NULL; +} + +/** + * idpf_rx_sync_for_cpu - Synchronize DMA buffer + * @rx_buf: RX buffer metadata struct + * @len: frame length from descriptor + */ +static inline void idpf_rx_sync_for_cpu(struct idpf_rx_buf *rx_buf, u32 len) +{ + struct page *page = rx_buf->page; + struct page_pool *pp = rx_buf->pp; + + dma_sync_single_range_for_cpu(pp->p.dev, + page_pool_get_dma_addr(page), + rx_buf->page_offset + pp->p.offset, len, + page_pool_get_dma_dir(pp)); +} + +int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); +void idpf_vport_init_num_qs(struct idpf_vport *vport, + struct virtchnl2_create_vport *vport_msg); +void idpf_vport_calc_num_q_desc(struct idpf_vport *vport); +int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_index, + struct virtchnl2_create_vport *vport_msg, + struct idpf_vport_max_q *max_q); +void idpf_vport_calc_num_q_groups(struct idpf_vport *vport); +int idpf_vport_queues_alloc(struct idpf_vport *vport); +void idpf_vport_queues_rel(struct idpf_vport *vport); +void idpf_vport_intr_rel(struct idpf_vport *vport); +int idpf_vport_intr_alloc(struct idpf_vport *vport); +void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector); +void idpf_vport_intr_deinit(struct idpf_vport *vport); +int idpf_vport_intr_init(struct idpf_vport *vport); +enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded); +int idpf_config_rss(struct idpf_vport *vport); +int idpf_init_rss(struct idpf_vport *vport); +void idpf_deinit_rss(struct idpf_vport *vport); +int idpf_rx_bufs_init_all(struct idpf_vport *vport); +void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size); +struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq, + struct idpf_rx_buf *rx_buf, + unsigned int size); +bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf); +void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val); +void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val, + bool xmit_more); +unsigned int idpf_size_to_txd_count(unsigned int size); +netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb); +void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb, + struct idpf_tx_buf *first, u16 ring_idx); +unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq, + struct sk_buff *skb); +bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count); +int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size); +void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); +netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, + struct net_device *netdev); +netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, + struct net_device *netdev); +bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq, + u16 cleaned_count); +int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); + +#endif /* !_IDPF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c new file mode 100644 index 00000000000000..629cb5cb7c9fc1 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_lan_vf_regs.h" +#include "idpf_virtchnl.h" + +#define IDPF_VF_ITR_IDX_SPACING 0x40 + +/** + * idpf_vf_ctlq_reg_init - initialize default mailbox registers + * @cq: pointer to the array of create control queues + */ +static void idpf_vf_ctlq_reg_init(struct idpf_ctlq_create_info *cq) +{ + int i; + + for (i = 0; i < IDPF_NUM_DFLT_MBX_Q; i++) { + struct idpf_ctlq_create_info *ccq = cq + i; + + switch (ccq->type) { + case IDPF_CTLQ_TYPE_MAILBOX_TX: + /* set head and tail registers in our local struct */ + ccq->reg.head = VF_ATQH; + ccq->reg.tail = VF_ATQT; + ccq->reg.len = VF_ATQLEN; + ccq->reg.bah = VF_ATQBAH; + ccq->reg.bal = VF_ATQBAL; + ccq->reg.len_mask = VF_ATQLEN_ATQLEN_M; + ccq->reg.len_ena_mask = VF_ATQLEN_ATQENABLE_M; + ccq->reg.head_mask = VF_ATQH_ATQH_M; + break; + case IDPF_CTLQ_TYPE_MAILBOX_RX: + /* set head and tail registers in our local struct */ + ccq->reg.head = VF_ARQH; + ccq->reg.tail = VF_ARQT; + ccq->reg.len = VF_ARQLEN; + ccq->reg.bah = VF_ARQBAH; + ccq->reg.bal = VF_ARQBAL; + ccq->reg.len_mask = VF_ARQLEN_ARQLEN_M; + ccq->reg.len_ena_mask = VF_ARQLEN_ARQENABLE_M; + ccq->reg.head_mask = VF_ARQH_ARQH_M; + break; + default: + break; + } + } +} + +/** + * idpf_vf_mb_intr_reg_init - Initialize the mailbox register + * @adapter: adapter structure + */ +static void idpf_vf_mb_intr_reg_init(struct idpf_adapter *adapter) +{ + struct idpf_intr_reg *intr = &adapter->mb_vector.intr_reg; + u32 dyn_ctl = le32_to_cpu(adapter->caps.mailbox_dyn_ctl); + + intr->dyn_ctl = idpf_get_reg_addr(adapter, dyn_ctl); + intr->dyn_ctl_intena_m = VF_INT_DYN_CTL0_INTENA_M; + intr->dyn_ctl_itridx_m = VF_INT_DYN_CTL0_ITR_INDX_M; + intr->icr_ena = idpf_get_reg_addr(adapter, VF_INT_ICR0_ENA1); + intr->icr_ena_ctlq_m = VF_INT_ICR0_ENA1_ADMINQ_M; +} + +/** + * idpf_vf_intr_reg_init - Initialize interrupt registers + * @vport: virtual port structure + */ +static int idpf_vf_intr_reg_init(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + int num_vecs = vport->num_q_vectors; + struct idpf_vec_regs *reg_vals; + int num_regs, i, err = 0; + u32 rx_itr, tx_itr; + u16 total_vecs; + + total_vecs = idpf_get_reserved_vecs(vport->adapter); + reg_vals = kcalloc(total_vecs, sizeof(struct idpf_vec_regs), + GFP_KERNEL); + if (!reg_vals) + return -ENOMEM; + + num_regs = idpf_get_reg_intr_vecs(vport, reg_vals); + if (num_regs < num_vecs) { + err = -EINVAL; + goto free_reg_vals; + } + + for (i = 0; i < num_vecs; i++) { + struct idpf_q_vector *q_vector = &vport->q_vectors[i]; + u16 vec_id = vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC; + struct idpf_intr_reg *intr = &q_vector->intr_reg; + u32 spacing; + + intr->dyn_ctl = idpf_get_reg_addr(adapter, + reg_vals[vec_id].dyn_ctl_reg); + intr->dyn_ctl_intena_m = VF_INT_DYN_CTLN_INTENA_M; + intr->dyn_ctl_itridx_s = VF_INT_DYN_CTLN_ITR_INDX_S; + + spacing = IDPF_ITR_IDX_SPACING(reg_vals[vec_id].itrn_index_spacing, + IDPF_VF_ITR_IDX_SPACING); + rx_itr = VF_INT_ITRN_ADDR(VIRTCHNL2_ITR_IDX_0, + reg_vals[vec_id].itrn_reg, + spacing); + tx_itr = VF_INT_ITRN_ADDR(VIRTCHNL2_ITR_IDX_1, + reg_vals[vec_id].itrn_reg, + spacing); + intr->rx_itr = idpf_get_reg_addr(adapter, rx_itr); + intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr); + } + +free_reg_vals: + kfree(reg_vals); + + return err; +} + +/** + * idpf_vf_reset_reg_init - Initialize reset registers + * @adapter: Driver specific private structure + */ +static void idpf_vf_reset_reg_init(struct idpf_adapter *adapter) +{ + adapter->reset_reg.rstat = idpf_get_reg_addr(adapter, VFGEN_RSTAT); + adapter->reset_reg.rstat_m = VFGEN_RSTAT_VFR_STATE_M; +} + +/** + * idpf_vf_trigger_reset - trigger reset + * @adapter: Driver specific private structure + * @trig_cause: Reason to trigger a reset + */ +static void idpf_vf_trigger_reset(struct idpf_adapter *adapter, + enum idpf_flags trig_cause) +{ + /* Do not send VIRTCHNL2_OP_RESET_VF message on driver unload */ + if (trig_cause == IDPF_HR_FUNC_RESET && + !test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + idpf_send_mb_msg(adapter, VIRTCHNL2_OP_RESET_VF, 0, NULL, 0); +} + +/** + * idpf_vf_reg_ops_init - Initialize register API function pointers + * @adapter: Driver specific private structure + */ +static void idpf_vf_reg_ops_init(struct idpf_adapter *adapter) +{ + adapter->dev_ops.reg_ops.ctlq_reg_init = idpf_vf_ctlq_reg_init; + adapter->dev_ops.reg_ops.intr_reg_init = idpf_vf_intr_reg_init; + adapter->dev_ops.reg_ops.mb_intr_reg_init = idpf_vf_mb_intr_reg_init; + adapter->dev_ops.reg_ops.reset_reg_init = idpf_vf_reset_reg_init; + adapter->dev_ops.reg_ops.trigger_reset = idpf_vf_trigger_reset; +} + +/** + * idpf_vf_dev_ops_init - Initialize device API function pointers + * @adapter: Driver specific private structure + */ +void idpf_vf_dev_ops_init(struct idpf_adapter *adapter) +{ + idpf_vf_reg_ops_init(adapter); +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c new file mode 100644 index 00000000000000..3b849b668eded7 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -0,0 +1,3701 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2023 Intel Corporation */ + +#include "idpf.h" +#include "idpf_virtchnl.h" + +/* RHEL8: Minimal definitions to make __free(kfree) work. + * Copied from upstream include/linux/cleanup.h and include/linux/slab.h + */ +#ifndef __free +#define __cleanup(func) __attribute__((__cleanup__(func))) +#define DEFINE_FREE(_name, _type, _free) \ + static inline void __free_##_name(void *p) { _type _T = *(_type *)p; _free; } +#define __free(_name) __cleanup(__free_##_name) +DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) +#endif + +#define IDPF_VC_XN_MIN_TIMEOUT_MSEC 2000 +#define IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC (60 * 1000) +#define IDPF_VC_XN_IDX_M GENMASK(7, 0) +#define IDPF_VC_XN_SALT_M GENMASK(15, 8) +#define IDPF_VC_XN_RING_LEN U8_MAX + +/** + * enum idpf_vc_xn_state - Virtchnl transaction status + * @IDPF_VC_XN_IDLE: not expecting a reply, ready to be used + * @IDPF_VC_XN_WAITING: expecting a reply, not yet received + * @IDPF_VC_XN_COMPLETED_SUCCESS: a reply was expected and received, + * buffer updated + * @IDPF_VC_XN_COMPLETED_FAILED: a reply was expected and received, but there + * was an error, buffer not updated + * @IDPF_VC_XN_SHUTDOWN: transaction object cannot be used, VC torn down + * @IDPF_VC_XN_ASYNC: transaction sent asynchronously and doesn't have the + * return context; a callback may be provided to handle + * return + */ +enum idpf_vc_xn_state { + IDPF_VC_XN_IDLE = 1, + IDPF_VC_XN_WAITING, + IDPF_VC_XN_COMPLETED_SUCCESS, + IDPF_VC_XN_COMPLETED_FAILED, + IDPF_VC_XN_SHUTDOWN, + IDPF_VC_XN_ASYNC, +}; + +struct idpf_vc_xn; +/* Callback for asynchronous messages */ +typedef int (*async_vc_cb) (struct idpf_adapter *, struct idpf_vc_xn *, + const struct idpf_ctlq_msg *); + +/** + * struct idpf_vc_xn - Data structure representing virtchnl transactions + * @completed: virtchnl event loop uses that to signal when a reply is + * available, uses kernel completion API + * @state: virtchnl event loop stores the data below, protected by the + * completion's lock. + * @reply_sz: Original size of reply, may be > reply_buf.iov_len; it will be + * truncated on its way to the receiver thread according to + * reply_buf.iov_len. + * @reply: Reference to the buffer(s) where the reply data should be written + * to. May be 0-length (then NULL address permitted) if the reply data + * should be ignored. + * @async_handler: if sent asynchronously, a callback can be provided to handle + * the reply when it's received + * @vc_op: corresponding opcode sent with this transaction + * @idx: index used as retrieval on reply receive, used for cookie + * @salt: changed every message to make unique, used for cookie + */ +struct idpf_vc_xn { + struct completion completed; + enum idpf_vc_xn_state state; + size_t reply_sz; + struct kvec reply; + async_vc_cb async_handler; + u32 vc_op; + u8 idx; + u8 salt; +}; + +/** + * struct idpf_vc_xn_params - Parameters for executing transaction + * @send_buf: kvec for send buffer + * @recv_buf: kvec for recv buffer, may be NULL, must then have zero length + * @timeout_ms: timeout to wait for reply + * @async: send message asynchronously, will not wait on completion + * @async_handler: If sent asynchronously, optional callback handler. The user + * must be careful when using async handlers as the memory for + * the recv_buf _cannot_ be on stack if this is async. + * @vc_op: virtchnl op to send + */ +struct idpf_vc_xn_params { + struct kvec send_buf; + struct kvec recv_buf; + int timeout_ms; + bool async; + async_vc_cb async_handler; + u32 vc_op; +}; + +/** + * struct idpf_vc_xn_manager - Manager for tracking transactions + * @ring: backing and lookup for transactions + * @free_xn_bm: bitmap for free transactions + * @xn_bm_lock: make bitmap access synchronous where necessary + * @salt: used to make cookie unique every message + */ +struct idpf_vc_xn_manager { + struct idpf_vc_xn ring[IDPF_VC_XN_RING_LEN]; + DECLARE_BITMAP(free_xn_bm, IDPF_VC_XN_RING_LEN); + spinlock_t xn_bm_lock; + u8 salt; +}; + +/** + * idpf_vid_to_vport - Translate vport id to vport pointer + * @adapter: private data struct + * @v_id: vport id to translate + * + * Returns vport matching v_id, NULL if not found. + */ +static +struct idpf_vport *idpf_vid_to_vport(struct idpf_adapter *adapter, u32 v_id) +{ + u16 num_max_vports = idpf_get_max_vports(adapter); + int i; + + for (i = 0; i < num_max_vports; i++) + if (adapter->vport_ids[i] == v_id) + return adapter->vports[i]; + + return NULL; +} + +/** + * idpf_handle_event_link - Handle link event message + * @adapter: private data struct + * @v2e: virtchnl event message + */ +static void idpf_handle_event_link(struct idpf_adapter *adapter, + const struct virtchnl2_event *v2e) +{ + struct idpf_netdev_priv *np; + struct idpf_vport *vport; + + vport = idpf_vid_to_vport(adapter, le32_to_cpu(v2e->vport_id)); + if (!vport) { + dev_err_ratelimited(&adapter->pdev->dev, "Failed to find vport_id %d for link event\n", + v2e->vport_id); + return; + } + np = netdev_priv(vport->netdev); + + vport->link_speed_mbps = le32_to_cpu(v2e->link_speed); + + if (vport->link_up == v2e->link_status) + return; + + vport->link_up = v2e->link_status; + + if (np->state != __IDPF_VPORT_UP) + return; + + if (vport->link_up) { + netif_tx_start_all_queues(vport->netdev); + netif_carrier_on(vport->netdev); + } else { + netif_tx_stop_all_queues(vport->netdev); + netif_carrier_off(vport->netdev); + } +} + +/** + * idpf_recv_event_msg - Receive virtchnl event message + * @adapter: Driver specific private structure + * @ctlq_msg: message to copy from + * + * Receive virtchnl event message + */ +static void idpf_recv_event_msg(struct idpf_adapter *adapter, + struct idpf_ctlq_msg *ctlq_msg) +{ + int payload_size = ctlq_msg->ctx.indirect.payload->size; + struct virtchnl2_event *v2e; + u32 event; + + if (payload_size < sizeof(*v2e)) { + dev_err_ratelimited(&adapter->pdev->dev, "Failed to receive valid payload for event msg (op %d len %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode, + payload_size); + return; + } + + v2e = (struct virtchnl2_event *)ctlq_msg->ctx.indirect.payload->va; + event = le32_to_cpu(v2e->event); + + switch (event) { + case VIRTCHNL2_EVENT_LINK_CHANGE: + idpf_handle_event_link(adapter, v2e); + return; + default: + dev_err(&adapter->pdev->dev, + "Unknown event %d from PF\n", event); + break; + } +} + +/** + * idpf_mb_clean - Reclaim the send mailbox queue entries + * @adapter: Driver specific private structure + * + * Reclaim the send mailbox queue entries to be used to send further messages + * + * Returns 0 on success, negative on failure + */ +static int idpf_mb_clean(struct idpf_adapter *adapter) +{ + u16 i, num_q_msg = IDPF_DFLT_MBX_Q_LEN; + struct idpf_ctlq_msg **q_msg; + struct idpf_dma_mem *dma_mem; + int err; + + q_msg = kcalloc(num_q_msg, sizeof(struct idpf_ctlq_msg *), GFP_ATOMIC); + if (!q_msg) + return -ENOMEM; + + err = idpf_ctlq_clean_sq(adapter->hw.asq, &num_q_msg, q_msg); + if (err) + goto err_kfree; + + for (i = 0; i < num_q_msg; i++) { + if (!q_msg[i]) + continue; + dma_mem = q_msg[i]->ctx.indirect.payload; + if (dma_mem) + dma_free_coherent(&adapter->pdev->dev, dma_mem->size, + dma_mem->va, dma_mem->pa); + kfree(q_msg[i]); + kfree(dma_mem); + } + +err_kfree: + kfree(q_msg); + + return err; +} + +/** + * idpf_send_mb_msg - Send message over mailbox + * @adapter: Driver specific private structure + * @op: virtchnl opcode + * @msg_size: size of the payload + * @msg: pointer to buffer holding the payload + * @cookie: unique SW generated cookie per message + * + * Will prepare the control queue message and initiates the send api + * + * Returns 0 on success, negative on failure + */ +int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, + u16 msg_size, u8 *msg, u16 cookie) +{ + struct idpf_ctlq_msg *ctlq_msg; + struct idpf_dma_mem *dma_mem; + int err; + + /* If we are here and a reset is detected nothing much can be + * done. This thread should silently abort and expected to + * be corrected with a new run either by user or driver + * flows after reset + */ + if (idpf_is_reset_detected(adapter)) + return 0; + + err = idpf_mb_clean(adapter); + if (err) + return err; + + ctlq_msg = kzalloc(sizeof(*ctlq_msg), GFP_ATOMIC); + if (!ctlq_msg) + return -ENOMEM; + + dma_mem = kzalloc(sizeof(*dma_mem), GFP_ATOMIC); + if (!dma_mem) { + err = -ENOMEM; + goto dma_mem_error; + } + + ctlq_msg->opcode = idpf_mbq_opc_send_msg_to_cp; + ctlq_msg->func_id = 0; + ctlq_msg->data_len = msg_size; + ctlq_msg->cookie.mbx.chnl_opcode = op; + ctlq_msg->cookie.mbx.chnl_retval = 0; + dma_mem->size = IDPF_CTLQ_MAX_BUF_LEN; + dma_mem->va = dma_alloc_coherent(&adapter->pdev->dev, dma_mem->size, + &dma_mem->pa, GFP_ATOMIC); + if (!dma_mem->va) { + err = -ENOMEM; + goto dma_alloc_error; + } + + /* It's possible we're just sending an opcode but no buffer */ + if (msg && msg_size) + memcpy(dma_mem->va, msg, msg_size); + ctlq_msg->ctx.indirect.payload = dma_mem; + ctlq_msg->ctx.sw_cookie.data = cookie; + + err = idpf_ctlq_send(&adapter->hw, adapter->hw.asq, 1, ctlq_msg); + if (err) + goto send_error; + + return 0; + +send_error: + dma_free_coherent(&adapter->pdev->dev, dma_mem->size, dma_mem->va, + dma_mem->pa); +dma_alloc_error: + kfree(dma_mem); +dma_mem_error: + kfree(ctlq_msg); + + return err; +} + +/* API for virtchnl "transaction" support ("xn" for short). + * + * We are reusing the completion lock to serialize the accesses to the + * transaction state for simplicity, but it could be its own separate synchro + * as well. For now, this API is only used from within a workqueue context; + * raw_spin_lock() is enough. + */ +/** + * idpf_vc_xn_lock - Request exclusive access to vc transaction + * @xn: struct idpf_vc_xn* to access + */ +#define idpf_vc_xn_lock(xn) \ + raw_spin_lock(&(xn)->completed.wait.lock) + +/** + * idpf_vc_xn_unlock - Release exclusive access to vc transaction + * @xn: struct idpf_vc_xn* to access + */ +#define idpf_vc_xn_unlock(xn) \ + raw_spin_unlock(&(xn)->completed.wait.lock) + +/** + * idpf_vc_xn_release_bufs - Release reference to reply buffer(s) and + * reset the transaction state. + * @xn: struct idpf_vc_xn to update + */ +static void idpf_vc_xn_release_bufs(struct idpf_vc_xn *xn) +{ + xn->reply.iov_base = NULL; + xn->reply.iov_len = 0; + + if (xn->state != IDPF_VC_XN_SHUTDOWN) + xn->state = IDPF_VC_XN_IDLE; +} + +/** + * idpf_vc_xn_init - Initialize virtchnl transaction object + * @vcxn_mngr: pointer to vc transaction manager struct + */ +static void idpf_vc_xn_init(struct idpf_vc_xn_manager *vcxn_mngr) +{ + int i; + + spin_lock_init(&vcxn_mngr->xn_bm_lock); + + for (i = 0; i < ARRAY_SIZE(vcxn_mngr->ring); i++) { + struct idpf_vc_xn *xn = &vcxn_mngr->ring[i]; + + xn->state = IDPF_VC_XN_IDLE; + xn->idx = i; + idpf_vc_xn_release_bufs(xn); + init_completion(&xn->completed); + } + + bitmap_fill(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN); +} + +/** + * idpf_vc_xn_shutdown - Uninitialize virtchnl transaction object + * @vcxn_mngr: pointer to vc transaction manager struct + * + * All waiting threads will be woken-up and their transaction aborted. Further + * operations on that object will fail. + */ +static void idpf_vc_xn_shutdown(struct idpf_vc_xn_manager *vcxn_mngr) +{ + int i; + + spin_lock_bh(&vcxn_mngr->xn_bm_lock); + bitmap_zero(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN); + spin_unlock_bh(&vcxn_mngr->xn_bm_lock); + + for (i = 0; i < ARRAY_SIZE(vcxn_mngr->ring); i++) { + struct idpf_vc_xn *xn = &vcxn_mngr->ring[i]; + + idpf_vc_xn_lock(xn); + xn->state = IDPF_VC_XN_SHUTDOWN; + idpf_vc_xn_release_bufs(xn); + idpf_vc_xn_unlock(xn); + complete_all(&xn->completed); + } +} + +/** + * idpf_vc_xn_pop_free - Pop a free transaction from free list + * @vcxn_mngr: transaction manager to pop from + * + * Returns NULL if no free transactions + */ +static +struct idpf_vc_xn *idpf_vc_xn_pop_free(struct idpf_vc_xn_manager *vcxn_mngr) +{ + struct idpf_vc_xn *xn = NULL; + unsigned long free_idx; + + spin_lock_bh(&vcxn_mngr->xn_bm_lock); + free_idx = find_first_bit(vcxn_mngr->free_xn_bm, IDPF_VC_XN_RING_LEN); + if (free_idx == IDPF_VC_XN_RING_LEN) + goto do_unlock; + + clear_bit(free_idx, vcxn_mngr->free_xn_bm); + xn = &vcxn_mngr->ring[free_idx]; + xn->salt = vcxn_mngr->salt++; + +do_unlock: + spin_unlock_bh(&vcxn_mngr->xn_bm_lock); + + return xn; +} + +/** + * idpf_vc_xn_push_free - Push a free transaction to free list + * @vcxn_mngr: transaction manager to push to + * @xn: transaction to push + */ +static void idpf_vc_xn_push_free(struct idpf_vc_xn_manager *vcxn_mngr, + struct idpf_vc_xn *xn) +{ + idpf_vc_xn_release_bufs(xn); + set_bit(xn->idx, vcxn_mngr->free_xn_bm); +} + +/** + * idpf_vc_xn_exec - Perform a send/recv virtchnl transaction + * @adapter: driver specific private structure with vcxn_mngr + * @params: parameters for this particular transaction including + * -vc_op: virtchannel operation to send + * -send_buf: kvec iov for send buf and len + * -recv_buf: kvec iov for recv buf and len (ignored if NULL) + * -timeout_ms: timeout waiting for a reply (milliseconds) + * -async: don't wait for message reply, will lose caller context + * -async_handler: callback to handle async replies + * + * @returns >= 0 for success, the size of the initial reply (may or may not be + * >= @recv_buf.iov_len, but we never overflow @@recv_buf_iov_base). < 0 for + * error. + */ +static ssize_t idpf_vc_xn_exec(struct idpf_adapter *adapter, + const struct idpf_vc_xn_params *params) +{ + const struct kvec *send_buf = ¶ms->send_buf; + struct idpf_vc_xn *xn; + ssize_t retval; + u16 cookie; + + xn = idpf_vc_xn_pop_free(adapter->vcxn_mngr); + /* no free transactions available */ + if (!xn) + return -ENOSPC; + + idpf_vc_xn_lock(xn); + if (xn->state == IDPF_VC_XN_SHUTDOWN) { + retval = -ENXIO; + goto only_unlock; + } else if (xn->state != IDPF_VC_XN_IDLE) { + /* We're just going to clobber this transaction even though + * it's not IDLE. If we don't reuse it we could theoretically + * eventually leak all the free transactions and not be able to + * send any messages. At least this way we make an attempt to + * remain functional even though something really bad is + * happening that's corrupting what was supposed to be free + * transactions. + */ + WARN_ONCE(1, "There should only be idle transactions in free list (idx %d op %d)\n", + xn->idx, xn->vc_op); + } + + xn->reply = params->recv_buf; + xn->reply_sz = 0; + xn->state = params->async ? IDPF_VC_XN_ASYNC : IDPF_VC_XN_WAITING; + xn->vc_op = params->vc_op; + xn->async_handler = params->async_handler; + idpf_vc_xn_unlock(xn); + + if (!params->async) + reinit_completion(&xn->completed); + cookie = FIELD_PREP(IDPF_VC_XN_SALT_M, xn->salt) | + FIELD_PREP(IDPF_VC_XN_IDX_M, xn->idx); + + retval = idpf_send_mb_msg(adapter, params->vc_op, + send_buf->iov_len, send_buf->iov_base, + cookie); + if (retval) { + idpf_vc_xn_lock(xn); + goto release_and_unlock; + } + + if (params->async) + return 0; + + wait_for_completion_timeout(&xn->completed, + msecs_to_jiffies(params->timeout_ms)); + + /* No need to check the return value; we check the final state of the + * transaction below. It's possible the transaction actually gets more + * timeout than specified if we get preempted here but after + * wait_for_completion_timeout returns. This should be non-issue + * however. + */ + idpf_vc_xn_lock(xn); + switch (xn->state) { + case IDPF_VC_XN_SHUTDOWN: + retval = -ENXIO; + goto only_unlock; + case IDPF_VC_XN_WAITING: + dev_notice_ratelimited(&adapter->pdev->dev, "Transaction timed-out (op %d, %dms)\n", + params->vc_op, params->timeout_ms); + retval = -ETIME; + break; + case IDPF_VC_XN_COMPLETED_SUCCESS: + retval = xn->reply_sz; + break; + case IDPF_VC_XN_COMPLETED_FAILED: + dev_notice_ratelimited(&adapter->pdev->dev, "Transaction failed (op %d)\n", + params->vc_op); + retval = -EIO; + break; + default: + /* Invalid state. */ + WARN_ON_ONCE(1); + retval = -EIO; + break; + } + +release_and_unlock: + idpf_vc_xn_push_free(adapter->vcxn_mngr, xn); + /* If we receive a VC reply after here, it will be dropped. */ +only_unlock: + idpf_vc_xn_unlock(xn); + + return retval; +} + +/** + * idpf_vc_xn_forward_async - Handle async reply receives + * @adapter: private data struct + * @xn: transaction to handle + * @ctlq_msg: corresponding ctlq_msg + * + * For async sends we're going to lose the caller's context so, if an + * async_handler was provided, it can deal with the reply, otherwise we'll just + * check and report if there is an error. + */ +static int +idpf_vc_xn_forward_async(struct idpf_adapter *adapter, struct idpf_vc_xn *xn, + const struct idpf_ctlq_msg *ctlq_msg) +{ + int err = 0; + + if (ctlq_msg->cookie.mbx.chnl_opcode != xn->vc_op) { + dev_err_ratelimited(&adapter->pdev->dev, "Async message opcode does not match transaction opcode (msg: %d) (xn: %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode, xn->vc_op); + xn->reply_sz = 0; + err = -EINVAL; + goto release_bufs; + } + + if (xn->async_handler) { + err = xn->async_handler(adapter, xn, ctlq_msg); + goto release_bufs; + } + + if (ctlq_msg->cookie.mbx.chnl_retval) { + xn->reply_sz = 0; + dev_err_ratelimited(&adapter->pdev->dev, "Async message failure (op %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode); + err = -EINVAL; + } + +release_bufs: + idpf_vc_xn_push_free(adapter->vcxn_mngr, xn); + + return err; +} + +/** + * idpf_vc_xn_forward_reply - copy a reply back to receiving thread + * @adapter: driver specific private structure with vcxn_mngr + * @ctlq_msg: controlq message to send back to receiving thread + */ +static int +idpf_vc_xn_forward_reply(struct idpf_adapter *adapter, + const struct idpf_ctlq_msg *ctlq_msg) +{ + const void *payload = NULL; + size_t payload_size = 0; + struct idpf_vc_xn *xn; + u16 msg_info; + int err = 0; + u16 xn_idx; + u16 salt; + + msg_info = ctlq_msg->ctx.sw_cookie.data; + xn_idx = FIELD_GET(IDPF_VC_XN_IDX_M, msg_info); + if (xn_idx >= ARRAY_SIZE(adapter->vcxn_mngr->ring)) { + dev_err_ratelimited(&adapter->pdev->dev, "Out of bounds cookie received: %02x\n", + xn_idx); + return -EINVAL; + } + xn = &adapter->vcxn_mngr->ring[xn_idx]; + salt = FIELD_GET(IDPF_VC_XN_SALT_M, msg_info); + if (xn->salt != salt) { + dev_err_ratelimited(&adapter->pdev->dev, "Transaction salt does not match (%02x != %02x)\n", + xn->salt, salt); + return -EINVAL; + } + + idpf_vc_xn_lock(xn); + switch (xn->state) { + case IDPF_VC_XN_WAITING: + /* success */ + break; + case IDPF_VC_XN_IDLE: + dev_err_ratelimited(&adapter->pdev->dev, "Unexpected or belated VC reply (op %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode); + err = -EINVAL; + goto out_unlock; + case IDPF_VC_XN_SHUTDOWN: + /* ENXIO is a bit special here as the recv msg loop uses that + * know if it should stop trying to clean the ring if we lost + * the virtchnl. We need to stop playing with registers and + * yield. + */ + err = -ENXIO; + goto out_unlock; + case IDPF_VC_XN_ASYNC: + err = idpf_vc_xn_forward_async(adapter, xn, ctlq_msg); + idpf_vc_xn_unlock(xn); + return err; + default: + dev_err_ratelimited(&adapter->pdev->dev, "Overwriting VC reply (op %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode); + err = -EBUSY; + goto out_unlock; + } + + if (ctlq_msg->cookie.mbx.chnl_opcode != xn->vc_op) { + dev_err_ratelimited(&adapter->pdev->dev, "Message opcode does not match transaction opcode (msg: %d) (xn: %d)\n", + ctlq_msg->cookie.mbx.chnl_opcode, xn->vc_op); + xn->reply_sz = 0; + xn->state = IDPF_VC_XN_COMPLETED_FAILED; + err = -EINVAL; + goto out_unlock; + } + + if (ctlq_msg->cookie.mbx.chnl_retval) { + xn->reply_sz = 0; + xn->state = IDPF_VC_XN_COMPLETED_FAILED; + err = -EINVAL; + goto out_unlock; + } + + if (ctlq_msg->data_len) { + payload = ctlq_msg->ctx.indirect.payload->va; + payload_size = ctlq_msg->ctx.indirect.payload->size; + } + + xn->reply_sz = payload_size; + xn->state = IDPF_VC_XN_COMPLETED_SUCCESS; + + if (xn->reply.iov_base && xn->reply.iov_len && payload_size) + memcpy(xn->reply.iov_base, payload, + min_t(size_t, xn->reply.iov_len, payload_size)); + +out_unlock: + idpf_vc_xn_unlock(xn); + /* we _cannot_ hold lock while calling complete */ + complete(&xn->completed); + + return err; +} + +/** + * idpf_recv_mb_msg - Receive message over mailbox + * @adapter: Driver specific private structure + * + * Will receive control queue message and posts the receive buffer. Returns 0 + * on success and negative on failure. + */ +int idpf_recv_mb_msg(struct idpf_adapter *adapter) +{ + struct idpf_ctlq_msg ctlq_msg; + struct idpf_dma_mem *dma_mem; + int post_err, err; + u16 num_recv; + + while (1) { + /* This will get <= num_recv messages and output how many + * actually received on num_recv. + */ + num_recv = 1; + err = idpf_ctlq_recv(adapter->hw.arq, &num_recv, &ctlq_msg); + if (err || !num_recv) + break; + + if (ctlq_msg.data_len) { + dma_mem = ctlq_msg.ctx.indirect.payload; + } else { + dma_mem = NULL; + num_recv = 0; + } + + if (ctlq_msg.cookie.mbx.chnl_opcode == VIRTCHNL2_OP_EVENT) + idpf_recv_event_msg(adapter, &ctlq_msg); + else + err = idpf_vc_xn_forward_reply(adapter, &ctlq_msg); + + post_err = idpf_ctlq_post_rx_buffs(&adapter->hw, + adapter->hw.arq, + &num_recv, &dma_mem); + + /* If post failed clear the only buffer we supplied */ + if (post_err) { + if (dma_mem) + dmam_free_coherent(&adapter->pdev->dev, + dma_mem->size, dma_mem->va, + dma_mem->pa); + break; + } + + /* virtchnl trying to shutdown, stop cleaning */ + if (err == -ENXIO) + break; + } + + return err; +} + +/** + * idpf_wait_for_marker_event - wait for software marker response + * @vport: virtual port data structure + * + * Returns 0 success, negative on failure. + **/ +static int idpf_wait_for_marker_event(struct idpf_vport *vport) +{ + int event; + int i; + + for (i = 0; i < vport->num_txq; i++) + set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags); + + event = wait_event_timeout(vport->sw_marker_wq, + test_and_clear_bit(IDPF_VPORT_SW_MARKER, + vport->flags), + msecs_to_jiffies(500)); + + for (i = 0; i < vport->num_txq; i++) + clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); + + if (event) + return 0; + + dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n"); + + return -ETIMEDOUT; +} + +/** + * idpf_send_ver_msg - send virtchnl version message + * @adapter: Driver specific private structure + * + * Send virtchnl version message. Returns 0 on success, negative on failure. + */ +static int idpf_send_ver_msg(struct idpf_adapter *adapter) +{ + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_version_info vvi; + ssize_t reply_sz; + u32 major, minor; + int err = 0; + + if (adapter->virt_ver_maj) { + vvi.major = cpu_to_le32(adapter->virt_ver_maj); + vvi.minor = cpu_to_le32(adapter->virt_ver_min); + } else { + vvi.major = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MAJOR); + vvi.minor = cpu_to_le32(IDPF_VIRTCHNL_VERSION_MINOR); + } + + xn_params.vc_op = VIRTCHNL2_OP_VERSION; + xn_params.send_buf.iov_base = &vvi; + xn_params.send_buf.iov_len = sizeof(vvi); + xn_params.recv_buf = xn_params.send_buf; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (reply_sz < sizeof(vvi)) + return -EIO; + + major = le32_to_cpu(vvi.major); + minor = le32_to_cpu(vvi.minor); + + if (major > IDPF_VIRTCHNL_VERSION_MAJOR) { + dev_warn(&adapter->pdev->dev, "Virtchnl major version greater than supported\n"); + return -EINVAL; + } + + if (major == IDPF_VIRTCHNL_VERSION_MAJOR && + minor > IDPF_VIRTCHNL_VERSION_MINOR) + dev_warn(&adapter->pdev->dev, "Virtchnl minor version didn't match\n"); + + /* If we have a mismatch, resend version to update receiver on what + * version we will use. + */ + if (!adapter->virt_ver_maj && + major != IDPF_VIRTCHNL_VERSION_MAJOR && + minor != IDPF_VIRTCHNL_VERSION_MINOR) + err = -EAGAIN; + + adapter->virt_ver_maj = major; + adapter->virt_ver_min = minor; + + return err; +} + +/** + * idpf_send_get_caps_msg - Send virtchnl get capabilities message + * @adapter: Driver specific private structure + * + * Send virtchl get capabilities message. Returns 0 on success, negative on + * failure. + */ +static int idpf_send_get_caps_msg(struct idpf_adapter *adapter) +{ + struct virtchnl2_get_capabilities caps = {}; + struct idpf_vc_xn_params xn_params = {}; + ssize_t reply_sz; + + caps.csum_caps = + cpu_to_le32(VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP | + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP | + VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP | + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP | + VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL | + VIRTCHNL2_CAP_RX_CSUM_L3_SINGLE_TUNNEL | + VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL | + VIRTCHNL2_CAP_RX_CSUM_L4_SINGLE_TUNNEL | + VIRTCHNL2_CAP_RX_CSUM_GENERIC); + + caps.seg_caps = + cpu_to_le32(VIRTCHNL2_CAP_SEG_IPV4_TCP | + VIRTCHNL2_CAP_SEG_IPV4_UDP | + VIRTCHNL2_CAP_SEG_IPV4_SCTP | + VIRTCHNL2_CAP_SEG_IPV6_TCP | + VIRTCHNL2_CAP_SEG_IPV6_UDP | + VIRTCHNL2_CAP_SEG_IPV6_SCTP | + VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL); + + caps.rss_caps = + cpu_to_le64(VIRTCHNL2_CAP_RSS_IPV4_TCP | + VIRTCHNL2_CAP_RSS_IPV4_UDP | + VIRTCHNL2_CAP_RSS_IPV4_SCTP | + VIRTCHNL2_CAP_RSS_IPV4_OTHER | + VIRTCHNL2_CAP_RSS_IPV6_TCP | + VIRTCHNL2_CAP_RSS_IPV6_UDP | + VIRTCHNL2_CAP_RSS_IPV6_SCTP | + VIRTCHNL2_CAP_RSS_IPV6_OTHER); + + caps.hsplit_caps = + cpu_to_le32(VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 | + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6); + + caps.rsc_caps = + cpu_to_le32(VIRTCHNL2_CAP_RSC_IPV4_TCP | + VIRTCHNL2_CAP_RSC_IPV6_TCP); + + caps.other_caps = + cpu_to_le64(VIRTCHNL2_CAP_SRIOV | + VIRTCHNL2_CAP_MACFILTER | + VIRTCHNL2_CAP_SPLITQ_QSCHED | + VIRTCHNL2_CAP_PROMISC | + VIRTCHNL2_CAP_LOOPBACK); + + xn_params.vc_op = VIRTCHNL2_OP_GET_CAPS; + xn_params.send_buf.iov_base = ∩︀ + xn_params.send_buf.iov_len = sizeof(caps); + xn_params.recv_buf.iov_base = &adapter->caps; + xn_params.recv_buf.iov_len = sizeof(adapter->caps); + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (reply_sz < sizeof(adapter->caps)) + return -EIO; + + return 0; +} + +/** + * idpf_vport_alloc_max_qs - Allocate max queues for a vport + * @adapter: Driver specific private structure + * @max_q: vport max queue structure + */ +int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q) +{ + struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues; + struct virtchnl2_get_capabilities *caps = &adapter->caps; + u16 default_vports = idpf_get_default_vports(adapter); + int max_rx_q, max_tx_q; + + mutex_lock(&adapter->queue_lock); + + max_rx_q = le16_to_cpu(caps->max_rx_q) / default_vports; + max_tx_q = le16_to_cpu(caps->max_tx_q) / default_vports; + if (adapter->num_alloc_vports < default_vports) { + max_q->max_rxq = min_t(u16, max_rx_q, IDPF_MAX_Q); + max_q->max_txq = min_t(u16, max_tx_q, IDPF_MAX_Q); + } else { + max_q->max_rxq = IDPF_MIN_Q; + max_q->max_txq = IDPF_MIN_Q; + } + max_q->max_bufq = max_q->max_rxq * IDPF_MAX_BUFQS_PER_RXQ_GRP; + max_q->max_complq = max_q->max_txq; + + if (avail_queues->avail_rxq < max_q->max_rxq || + avail_queues->avail_txq < max_q->max_txq || + avail_queues->avail_bufq < max_q->max_bufq || + avail_queues->avail_complq < max_q->max_complq) { + mutex_unlock(&adapter->queue_lock); + + return -EINVAL; + } + + avail_queues->avail_rxq -= max_q->max_rxq; + avail_queues->avail_txq -= max_q->max_txq; + avail_queues->avail_bufq -= max_q->max_bufq; + avail_queues->avail_complq -= max_q->max_complq; + + mutex_unlock(&adapter->queue_lock); + + return 0; +} + +/** + * idpf_vport_dealloc_max_qs - Deallocate max queues of a vport + * @adapter: Driver specific private structure + * @max_q: vport max queue structure + */ +void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q) +{ + struct idpf_avail_queue_info *avail_queues; + + mutex_lock(&adapter->queue_lock); + avail_queues = &adapter->avail_queues; + + avail_queues->avail_rxq += max_q->max_rxq; + avail_queues->avail_txq += max_q->max_txq; + avail_queues->avail_bufq += max_q->max_bufq; + avail_queues->avail_complq += max_q->max_complq; + + mutex_unlock(&adapter->queue_lock); +} + +/** + * idpf_init_avail_queues - Initialize available queues on the device + * @adapter: Driver specific private structure + */ +static void idpf_init_avail_queues(struct idpf_adapter *adapter) +{ + struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues; + struct virtchnl2_get_capabilities *caps = &adapter->caps; + + avail_queues->avail_rxq = le16_to_cpu(caps->max_rx_q); + avail_queues->avail_txq = le16_to_cpu(caps->max_tx_q); + avail_queues->avail_bufq = le16_to_cpu(caps->max_rx_bufq); + avail_queues->avail_complq = le16_to_cpu(caps->max_tx_complq); +} + +/** + * idpf_get_reg_intr_vecs - Get vector queue register offset + * @vport: virtual port structure + * @reg_vals: Register offsets to store in + * + * Returns number of registers that got populated + */ +int idpf_get_reg_intr_vecs(struct idpf_vport *vport, + struct idpf_vec_regs *reg_vals) +{ + struct virtchnl2_vector_chunks *chunks; + struct idpf_vec_regs reg_val; + u16 num_vchunks, num_vec; + int num_regs = 0, i, j; + + chunks = &vport->adapter->req_vec_chunks->vchunks; + num_vchunks = le16_to_cpu(chunks->num_vchunks); + + for (j = 0; j < num_vchunks; j++) { + struct virtchnl2_vector_chunk *chunk; + u32 dynctl_reg_spacing; + u32 itrn_reg_spacing; + + chunk = &chunks->vchunks[j]; + num_vec = le16_to_cpu(chunk->num_vectors); + reg_val.dyn_ctl_reg = le32_to_cpu(chunk->dynctl_reg_start); + reg_val.itrn_reg = le32_to_cpu(chunk->itrn_reg_start); + reg_val.itrn_index_spacing = le32_to_cpu(chunk->itrn_index_spacing); + + dynctl_reg_spacing = le32_to_cpu(chunk->dynctl_reg_spacing); + itrn_reg_spacing = le32_to_cpu(chunk->itrn_reg_spacing); + + for (i = 0; i < num_vec; i++) { + reg_vals[num_regs].dyn_ctl_reg = reg_val.dyn_ctl_reg; + reg_vals[num_regs].itrn_reg = reg_val.itrn_reg; + reg_vals[num_regs].itrn_index_spacing = + reg_val.itrn_index_spacing; + + reg_val.dyn_ctl_reg += dynctl_reg_spacing; + reg_val.itrn_reg += itrn_reg_spacing; + num_regs++; + } + } + + return num_regs; +} + +/** + * idpf_vport_get_q_reg - Get the queue registers for the vport + * @reg_vals: register values needing to be set + * @num_regs: amount we expect to fill + * @q_type: queue model + * @chunks: queue regs received over mailbox + * + * This function parses the queue register offsets from the queue register + * chunk information, with a specific queue type and stores it into the array + * passed as an argument. It returns the actual number of queue registers that + * are filled. + */ +static int idpf_vport_get_q_reg(u32 *reg_vals, int num_regs, u32 q_type, + struct virtchnl2_queue_reg_chunks *chunks) +{ + u16 num_chunks = le16_to_cpu(chunks->num_chunks); + int reg_filled = 0, i; + u32 reg_val; + + while (num_chunks--) { + struct virtchnl2_queue_reg_chunk *chunk; + u16 num_q; + + chunk = &chunks->chunks[num_chunks]; + if (le32_to_cpu(chunk->type) != q_type) + continue; + + num_q = le32_to_cpu(chunk->num_queues); + reg_val = le64_to_cpu(chunk->qtail_reg_start); + for (i = 0; i < num_q && reg_filled < num_regs ; i++) { + reg_vals[reg_filled++] = reg_val; + reg_val += le32_to_cpu(chunk->qtail_reg_spacing); + } + } + + return reg_filled; +} + +/** + * __idpf_queue_reg_init - initialize queue registers + * @vport: virtual port structure + * @reg_vals: registers we are initializing + * @num_regs: how many registers there are in total + * @q_type: queue model + * + * Return number of queues that are initialized + */ +static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals, + int num_regs, u32 q_type) +{ + struct idpf_adapter *adapter = vport->adapter; + struct idpf_queue *q; + int i, j, k = 0; + + switch (q_type) { + case VIRTCHNL2_QUEUE_TYPE_TX: + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq && k < num_regs; j++, k++) + tx_qgrp->txqs[j]->tail = + idpf_get_reg_addr(adapter, reg_vals[k]); + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq && k < num_regs; j++, k++) { + q = rx_qgrp->singleq.rxqs[j]; + q->tail = idpf_get_reg_addr(adapter, + reg_vals[k]); + } + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u8 num_bufqs = vport->num_bufqs_per_qgrp; + + for (j = 0; j < num_bufqs && k < num_regs; j++, k++) { + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->tail = idpf_get_reg_addr(adapter, + reg_vals[k]); + } + } + break; + default: + break; + } + + return k; +} + +/** + * idpf_queue_reg_init - initialize queue registers + * @vport: virtual port structure + * + * Return 0 on success, negative on failure + */ +int idpf_queue_reg_init(struct idpf_vport *vport) +{ + struct virtchnl2_create_vport *vport_params; + struct virtchnl2_queue_reg_chunks *chunks; + struct idpf_vport_config *vport_config; + u16 vport_idx = vport->idx; + int num_regs, ret = 0; + u32 *reg_vals; + + /* We may never deal with more than 256 same type of queues */ + reg_vals = kzalloc(sizeof(void *) * IDPF_LARGE_MAX_Q, GFP_KERNEL); + if (!reg_vals) + return -ENOMEM; + + vport_config = vport->adapter->vport_config[vport_idx]; + if (vport_config->req_qs_chunks) { + struct virtchnl2_add_queues *vc_aq = + (struct virtchnl2_add_queues *)vport_config->req_qs_chunks; + chunks = &vc_aq->chunks; + } else { + vport_params = vport->adapter->vport_params_recvd[vport_idx]; + chunks = &vport_params->chunks; + } + + /* Initialize Tx queue tail register address */ + num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q, + VIRTCHNL2_QUEUE_TYPE_TX, + chunks); + if (num_regs < vport->num_txq) { + ret = -EINVAL; + goto free_reg_vals; + } + + num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs, + VIRTCHNL2_QUEUE_TYPE_TX); + if (num_regs < vport->num_txq) { + ret = -EINVAL; + goto free_reg_vals; + } + + /* Initialize Rx/buffer queue tail register address based on Rx queue + * model + */ + if (idpf_is_queue_model_split(vport->rxq_model)) { + num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q, + VIRTCHNL2_QUEUE_TYPE_RX_BUFFER, + chunks); + if (num_regs < vport->num_bufq) { + ret = -EINVAL; + goto free_reg_vals; + } + + num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs, + VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + if (num_regs < vport->num_bufq) { + ret = -EINVAL; + goto free_reg_vals; + } + } else { + num_regs = idpf_vport_get_q_reg(reg_vals, IDPF_LARGE_MAX_Q, + VIRTCHNL2_QUEUE_TYPE_RX, + chunks); + if (num_regs < vport->num_rxq) { + ret = -EINVAL; + goto free_reg_vals; + } + + num_regs = __idpf_queue_reg_init(vport, reg_vals, num_regs, + VIRTCHNL2_QUEUE_TYPE_RX); + if (num_regs < vport->num_rxq) { + ret = -EINVAL; + goto free_reg_vals; + } + } + +free_reg_vals: + kfree(reg_vals); + + return ret; +} + +/** + * idpf_send_create_vport_msg - Send virtchnl create vport message + * @adapter: Driver specific private structure + * @max_q: vport max queue info + * + * send virtchnl creae vport message + * + * Returns 0 on success, negative on failure + */ +int idpf_send_create_vport_msg(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q) +{ + struct virtchnl2_create_vport *vport_msg; + struct idpf_vc_xn_params xn_params = {}; + u16 idx = adapter->next_vport; + int err, buf_size; + ssize_t reply_sz; + + buf_size = sizeof(struct virtchnl2_create_vport); + if (!adapter->vport_params_reqd[idx]) { + adapter->vport_params_reqd[idx] = kzalloc(buf_size, + GFP_KERNEL); + if (!adapter->vport_params_reqd[idx]) + return -ENOMEM; + } + + vport_msg = adapter->vport_params_reqd[idx]; + vport_msg->vport_type = cpu_to_le16(VIRTCHNL2_VPORT_TYPE_DEFAULT); + vport_msg->vport_index = cpu_to_le16(idx); + + if (adapter->req_tx_splitq) + vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT); + else + vport_msg->txq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE); + + if (adapter->req_rx_splitq) + vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SPLIT); + else + vport_msg->rxq_model = cpu_to_le16(VIRTCHNL2_QUEUE_MODEL_SINGLE); + + err = idpf_vport_calc_total_qs(adapter, idx, vport_msg, max_q); + if (err) { + dev_err(&adapter->pdev->dev, "Enough queues are not available"); + + return err; + } + + if (!adapter->vport_params_recvd[idx]) { + adapter->vport_params_recvd[idx] = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, + GFP_KERNEL); + if (!adapter->vport_params_recvd[idx]) { + err = -ENOMEM; + goto free_vport_params; + } + } + + xn_params.vc_op = VIRTCHNL2_OP_CREATE_VPORT; + xn_params.send_buf.iov_base = vport_msg; + xn_params.send_buf.iov_len = buf_size; + xn_params.recv_buf.iov_base = adapter->vport_params_recvd[idx]; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) { + err = reply_sz; + goto free_vport_params; + } + if (reply_sz < IDPF_CTLQ_MAX_BUF_LEN) { + err = -EIO; + goto free_vport_params; + } + + return 0; + +free_vport_params: + kfree(adapter->vport_params_recvd[idx]); + adapter->vport_params_recvd[idx] = NULL; + kfree(adapter->vport_params_reqd[idx]); + adapter->vport_params_reqd[idx] = NULL; + + return err; +} + +/** + * idpf_check_supported_desc_ids - Verify we have required descriptor support + * @vport: virtual port structure + * + * Return 0 on success, error on failure + */ +int idpf_check_supported_desc_ids(struct idpf_vport *vport) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_create_vport *vport_msg; + u64 rx_desc_ids, tx_desc_ids; + + vport_msg = adapter->vport_params_recvd[vport->idx]; + + rx_desc_ids = le64_to_cpu(vport_msg->rx_desc_ids); + tx_desc_ids = le64_to_cpu(vport_msg->tx_desc_ids); + + if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M)) { + dev_info(&adapter->pdev->dev, "Minimum RX descriptor support not provided, using the default\n"); + vport_msg->rx_desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); + } + } else { + if (!(rx_desc_ids & VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M)) + vport->base_rxd = true; + } + + if (vport->txq_model != VIRTCHNL2_QUEUE_MODEL_SPLIT) + return 0; + + if ((tx_desc_ids & MIN_SUPPORT_TXDID) != MIN_SUPPORT_TXDID) { + dev_info(&adapter->pdev->dev, "Minimum TX descriptor support not provided, using the default\n"); + vport_msg->tx_desc_ids = cpu_to_le64(MIN_SUPPORT_TXDID); + } + + return 0; +} + +/** + * idpf_send_destroy_vport_msg - Send virtchnl destroy vport message + * @vport: virtual port data structure + * + * Send virtchnl destroy vport message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_destroy_vport_msg(struct idpf_vport *vport) +{ + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_vport v_id; + ssize_t reply_sz; + + v_id.vport_id = cpu_to_le32(vport->vport_id); + + xn_params.vc_op = VIRTCHNL2_OP_DESTROY_VPORT; + xn_params.send_buf.iov_base = &v_id; + xn_params.send_buf.iov_len = sizeof(v_id); + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + + return reply_sz < 0 ? reply_sz : 0; +} + +/** + * idpf_send_enable_vport_msg - Send virtchnl enable vport message + * @vport: virtual port data structure + * + * Send enable vport virtchnl message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_enable_vport_msg(struct idpf_vport *vport) +{ + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_vport v_id; + ssize_t reply_sz; + + v_id.vport_id = cpu_to_le32(vport->vport_id); + + xn_params.vc_op = VIRTCHNL2_OP_ENABLE_VPORT; + xn_params.send_buf.iov_base = &v_id; + xn_params.send_buf.iov_len = sizeof(v_id); + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + + return reply_sz < 0 ? reply_sz : 0; +} + +/** + * idpf_send_disable_vport_msg - Send virtchnl disable vport message + * @vport: virtual port data structure + * + * Send disable vport virtchnl message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_disable_vport_msg(struct idpf_vport *vport) +{ + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_vport v_id; + ssize_t reply_sz; + + v_id.vport_id = cpu_to_le32(vport->vport_id); + + xn_params.vc_op = VIRTCHNL2_OP_DISABLE_VPORT; + xn_params.send_buf.iov_base = &v_id; + xn_params.send_buf.iov_len = sizeof(v_id); + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + + return reply_sz < 0 ? reply_sz : 0; +} + +/** + * idpf_send_config_tx_queues_msg - Send virtchnl config tx queues message + * @vport: virtual port data structure + * + * Send config tx queues virtchnl message. Returns 0 on success, negative on + * failure. + */ +static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) +{ + struct virtchnl2_config_tx_queues *ctq __free(kfree) = NULL; + struct virtchnl2_txq_info *qi __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; + u32 config_sz, chunk_sz, buf_sz; + int totqs, num_msgs, num_chunks; + ssize_t reply_sz; + int i, k = 0; + + totqs = vport->num_txq + vport->num_complq; + qi = kcalloc(totqs, sizeof(struct virtchnl2_txq_info), GFP_KERNEL); + if (!qi) + return -ENOMEM; + + /* Populate the queue info buffer with all queue context info */ + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + int j, sched_mode; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { + qi[k].queue_id = + cpu_to_le32(tx_qgrp->txqs[j]->q_id); + qi[k].model = + cpu_to_le16(vport->txq_model); + qi[k].type = + cpu_to_le32(tx_qgrp->txqs[j]->q_type); + qi[k].ring_len = + cpu_to_le16(tx_qgrp->txqs[j]->desc_count); + qi[k].dma_ring_addr = + cpu_to_le64(tx_qgrp->txqs[j]->dma); + if (idpf_is_queue_model_split(vport->txq_model)) { + struct idpf_queue *q = tx_qgrp->txqs[j]; + + qi[k].tx_compl_queue_id = + cpu_to_le16(tx_qgrp->complq->q_id); + qi[k].relative_queue_id = cpu_to_le16(j); + + if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags)) + qi[k].sched_mode = + cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW); + else + qi[k].sched_mode = + cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); + } else { + qi[k].sched_mode = + cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); + } + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + continue; + + qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id); + qi[k].model = cpu_to_le16(vport->txq_model); + qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type); + qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma); + + if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags)) + sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; + else + sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; + qi[k].sched_mode = cpu_to_le16(sched_mode); + + k++; + } + + /* Make sure accounting agrees */ + if (k != totqs) + return -EINVAL; + + /* Chunk up the queue contexts into multiple messages to avoid + * sending a control queue message buffer that is too large + */ + config_sz = sizeof(struct virtchnl2_config_tx_queues); + chunk_sz = sizeof(struct virtchnl2_txq_info); + + num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), + totqs); + num_msgs = DIV_ROUND_UP(totqs, num_chunks); + + buf_sz = struct_size(ctq, qinfo, num_chunks); + ctq = kzalloc(buf_sz, GFP_KERNEL); + if (!ctq) + return -ENOMEM; + + xn_params.vc_op = VIRTCHNL2_OP_CONFIG_TX_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + + for (i = 0, k = 0; i < num_msgs; i++) { + memset(ctq, 0, buf_sz); + ctq->vport_id = cpu_to_le32(vport->vport_id); + ctq->num_qinfo = cpu_to_le16(num_chunks); + memcpy(ctq->qinfo, &qi[k], chunk_sz * num_chunks); + + xn_params.send_buf.iov_base = ctq; + xn_params.send_buf.iov_len = buf_sz; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + k += num_chunks; + totqs -= num_chunks; + num_chunks = min(num_chunks, totqs); + /* Recalculate buffer size */ + buf_sz = struct_size(ctq, qinfo, num_chunks); + } + + return 0; +} + +/** + * idpf_send_config_rx_queues_msg - Send virtchnl config rx queues message + * @vport: virtual port data structure + * + * Send config rx queues virtchnl message. Returns 0 on success, negative on + * failure. + */ +static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) +{ + struct virtchnl2_config_rx_queues *crq __free(kfree) = NULL; + struct virtchnl2_rxq_info *qi __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; + u32 config_sz, chunk_sz, buf_sz; + int totqs, num_msgs, num_chunks; + ssize_t reply_sz; + int i, k = 0; + + totqs = vport->num_rxq + vport->num_bufq; + qi = kcalloc(totqs, sizeof(struct virtchnl2_rxq_info), GFP_KERNEL); + if (!qi) + return -ENOMEM; + + /* Populate the queue info buffer with all queue context info */ + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq; + int j; + + if (!idpf_is_queue_model_split(vport->rxq_model)) + goto setup_rxqs; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { + struct idpf_queue *bufq = + &rx_qgrp->splitq.bufq_sets[j].bufq; + + qi[k].queue_id = cpu_to_le32(bufq->q_id); + qi[k].model = cpu_to_le16(vport->rxq_model); + qi[k].type = cpu_to_le32(bufq->q_type); + qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); + qi[k].ring_len = cpu_to_le16(bufq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(bufq->dma); + qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size); + qi[k].buffer_notif_stride = bufq->rx_buf_stride; + qi[k].rx_buffer_low_watermark = + cpu_to_le16(bufq->rx_buffer_low_watermark); + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) + qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); + } + +setup_rxqs: + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { + struct idpf_queue *rxq; + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + rxq = rx_qgrp->singleq.rxqs[j]; + goto common_qi_fields; + } + rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + qi[k].rx_bufq1_id = + cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id); + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { + qi[k].bufq2_ena = IDPF_BUFQ2_ENA; + qi[k].rx_bufq2_id = + cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id); + } + qi[k].rx_buffer_low_watermark = + cpu_to_le16(rxq->rx_buffer_low_watermark); + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) + qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); + +common_qi_fields: + if (rxq->rx_hsplit_en) { + qi[k].qflags |= + cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); + qi[k].hdr_buffer_size = + cpu_to_le16(rxq->rx_hbuf_size); + } + qi[k].queue_id = cpu_to_le32(rxq->q_id); + qi[k].model = cpu_to_le16(vport->rxq_model); + qi[k].type = cpu_to_le32(rxq->q_type); + qi[k].ring_len = cpu_to_le16(rxq->desc_count); + qi[k].dma_ring_addr = cpu_to_le64(rxq->dma); + qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size); + qi[k].data_buffer_size = cpu_to_le32(rxq->rx_buf_size); + qi[k].qflags |= + cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE); + qi[k].desc_ids = cpu_to_le64(rxq->rxdids); + } + } + + /* Make sure accounting agrees */ + if (k != totqs) + return -EINVAL; + + /* Chunk up the queue contexts into multiple messages to avoid + * sending a control queue message buffer that is too large + */ + config_sz = sizeof(struct virtchnl2_config_rx_queues); + chunk_sz = sizeof(struct virtchnl2_rxq_info); + + num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), + totqs); + num_msgs = DIV_ROUND_UP(totqs, num_chunks); + + buf_sz = struct_size(crq, qinfo, num_chunks); + crq = kzalloc(buf_sz, GFP_KERNEL); + if (!crq) + return -ENOMEM; + + xn_params.vc_op = VIRTCHNL2_OP_CONFIG_RX_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + + for (i = 0, k = 0; i < num_msgs; i++) { + memset(crq, 0, buf_sz); + crq->vport_id = cpu_to_le32(vport->vport_id); + crq->num_qinfo = cpu_to_le16(num_chunks); + memcpy(crq->qinfo, &qi[k], chunk_sz * num_chunks); + + xn_params.send_buf.iov_base = crq; + xn_params.send_buf.iov_len = buf_sz; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + k += num_chunks; + totqs -= num_chunks; + num_chunks = min(num_chunks, totqs); + /* Recalculate buffer size */ + buf_sz = struct_size(crq, qinfo, num_chunks); + } + + return 0; +} + +/** + * idpf_send_ena_dis_queues_msg - Send virtchnl enable or disable + * queues message + * @vport: virtual port data structure + * @ena: if true enable, false disable + * + * Send enable or disable queues virtchnl message. Returns 0 on success, + * negative on failure. + */ +static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) +{ + struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL; + struct virtchnl2_queue_chunk *qc __free(kfree) = NULL; + u32 num_msgs, num_chunks, num_txq, num_rxq, num_q; + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_queue_chunks *qcs; + u32 config_sz, chunk_sz, buf_sz; + ssize_t reply_sz; + int i, j, k = 0; + + num_txq = vport->num_txq + vport->num_complq; + num_rxq = vport->num_rxq + vport->num_bufq; + num_q = num_txq + num_rxq; + buf_sz = sizeof(struct virtchnl2_queue_chunk) * num_q; + qc = kzalloc(buf_sz, GFP_KERNEL); + if (!qc) + return -ENOMEM; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { + qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); + qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } + } + if (vport->num_txq != k) + return -EINVAL; + + if (!idpf_is_queue_model_split(vport->txq_model)) + goto setup_rx; + + for (i = 0; i < vport->num_txq_grp; i++, k++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type); + qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } + if (vport->num_complq != (k - vport->num_txq)) + return -EINVAL; + +setup_rx: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { + if (idpf_is_queue_model_split(vport->rxq_model)) { + qc[k].start_queue_id = + cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id); + qc[k].type = + cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type); + } else { + qc[k].start_queue_id = + cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id); + qc[k].type = + cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type); + } + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } + } + if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) + return -EINVAL; + + if (!idpf_is_queue_model_split(vport->rxq_model)) + goto send_msg; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + + for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { + struct idpf_queue *q; + + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + qc[k].type = cpu_to_le32(q->q_type); + qc[k].start_queue_id = cpu_to_le32(q->q_id); + qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + } + } + if (vport->num_bufq != k - (vport->num_txq + + vport->num_complq + + vport->num_rxq)) + return -EINVAL; + +send_msg: + /* Chunk up the queue info into multiple messages */ + config_sz = sizeof(struct virtchnl2_del_ena_dis_queues); + chunk_sz = sizeof(struct virtchnl2_queue_chunk); + + num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), + num_q); + num_msgs = DIV_ROUND_UP(num_q, num_chunks); + + buf_sz = struct_size(eq, chunks.chunks, num_chunks); + eq = kzalloc(buf_sz, GFP_KERNEL); + if (!eq) + return -ENOMEM; + + if (ena) { + xn_params.vc_op = VIRTCHNL2_OP_ENABLE_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + } else { + xn_params.vc_op = VIRTCHNL2_OP_DISABLE_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + } + + for (i = 0, k = 0; i < num_msgs; i++) { + memset(eq, 0, buf_sz); + eq->vport_id = cpu_to_le32(vport->vport_id); + eq->chunks.num_chunks = cpu_to_le16(num_chunks); + qcs = &eq->chunks; + memcpy(qcs->chunks, &qc[k], chunk_sz * num_chunks); + + xn_params.send_buf.iov_base = eq; + xn_params.send_buf.iov_len = buf_sz; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + k += num_chunks; + num_q -= num_chunks; + num_chunks = min(num_chunks, num_q); + /* Recalculate buffer size */ + buf_sz = struct_size(eq, chunks.chunks, num_chunks); + } + + return 0; +} + +/** + * idpf_send_map_unmap_queue_vector_msg - Send virtchnl map or unmap queue + * vector message + * @vport: virtual port data structure + * @map: true for map and false for unmap + * + * Send map or unmap queue vector virtchnl message. Returns 0 on success, + * negative on failure. + */ +int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) +{ + struct virtchnl2_queue_vector_maps *vqvm __free(kfree) = NULL; + struct virtchnl2_queue_vector *vqv __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; + u32 config_sz, chunk_sz, buf_sz; + u32 num_msgs, num_chunks, num_q; + ssize_t reply_sz; + int i, j, k = 0; + + num_q = vport->num_txq + vport->num_rxq; + + buf_sz = sizeof(struct virtchnl2_queue_vector) * num_q; + vqv = kzalloc(buf_sz, GFP_KERNEL); + if (!vqv) + return -ENOMEM; + + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq; j++, k++) { + vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type); + vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + + if (idpf_is_queue_model_split(vport->txq_model)) { + vqv[k].vector_id = + cpu_to_le16(tx_qgrp->complq->q_vector->v_idx); + vqv[k].itr_idx = + cpu_to_le32(tx_qgrp->complq->q_vector->tx_itr_idx); + } else { + vqv[k].vector_id = + cpu_to_le16(tx_qgrp->txqs[j]->q_vector->v_idx); + vqv[k].itr_idx = + cpu_to_le32(tx_qgrp->txqs[j]->q_vector->tx_itr_idx); + } + } + } + + if (vport->num_txq != k) + return -EINVAL; + + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq; j++, k++) { + struct idpf_queue *rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + rxq = rx_qgrp->singleq.rxqs[j]; + + vqv[k].queue_type = cpu_to_le32(rxq->q_type); + vqv[k].queue_id = cpu_to_le32(rxq->q_id); + vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx); + vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx); + } + } + + if (idpf_is_queue_model_split(vport->txq_model)) { + if (vport->num_rxq != k - vport->num_complq) + return -EINVAL; + } else { + if (vport->num_rxq != k - vport->num_txq) + return -EINVAL; + } + + /* Chunk up the vector info into multiple messages */ + config_sz = sizeof(struct virtchnl2_queue_vector_maps); + chunk_sz = sizeof(struct virtchnl2_queue_vector); + + num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), + num_q); + num_msgs = DIV_ROUND_UP(num_q, num_chunks); + + buf_sz = struct_size(vqvm, qv_maps, num_chunks); + vqvm = kzalloc(buf_sz, GFP_KERNEL); + if (!vqvm) + return -ENOMEM; + + if (map) { + xn_params.vc_op = VIRTCHNL2_OP_MAP_QUEUE_VECTOR; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + } else { + xn_params.vc_op = VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR; + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + } + + for (i = 0, k = 0; i < num_msgs; i++) { + memset(vqvm, 0, buf_sz); + xn_params.send_buf.iov_base = vqvm; + xn_params.send_buf.iov_len = buf_sz; + vqvm->vport_id = cpu_to_le32(vport->vport_id); + vqvm->num_qv_maps = cpu_to_le16(num_chunks); + memcpy(vqvm->qv_maps, &vqv[k], chunk_sz * num_chunks); + + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + k += num_chunks; + num_q -= num_chunks; + num_chunks = min(num_chunks, num_q); + /* Recalculate buffer size */ + buf_sz = struct_size(vqvm, qv_maps, num_chunks); + } + + return 0; +} + +/** + * idpf_send_enable_queues_msg - send enable queues virtchnl message + * @vport: Virtual port private data structure + * + * Will send enable queues virtchnl message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_enable_queues_msg(struct idpf_vport *vport) +{ + return idpf_send_ena_dis_queues_msg(vport, true); +} + +/** + * idpf_send_disable_queues_msg - send disable queues virtchnl message + * @vport: Virtual port private data structure + * + * Will send disable queues virtchnl message. Returns 0 on success, negative + * on failure. + */ +int idpf_send_disable_queues_msg(struct idpf_vport *vport) +{ + int err, i; + + err = idpf_send_ena_dis_queues_msg(vport, false); + if (err) + return err; + + /* switch to poll mode as interrupts will be disabled after disable + * queues virtchnl message is sent + */ + for (i = 0; i < vport->num_txq; i++) + set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags); + + /* schedule the napi to receive all the marker packets */ + local_bh_disable(); + for (i = 0; i < vport->num_q_vectors; i++) + napi_schedule(&vport->q_vectors[i].napi); + local_bh_enable(); + + return idpf_wait_for_marker_event(vport); +} + +/** + * idpf_convert_reg_to_queue_chunks - Copy queue chunk information to the right + * structure + * @dchunks: Destination chunks to store data to + * @schunks: Source chunks to copy data from + * @num_chunks: number of chunks to copy + */ +static void idpf_convert_reg_to_queue_chunks(struct virtchnl2_queue_chunk *dchunks, + struct virtchnl2_queue_reg_chunk *schunks, + u16 num_chunks) +{ + u16 i; + + for (i = 0; i < num_chunks; i++) { + dchunks[i].type = schunks[i].type; + dchunks[i].start_queue_id = schunks[i].start_queue_id; + dchunks[i].num_queues = schunks[i].num_queues; + } +} + +/** + * idpf_send_delete_queues_msg - send delete queues virtchnl message + * @vport: Virtual port private data structure + * + * Will send delete queues virtchnl message. Return 0 on success, negative on + * failure. + */ +int idpf_send_delete_queues_msg(struct idpf_vport *vport) +{ + struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL; + struct virtchnl2_create_vport *vport_params; + struct virtchnl2_queue_reg_chunks *chunks; + struct idpf_vc_xn_params xn_params = {}; + struct idpf_vport_config *vport_config; + u16 vport_idx = vport->idx; + ssize_t reply_sz; + u16 num_chunks; + int buf_size; + + vport_config = vport->adapter->vport_config[vport_idx]; + if (vport_config->req_qs_chunks) { + chunks = &vport_config->req_qs_chunks->chunks; + } else { + vport_params = vport->adapter->vport_params_recvd[vport_idx]; + chunks = &vport_params->chunks; + } + + num_chunks = le16_to_cpu(chunks->num_chunks); + buf_size = struct_size(eq, chunks.chunks, num_chunks); + + eq = kzalloc(buf_size, GFP_KERNEL); + if (!eq) + return -ENOMEM; + + eq->vport_id = cpu_to_le32(vport->vport_id); + eq->chunks.num_chunks = cpu_to_le16(num_chunks); + + idpf_convert_reg_to_queue_chunks(eq->chunks.chunks, chunks->chunks, + num_chunks); + + xn_params.vc_op = VIRTCHNL2_OP_DEL_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = eq; + xn_params.send_buf.iov_len = buf_size; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + + return reply_sz < 0 ? reply_sz : 0; +} + +/** + * idpf_send_config_queues_msg - Send config queues virtchnl message + * @vport: Virtual port private data structure + * + * Will send config queues virtchnl message. Returns 0 on success, negative on + * failure. + */ +int idpf_send_config_queues_msg(struct idpf_vport *vport) +{ + int err; + + err = idpf_send_config_tx_queues_msg(vport); + if (err) + return err; + + return idpf_send_config_rx_queues_msg(vport); +} + +/** + * idpf_send_add_queues_msg - Send virtchnl add queues message + * @vport: Virtual port private data structure + * @num_tx_q: number of transmit queues + * @num_complq: number of transmit completion queues + * @num_rx_q: number of receive queues + * @num_rx_bufq: number of receive buffer queues + * + * Returns 0 on success, negative on failure. vport _MUST_ be const here as + * we should not change any fields within vport itself in this function. + */ +int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, + u16 num_complq, u16 num_rx_q, u16 num_rx_bufq) +{ + struct virtchnl2_add_queues *vc_msg __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; + struct idpf_vport_config *vport_config; + struct virtchnl2_add_queues aq = {}; + u16 vport_idx = vport->idx; + ssize_t reply_sz; + int size; + + vc_msg = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!vc_msg) + return -ENOMEM; + + vport_config = vport->adapter->vport_config[vport_idx]; + kfree(vport_config->req_qs_chunks); + vport_config->req_qs_chunks = NULL; + + aq.vport_id = cpu_to_le32(vport->vport_id); + aq.num_tx_q = cpu_to_le16(num_tx_q); + aq.num_tx_complq = cpu_to_le16(num_complq); + aq.num_rx_q = cpu_to_le16(num_rx_q); + aq.num_rx_bufq = cpu_to_le16(num_rx_bufq); + + xn_params.vc_op = VIRTCHNL2_OP_ADD_QUEUES; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = &aq; + xn_params.send_buf.iov_len = sizeof(aq); + xn_params.recv_buf.iov_base = vc_msg; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + /* compare vc_msg num queues with vport num queues */ + if (le16_to_cpu(vc_msg->num_tx_q) != num_tx_q || + le16_to_cpu(vc_msg->num_rx_q) != num_rx_q || + le16_to_cpu(vc_msg->num_tx_complq) != num_complq || + le16_to_cpu(vc_msg->num_rx_bufq) != num_rx_bufq) + return -EINVAL; + + size = struct_size(vc_msg, chunks.chunks, + le16_to_cpu(vc_msg->chunks.num_chunks)); + if (reply_sz < size) + return -EIO; + + vport_config->req_qs_chunks = kmemdup(vc_msg, size, GFP_KERNEL); + if (!vport_config->req_qs_chunks) + return -ENOMEM; + + return 0; +} + +/** + * idpf_send_alloc_vectors_msg - Send virtchnl alloc vectors message + * @adapter: Driver specific private structure + * @num_vectors: number of vectors to be allocated + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors) +{ + struct virtchnl2_alloc_vectors *rcvd_vec __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_alloc_vectors ac = {}; + ssize_t reply_sz; + u16 num_vchunks; + int size; + + ac.num_vectors = cpu_to_le16(num_vectors); + + rcvd_vec = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!rcvd_vec) + return -ENOMEM; + + xn_params.vc_op = VIRTCHNL2_OP_ALLOC_VECTORS; + xn_params.send_buf.iov_base = ∾ + xn_params.send_buf.iov_len = sizeof(ac); + xn_params.recv_buf.iov_base = rcvd_vec; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + num_vchunks = le16_to_cpu(rcvd_vec->vchunks.num_vchunks); + size = struct_size(rcvd_vec, vchunks.vchunks, num_vchunks); + if (reply_sz < size) + return -EIO; + + if (size > IDPF_CTLQ_MAX_BUF_LEN) + return -EINVAL; + + kfree(adapter->req_vec_chunks); + adapter->req_vec_chunks = kmemdup(rcvd_vec, size, GFP_KERNEL); + if (!adapter->req_vec_chunks) + return -ENOMEM; + + if (le16_to_cpu(adapter->req_vec_chunks->num_vectors) < num_vectors) { + kfree(adapter->req_vec_chunks); + adapter->req_vec_chunks = NULL; + return -EINVAL; + } + + return 0; +} + +/** + * idpf_send_dealloc_vectors_msg - Send virtchnl de allocate vectors message + * @adapter: Driver specific private structure + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter) +{ + struct virtchnl2_alloc_vectors *ac = adapter->req_vec_chunks; + struct virtchnl2_vector_chunks *vcs = &ac->vchunks; + struct idpf_vc_xn_params xn_params = {}; + ssize_t reply_sz; + int buf_size; + + buf_size = struct_size(vcs, vchunks, le16_to_cpu(vcs->num_vchunks)); + + xn_params.vc_op = VIRTCHNL2_OP_DEALLOC_VECTORS; + xn_params.send_buf.iov_base = vcs; + xn_params.send_buf.iov_len = buf_size; + xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + kfree(adapter->req_vec_chunks); + adapter->req_vec_chunks = NULL; + + return 0; +} + +/** + * idpf_get_max_vfs - Get max number of vfs supported + * @adapter: Driver specific private structure + * + * Returns max number of VFs + */ +static int idpf_get_max_vfs(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.max_sriov_vfs); +} + +/** + * idpf_send_set_sriov_vfs_msg - Send virtchnl set sriov vfs message + * @adapter: Driver specific private structure + * @num_vfs: number of virtual functions to be created + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs) +{ + struct virtchnl2_sriov_vfs_info svi = {}; + struct idpf_vc_xn_params xn_params = {}; + ssize_t reply_sz; + + svi.num_vfs = cpu_to_le16(num_vfs); + xn_params.vc_op = VIRTCHNL2_OP_SET_SRIOV_VFS; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = &svi; + xn_params.send_buf.iov_len = sizeof(svi); + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + + return reply_sz < 0 ? reply_sz : 0; +} + +/** + * idpf_send_get_stats_msg - Send virtchnl get statistics message + * @vport: vport to get stats for + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_get_stats_msg(struct idpf_vport *vport) +{ + struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + struct rtnl_link_stats64 *netstats = &np->netstats; + struct virtchnl2_vport_stats stats_msg = {}; + struct idpf_vc_xn_params xn_params = {}; + ssize_t reply_sz; + + + /* Don't send get_stats message if the link is down */ + if (np->state <= __IDPF_VPORT_DOWN) + return 0; + + stats_msg.vport_id = cpu_to_le32(vport->vport_id); + + xn_params.vc_op = VIRTCHNL2_OP_GET_STATS; + xn_params.send_buf.iov_base = &stats_msg; + xn_params.send_buf.iov_len = sizeof(stats_msg); + xn_params.recv_buf = xn_params.send_buf; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (reply_sz < sizeof(stats_msg)) + return -EIO; + + spin_lock_bh(&np->stats_lock); + + netstats->rx_packets = le64_to_cpu(stats_msg.rx_unicast) + + le64_to_cpu(stats_msg.rx_multicast) + + le64_to_cpu(stats_msg.rx_broadcast); + netstats->tx_packets = le64_to_cpu(stats_msg.tx_unicast) + + le64_to_cpu(stats_msg.tx_multicast) + + le64_to_cpu(stats_msg.tx_broadcast); + netstats->rx_bytes = le64_to_cpu(stats_msg.rx_bytes); + netstats->tx_bytes = le64_to_cpu(stats_msg.tx_bytes); + netstats->rx_errors = le64_to_cpu(stats_msg.rx_errors); + netstats->tx_errors = le64_to_cpu(stats_msg.tx_errors); + netstats->rx_dropped = le64_to_cpu(stats_msg.rx_discards); + netstats->tx_dropped = le64_to_cpu(stats_msg.tx_discards); + + vport->port_stats.vport_stats = stats_msg; + + spin_unlock_bh(&np->stats_lock); + + return 0; +} + +/** + * idpf_send_get_set_rss_lut_msg - Send virtchnl get or set rss lut message + * @vport: virtual port data structure + * @get: flag to set or get rss look up table + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get) +{ + struct virtchnl2_rss_lut *recv_rl __free(kfree) = NULL; + struct virtchnl2_rss_lut *rl __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; + struct idpf_rss_data *rss_data; + int buf_size, lut_buf_size; + ssize_t reply_sz; + int i; + + rss_data = + &vport->adapter->vport_config[vport->idx]->user_config.rss_data; + buf_size = struct_size(rl, lut, rss_data->rss_lut_size); + rl = kzalloc(buf_size, GFP_KERNEL); + if (!rl) + return -ENOMEM; + + rl->vport_id = cpu_to_le32(vport->vport_id); + + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = rl; + xn_params.send_buf.iov_len = buf_size; + + if (get) { + recv_rl = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!recv_rl) + return -ENOMEM; + xn_params.vc_op = VIRTCHNL2_OP_GET_RSS_LUT; + xn_params.recv_buf.iov_base = recv_rl; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + } else { + rl->lut_entries = cpu_to_le16(rss_data->rss_lut_size); + for (i = 0; i < rss_data->rss_lut_size; i++) + rl->lut[i] = cpu_to_le32(rss_data->rss_lut[i]); + + xn_params.vc_op = VIRTCHNL2_OP_SET_RSS_LUT; + } + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (!get) + return 0; + if (reply_sz < sizeof(struct virtchnl2_rss_lut)) + return -EIO; + + lut_buf_size = le16_to_cpu(recv_rl->lut_entries) * sizeof(u32); + if (reply_sz < lut_buf_size) + return -EIO; + + /* size didn't change, we can reuse existing lut buf */ + if (rss_data->rss_lut_size == le16_to_cpu(recv_rl->lut_entries)) + goto do_memcpy; + + rss_data->rss_lut_size = le16_to_cpu(recv_rl->lut_entries); + kfree(rss_data->rss_lut); + + rss_data->rss_lut = kzalloc(lut_buf_size, GFP_KERNEL); + if (!rss_data->rss_lut) { + rss_data->rss_lut_size = 0; + return -ENOMEM; + } + +do_memcpy: + memcpy(rss_data->rss_lut, recv_rl->lut, rss_data->rss_lut_size); + + return 0; +} + +/** + * idpf_send_get_set_rss_key_msg - Send virtchnl get or set rss key message + * @vport: virtual port data structure + * @get: flag to set or get rss look up table + * + * Returns 0 on success, negative on failure + */ +int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get) +{ + struct virtchnl2_rss_key *recv_rk __free(kfree) = NULL; + struct virtchnl2_rss_key *rk __free(kfree) = NULL; + struct idpf_vc_xn_params xn_params = {}; + struct idpf_rss_data *rss_data; + ssize_t reply_sz; + int i, buf_size; + u16 key_size; + + rss_data = + &vport->adapter->vport_config[vport->idx]->user_config.rss_data; + buf_size = struct_size(rk, key_flex, rss_data->rss_key_size); + rk = kzalloc(buf_size, GFP_KERNEL); + if (!rk) + return -ENOMEM; + + rk->vport_id = cpu_to_le32(vport->vport_id); + xn_params.send_buf.iov_base = rk; + xn_params.send_buf.iov_len = buf_size; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + if (get) { + recv_rk = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!recv_rk) + return -ENOMEM; + + xn_params.vc_op = VIRTCHNL2_OP_GET_RSS_KEY; + xn_params.recv_buf.iov_base = recv_rk; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + } else { + rk->key_len = cpu_to_le16(rss_data->rss_key_size); + for (i = 0; i < rss_data->rss_key_size; i++) + rk->key_flex[i] = rss_data->rss_key[i]; + + xn_params.vc_op = VIRTCHNL2_OP_SET_RSS_KEY; + } + + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + if (!get) + return 0; + if (reply_sz < sizeof(struct virtchnl2_rss_key)) + return -EIO; + + key_size = min_t(u16, NETDEV_RSS_KEY_LEN, + le16_to_cpu(recv_rk->key_len)); + if (reply_sz < key_size) + return -EIO; + + /* key len didn't change, reuse existing buf */ + if (rss_data->rss_key_size == key_size) + goto do_memcpy; + + rss_data->rss_key_size = key_size; + kfree(rss_data->rss_key); + rss_data->rss_key = kzalloc(key_size, GFP_KERNEL); + if (!rss_data->rss_key) { + rss_data->rss_key_size = 0; + return -ENOMEM; + } + +do_memcpy: + memcpy(rss_data->rss_key, recv_rk->key_flex, rss_data->rss_key_size); + + return 0; +} + +/** + * idpf_fill_ptype_lookup - Fill L3 specific fields in ptype lookup table + * @ptype: ptype lookup table + * @pstate: state machine for ptype lookup table + * @ipv4: ipv4 or ipv6 + * @frag: fragmentation allowed + * + */ +static void idpf_fill_ptype_lookup(struct idpf_rx_ptype_decoded *ptype, + struct idpf_ptype_state *pstate, + bool ipv4, bool frag) +{ + if (!pstate->outer_ip || !pstate->outer_frag) { + ptype->outer_ip = IDPF_RX_PTYPE_OUTER_IP; + pstate->outer_ip = true; + + if (ipv4) + ptype->outer_ip_ver = IDPF_RX_PTYPE_OUTER_IPV4; + else + ptype->outer_ip_ver = IDPF_RX_PTYPE_OUTER_IPV6; + + if (frag) { + ptype->outer_frag = IDPF_RX_PTYPE_FRAG; + pstate->outer_frag = true; + } + } else { + ptype->tunnel_type = IDPF_RX_PTYPE_TUNNEL_IP_IP; + pstate->tunnel_state = IDPF_PTYPE_TUNNEL_IP; + + if (ipv4) + ptype->tunnel_end_prot = + IDPF_RX_PTYPE_TUNNEL_END_IPV4; + else + ptype->tunnel_end_prot = + IDPF_RX_PTYPE_TUNNEL_END_IPV6; + + if (frag) + ptype->tunnel_end_frag = IDPF_RX_PTYPE_FRAG; + } +} + +/** + * idpf_send_get_rx_ptype_msg - Send virtchnl for ptype info + * @vport: virtual port data structure + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) +{ + struct virtchnl2_get_ptype_info *get_ptype_info __free(kfree) = NULL; + struct virtchnl2_get_ptype_info *ptype_info __free(kfree) = NULL; + struct idpf_rx_ptype_decoded *ptype_lkup = vport->rx_ptype_lkup; + int max_ptype, ptypes_recvd = 0, ptype_offset; + struct idpf_adapter *adapter = vport->adapter; + struct idpf_vc_xn_params xn_params = {}; + u16 next_ptype_id = 0; + ssize_t reply_sz; + int i, j, k; + + if (idpf_is_queue_model_split(vport->rxq_model)) + max_ptype = IDPF_RX_MAX_PTYPE; + else + max_ptype = IDPF_RX_MAX_BASE_PTYPE; + + memset(vport->rx_ptype_lkup, 0, sizeof(vport->rx_ptype_lkup)); + + get_ptype_info = kzalloc(sizeof(*get_ptype_info), GFP_KERNEL); + if (!get_ptype_info) + return -ENOMEM; + + ptype_info = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL); + if (!ptype_info) + return -ENOMEM; + + xn_params.vc_op = VIRTCHNL2_OP_GET_PTYPE_INFO; + xn_params.send_buf.iov_base = get_ptype_info; + xn_params.send_buf.iov_len = sizeof(*get_ptype_info); + xn_params.recv_buf.iov_base = ptype_info; + xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + + while (next_ptype_id < max_ptype) { + get_ptype_info->start_ptype_id = cpu_to_le16(next_ptype_id); + + if ((next_ptype_id + IDPF_RX_MAX_PTYPES_PER_BUF) > max_ptype) + get_ptype_info->num_ptypes = + cpu_to_le16(max_ptype - next_ptype_id); + else + get_ptype_info->num_ptypes = + cpu_to_le16(IDPF_RX_MAX_PTYPES_PER_BUF); + + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + if (reply_sz < IDPF_CTLQ_MAX_BUF_LEN) + return -EIO; + + ptypes_recvd += le16_to_cpu(ptype_info->num_ptypes); + if (ptypes_recvd > max_ptype) + return -EINVAL; + + next_ptype_id = le16_to_cpu(get_ptype_info->start_ptype_id) + + le16_to_cpu(get_ptype_info->num_ptypes); + + ptype_offset = IDPF_RX_PTYPE_HDR_SZ; + + for (i = 0; i < le16_to_cpu(ptype_info->num_ptypes); i++) { + struct idpf_ptype_state pstate = { }; + struct virtchnl2_ptype *ptype; + u16 id; + + ptype = (struct virtchnl2_ptype *) + ((u8 *)ptype_info + ptype_offset); + + ptype_offset += IDPF_GET_PTYPE_SIZE(ptype); + if (ptype_offset > IDPF_CTLQ_MAX_BUF_LEN) + return -EINVAL; + + /* 0xFFFF indicates end of ptypes */ + if (le16_to_cpu(ptype->ptype_id_10) == + IDPF_INVALID_PTYPE_ID) + return 0; + + if (idpf_is_queue_model_split(vport->rxq_model)) + k = le16_to_cpu(ptype->ptype_id_10); + else + k = ptype->ptype_id_8; + + if (ptype->proto_id_count) + ptype_lkup[k].known = 1; + + for (j = 0; j < ptype->proto_id_count; j++) { + id = le16_to_cpu(ptype->proto_id[j]); + switch (id) { + case VIRTCHNL2_PROTO_HDR_GRE: + if (pstate.tunnel_state == + IDPF_PTYPE_TUNNEL_IP) { + ptype_lkup[k].tunnel_type = + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT; + pstate.tunnel_state |= + IDPF_PTYPE_TUNNEL_IP_GRENAT; + } + break; + case VIRTCHNL2_PROTO_HDR_MAC: + ptype_lkup[k].outer_ip = + IDPF_RX_PTYPE_OUTER_L2; + if (pstate.tunnel_state == + IDPF_TUN_IP_GRE) { + ptype_lkup[k].tunnel_type = + IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC; + pstate.tunnel_state |= + IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC; + } + break; + case VIRTCHNL2_PROTO_HDR_IPV4: + idpf_fill_ptype_lookup(&ptype_lkup[k], + &pstate, true, + false); + break; + case VIRTCHNL2_PROTO_HDR_IPV6: + idpf_fill_ptype_lookup(&ptype_lkup[k], + &pstate, false, + false); + break; + case VIRTCHNL2_PROTO_HDR_IPV4_FRAG: + idpf_fill_ptype_lookup(&ptype_lkup[k], + &pstate, true, + true); + break; + case VIRTCHNL2_PROTO_HDR_IPV6_FRAG: + idpf_fill_ptype_lookup(&ptype_lkup[k], + &pstate, false, + true); + break; + case VIRTCHNL2_PROTO_HDR_UDP: + ptype_lkup[k].inner_prot = + IDPF_RX_PTYPE_INNER_PROT_UDP; + break; + case VIRTCHNL2_PROTO_HDR_TCP: + ptype_lkup[k].inner_prot = + IDPF_RX_PTYPE_INNER_PROT_TCP; + break; + case VIRTCHNL2_PROTO_HDR_SCTP: + ptype_lkup[k].inner_prot = + IDPF_RX_PTYPE_INNER_PROT_SCTP; + break; + case VIRTCHNL2_PROTO_HDR_ICMP: + ptype_lkup[k].inner_prot = + IDPF_RX_PTYPE_INNER_PROT_ICMP; + break; + case VIRTCHNL2_PROTO_HDR_PAY: + ptype_lkup[k].payload_layer = + IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2; + break; + case VIRTCHNL2_PROTO_HDR_ICMPV6: + case VIRTCHNL2_PROTO_HDR_IPV6_EH: + case VIRTCHNL2_PROTO_HDR_PRE_MAC: + case VIRTCHNL2_PROTO_HDR_POST_MAC: + case VIRTCHNL2_PROTO_HDR_ETHERTYPE: + case VIRTCHNL2_PROTO_HDR_SVLAN: + case VIRTCHNL2_PROTO_HDR_CVLAN: + case VIRTCHNL2_PROTO_HDR_MPLS: + case VIRTCHNL2_PROTO_HDR_MMPLS: + case VIRTCHNL2_PROTO_HDR_PTP: + case VIRTCHNL2_PROTO_HDR_CTRL: + case VIRTCHNL2_PROTO_HDR_LLDP: + case VIRTCHNL2_PROTO_HDR_ARP: + case VIRTCHNL2_PROTO_HDR_ECP: + case VIRTCHNL2_PROTO_HDR_EAPOL: + case VIRTCHNL2_PROTO_HDR_PPPOD: + case VIRTCHNL2_PROTO_HDR_PPPOE: + case VIRTCHNL2_PROTO_HDR_IGMP: + case VIRTCHNL2_PROTO_HDR_AH: + case VIRTCHNL2_PROTO_HDR_ESP: + case VIRTCHNL2_PROTO_HDR_IKE: + case VIRTCHNL2_PROTO_HDR_NATT_KEEP: + case VIRTCHNL2_PROTO_HDR_L2TPV2: + case VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL: + case VIRTCHNL2_PROTO_HDR_L2TPV3: + case VIRTCHNL2_PROTO_HDR_GTP: + case VIRTCHNL2_PROTO_HDR_GTP_EH: + case VIRTCHNL2_PROTO_HDR_GTPCV2: + case VIRTCHNL2_PROTO_HDR_GTPC_TEID: + case VIRTCHNL2_PROTO_HDR_GTPU: + case VIRTCHNL2_PROTO_HDR_GTPU_UL: + case VIRTCHNL2_PROTO_HDR_GTPU_DL: + case VIRTCHNL2_PROTO_HDR_ECPRI: + case VIRTCHNL2_PROTO_HDR_VRRP: + case VIRTCHNL2_PROTO_HDR_OSPF: + case VIRTCHNL2_PROTO_HDR_TUN: + case VIRTCHNL2_PROTO_HDR_NVGRE: + case VIRTCHNL2_PROTO_HDR_VXLAN: + case VIRTCHNL2_PROTO_HDR_VXLAN_GPE: + case VIRTCHNL2_PROTO_HDR_GENEVE: + case VIRTCHNL2_PROTO_HDR_NSH: + case VIRTCHNL2_PROTO_HDR_QUIC: + case VIRTCHNL2_PROTO_HDR_PFCP: + case VIRTCHNL2_PROTO_HDR_PFCP_NODE: + case VIRTCHNL2_PROTO_HDR_PFCP_SESSION: + case VIRTCHNL2_PROTO_HDR_RTP: + case VIRTCHNL2_PROTO_HDR_NO_PROTO: + break; + default: + break; + } + } + } + } + + return 0; +} + +/** + * idpf_send_ena_dis_loopback_msg - Send virtchnl enable/disable loopback + * message + * @vport: virtual port data structure + * + * Returns 0 on success, negative on failure. + */ +int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport) +{ + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_loopback loopback; + ssize_t reply_sz; + + loopback.vport_id = cpu_to_le32(vport->vport_id); + loopback.enable = idpf_is_feature_ena(vport, NETIF_F_LOOPBACK); + + xn_params.vc_op = VIRTCHNL2_OP_LOOPBACK; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = &loopback; + xn_params.send_buf.iov_len = sizeof(loopback); + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + + return reply_sz < 0 ? reply_sz : 0; +} + +/** + * idpf_find_ctlq - Given a type and id, find ctlq info + * @hw: hardware struct + * @type: type of ctrlq to find + * @id: ctlq id to find + * + * Returns pointer to found ctlq info struct, NULL otherwise. + */ +static struct idpf_ctlq_info *idpf_find_ctlq(struct idpf_hw *hw, + enum idpf_ctlq_type type, int id) +{ + struct idpf_ctlq_info *cq, *tmp; + + list_for_each_entry_safe(cq, tmp, &hw->cq_list_head, cq_list) + if (cq->q_id == id && cq->cq_type == type) + return cq; + + return NULL; +} + +/** + * idpf_init_dflt_mbx - Setup default mailbox parameters and make request + * @adapter: adapter info struct + * + * Returns 0 on success, negative otherwise + */ +int idpf_init_dflt_mbx(struct idpf_adapter *adapter) +{ + struct idpf_ctlq_create_info ctlq_info[] = { + { + .type = IDPF_CTLQ_TYPE_MAILBOX_TX, + .id = IDPF_DFLT_MBX_ID, + .len = IDPF_DFLT_MBX_Q_LEN, + .buf_size = IDPF_CTLQ_MAX_BUF_LEN + }, + { + .type = IDPF_CTLQ_TYPE_MAILBOX_RX, + .id = IDPF_DFLT_MBX_ID, + .len = IDPF_DFLT_MBX_Q_LEN, + .buf_size = IDPF_CTLQ_MAX_BUF_LEN + } + }; + struct idpf_hw *hw = &adapter->hw; + int err; + + adapter->dev_ops.reg_ops.ctlq_reg_init(ctlq_info); + + err = idpf_ctlq_init(hw, IDPF_NUM_DFLT_MBX_Q, ctlq_info); + if (err) + return err; + + hw->asq = idpf_find_ctlq(hw, IDPF_CTLQ_TYPE_MAILBOX_TX, + IDPF_DFLT_MBX_ID); + hw->arq = idpf_find_ctlq(hw, IDPF_CTLQ_TYPE_MAILBOX_RX, + IDPF_DFLT_MBX_ID); + + if (!hw->asq || !hw->arq) { + idpf_ctlq_deinit(hw); + + return -ENOENT; + } + + adapter->state = __IDPF_VER_CHECK; + + return 0; +} + +/** + * idpf_deinit_dflt_mbx - Free up ctlqs setup + * @adapter: Driver specific private data structure + */ +void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter) +{ + if (adapter->hw.arq && adapter->hw.asq) { + idpf_mb_clean(adapter); + idpf_ctlq_deinit(&adapter->hw); + } + adapter->hw.arq = NULL; + adapter->hw.asq = NULL; +} + +/** + * idpf_vport_params_buf_rel - Release memory for MailBox resources + * @adapter: Driver specific private data structure + * + * Will release memory to hold the vport parameters received on MailBox + */ +static void idpf_vport_params_buf_rel(struct idpf_adapter *adapter) +{ + kfree(adapter->vport_params_recvd); + adapter->vport_params_recvd = NULL; + kfree(adapter->vport_params_reqd); + adapter->vport_params_reqd = NULL; + kfree(adapter->vport_ids); + adapter->vport_ids = NULL; +} + +/** + * idpf_vport_params_buf_alloc - Allocate memory for MailBox resources + * @adapter: Driver specific private data structure + * + * Will alloc memory to hold the vport parameters received on MailBox + */ +static int idpf_vport_params_buf_alloc(struct idpf_adapter *adapter) +{ + u16 num_max_vports = idpf_get_max_vports(adapter); + + adapter->vport_params_reqd = kcalloc(num_max_vports, + sizeof(*adapter->vport_params_reqd), + GFP_KERNEL); + if (!adapter->vport_params_reqd) + return -ENOMEM; + + adapter->vport_params_recvd = kcalloc(num_max_vports, + sizeof(*adapter->vport_params_recvd), + GFP_KERNEL); + if (!adapter->vport_params_recvd) + goto err_mem; + + adapter->vport_ids = kcalloc(num_max_vports, sizeof(u32), GFP_KERNEL); + if (!adapter->vport_ids) + goto err_mem; + + if (adapter->vport_config) + return 0; + + adapter->vport_config = kcalloc(num_max_vports, + sizeof(*adapter->vport_config), + GFP_KERNEL); + if (!adapter->vport_config) + goto err_mem; + + return 0; + +err_mem: + idpf_vport_params_buf_rel(adapter); + + return -ENOMEM; +} + +/** + * idpf_vc_core_init - Initialize state machine and get driver specific + * resources + * @adapter: Driver specific private structure + * + * This function will initialize the state machine and request all necessary + * resources required by the device driver. Once the state machine is + * initialized, allocate memory to store vport specific information and also + * requests required interrupts. + * + * Returns 0 on success, -EAGAIN function will get called again, + * otherwise negative on failure. + */ +int idpf_vc_core_init(struct idpf_adapter *adapter) +{ + int task_delay = 30; + u16 num_max_vports; + int err = 0; + + if (!adapter->vcxn_mngr) { + adapter->vcxn_mngr = kzalloc(sizeof(*adapter->vcxn_mngr), GFP_KERNEL); + if (!adapter->vcxn_mngr) { + err = -ENOMEM; + goto init_failed; + } + } + idpf_vc_xn_init(adapter->vcxn_mngr); + + while (adapter->state != __IDPF_INIT_SW) { + switch (adapter->state) { + case __IDPF_VER_CHECK: + err = idpf_send_ver_msg(adapter); + switch (err) { + case 0: + /* success, move state machine forward */ + adapter->state = __IDPF_GET_CAPS; + fallthrough; + case -EAGAIN: + goto restart; + default: + /* Something bad happened, try again but only a + * few times. + */ + goto init_failed; + } + case __IDPF_GET_CAPS: + err = idpf_send_get_caps_msg(adapter); + if (err) + goto init_failed; + adapter->state = __IDPF_INIT_SW; + break; + default: + dev_err(&adapter->pdev->dev, "Device is in bad state: %d\n", + adapter->state); + err = -EINVAL; + goto init_failed; + } + break; +restart: + /* Give enough time before proceeding further with + * state machine + */ + msleep(task_delay); + } + + pci_sriov_set_totalvfs(adapter->pdev, idpf_get_max_vfs(adapter)); + num_max_vports = idpf_get_max_vports(adapter); + adapter->max_vports = num_max_vports; + adapter->vports = kcalloc(num_max_vports, sizeof(*adapter->vports), + GFP_KERNEL); + if (!adapter->vports) + return -ENOMEM; + + if (!adapter->netdevs) { + adapter->netdevs = kcalloc(num_max_vports, + sizeof(struct net_device *), + GFP_KERNEL); + if (!adapter->netdevs) { + err = -ENOMEM; + goto err_netdev_alloc; + } + } + + err = idpf_vport_params_buf_alloc(adapter); + if (err) { + dev_err(&adapter->pdev->dev, "Failed to alloc vport params buffer: %d\n", + err); + goto err_netdev_alloc; + } + + /* Start the mailbox task before requesting vectors. This will ensure + * vector information response from mailbox is handled + */ + queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); + + queue_delayed_work(adapter->serv_wq, &adapter->serv_task, + msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07))); + + err = idpf_intr_req(adapter); + if (err) { + dev_err(&adapter->pdev->dev, "failed to enable interrupt vectors: %d\n", + err); + goto err_intr_req; + } + + idpf_init_avail_queues(adapter); + + /* Skew the delay for init tasks for each function based on fn number + * to prevent every function from making the same call simultaneously. + */ + queue_delayed_work(adapter->init_wq, &adapter->init_task, + msecs_to_jiffies(5 * (adapter->pdev->devfn & 0x07))); + + set_bit(IDPF_VC_CORE_INIT, adapter->flags); + + return 0; + +err_intr_req: + cancel_delayed_work_sync(&adapter->serv_task); + cancel_delayed_work_sync(&adapter->mbx_task); + idpf_vport_params_buf_rel(adapter); +err_netdev_alloc: + kfree(adapter->vports); + adapter->vports = NULL; + return err; + +init_failed: + /* Don't retry if we're trying to go down, just bail. */ + if (test_bit(IDPF_REMOVE_IN_PROG, adapter->flags)) + return err; + + if (++adapter->mb_wait_count > IDPF_MB_MAX_ERR) { + dev_err(&adapter->pdev->dev, "Failed to establish mailbox communications with hardware\n"); + + return -EFAULT; + } + /* If it reached here, it is possible that mailbox queue initialization + * register writes might not have taken effect. Retry to initialize + * the mailbox again + */ + adapter->state = __IDPF_VER_CHECK; + if (adapter->vcxn_mngr) + idpf_vc_xn_shutdown(adapter->vcxn_mngr); + idpf_deinit_dflt_mbx(adapter); + set_bit(IDPF_HR_DRV_LOAD, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, &adapter->vc_event_task, + msecs_to_jiffies(task_delay)); + + return -EAGAIN; +} + +/** + * idpf_vc_core_deinit - Device deinit routine + * @adapter: Driver specific private structure + * + */ +void idpf_vc_core_deinit(struct idpf_adapter *adapter) +{ + if (!test_bit(IDPF_VC_CORE_INIT, adapter->flags)) + return; + + idpf_vc_xn_shutdown(adapter->vcxn_mngr); + idpf_deinit_task(adapter); + idpf_intr_rel(adapter); + + cancel_delayed_work_sync(&adapter->serv_task); + cancel_delayed_work_sync(&adapter->mbx_task); + + idpf_vport_params_buf_rel(adapter); + + kfree(adapter->vports); + adapter->vports = NULL; + + clear_bit(IDPF_VC_CORE_INIT, adapter->flags); +} + +/** + * idpf_vport_alloc_vec_indexes - Get relative vector indexes + * @vport: virtual port data struct + * + * This function requests the vector information required for the vport and + * stores the vector indexes received from the 'global vector distribution' + * in the vport's queue vectors array. + * + * Return 0 on success, error on failure + */ +int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport) +{ + struct idpf_vector_info vec_info; + int num_alloc_vecs; + + vec_info.num_curr_vecs = vport->num_q_vectors; + vec_info.num_req_vecs = max(vport->num_txq, vport->num_rxq); + vec_info.default_vport = vport->default_vport; + vec_info.index = vport->idx; + + num_alloc_vecs = idpf_req_rel_vector_indexes(vport->adapter, + vport->q_vector_idxs, + &vec_info); + if (num_alloc_vecs <= 0) { + dev_err(&vport->adapter->pdev->dev, "Vector distribution failed: %d\n", + num_alloc_vecs); + return -EINVAL; + } + + vport->num_q_vectors = num_alloc_vecs; + + return 0; +} + +/** + * idpf_vport_init - Initialize virtual port + * @vport: virtual port to be initialized + * @max_q: vport max queue info + * + * Will initialize vport with the info received through MB earlier + */ +void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q) +{ + struct idpf_adapter *adapter = vport->adapter; + struct virtchnl2_create_vport *vport_msg; + struct idpf_vport_config *vport_config; + u16 tx_itr[] = {2, 8, 64, 128, 256}; + u16 rx_itr[] = {2, 8, 32, 96, 128}; + struct idpf_rss_data *rss_data; + u16 idx = vport->idx; + + vport_config = adapter->vport_config[idx]; + rss_data = &vport_config->user_config.rss_data; + vport_msg = adapter->vport_params_recvd[idx]; + + vport_config->max_q.max_txq = max_q->max_txq; + vport_config->max_q.max_rxq = max_q->max_rxq; + vport_config->max_q.max_complq = max_q->max_complq; + vport_config->max_q.max_bufq = max_q->max_bufq; + + vport->txq_model = le16_to_cpu(vport_msg->txq_model); + vport->rxq_model = le16_to_cpu(vport_msg->rxq_model); + vport->vport_type = le16_to_cpu(vport_msg->vport_type); + vport->vport_id = le32_to_cpu(vport_msg->vport_id); + + rss_data->rss_key_size = min_t(u16, NETDEV_RSS_KEY_LEN, + le16_to_cpu(vport_msg->rss_key_size)); + rss_data->rss_lut_size = le16_to_cpu(vport_msg->rss_lut_size); + + ether_addr_copy(vport->default_mac_addr, vport_msg->default_mac_addr); + vport->max_mtu = le16_to_cpu(vport_msg->max_mtu) - IDPF_PACKET_HDR_PAD; + + /* Initialize Tx and Rx profiles for Dynamic Interrupt Moderation */ + memcpy(vport->rx_itr_profile, rx_itr, IDPF_DIM_PROFILE_SLOTS); + memcpy(vport->tx_itr_profile, tx_itr, IDPF_DIM_PROFILE_SLOTS); + + idpf_vport_set_hsplit(vport, ETHTOOL_TCP_DATA_SPLIT_ENABLED); + + idpf_vport_init_num_qs(vport, vport_msg); + idpf_vport_calc_num_q_desc(vport); + idpf_vport_calc_num_q_groups(vport); + idpf_vport_alloc_vec_indexes(vport); + + vport->crc_enable = adapter->crc_enable; +} + +/** + * idpf_get_vec_ids - Initialize vector id from Mailbox parameters + * @adapter: adapter structure to get the mailbox vector id + * @vecids: Array of vector ids + * @num_vecids: number of vector ids + * @chunks: vector ids received over mailbox + * + * Will initialize the mailbox vector id which is received from the + * get capabilities and data queue vector ids with ids received as + * mailbox parameters. + * Returns number of ids filled + */ +int idpf_get_vec_ids(struct idpf_adapter *adapter, + u16 *vecids, int num_vecids, + struct virtchnl2_vector_chunks *chunks) +{ + u16 num_chunks = le16_to_cpu(chunks->num_vchunks); + int num_vecid_filled = 0; + int i, j; + + vecids[num_vecid_filled] = adapter->mb_vector.v_idx; + num_vecid_filled++; + + for (j = 0; j < num_chunks; j++) { + struct virtchnl2_vector_chunk *chunk; + u16 start_vecid, num_vec; + + chunk = &chunks->vchunks[j]; + num_vec = le16_to_cpu(chunk->num_vectors); + start_vecid = le16_to_cpu(chunk->start_vector_id); + + for (i = 0; i < num_vec; i++) { + if ((num_vecid_filled + i) < num_vecids) { + vecids[num_vecid_filled + i] = start_vecid; + start_vecid++; + } else { + break; + } + } + num_vecid_filled = num_vecid_filled + i; + } + + return num_vecid_filled; +} + +/** + * idpf_vport_get_queue_ids - Initialize queue id from Mailbox parameters + * @qids: Array of queue ids + * @num_qids: number of queue ids + * @q_type: queue model + * @chunks: queue ids received over mailbox + * + * Will initialize all queue ids with ids received as mailbox parameters + * Returns number of ids filled + */ +static int idpf_vport_get_queue_ids(u32 *qids, int num_qids, u16 q_type, + struct virtchnl2_queue_reg_chunks *chunks) +{ + u16 num_chunks = le16_to_cpu(chunks->num_chunks); + u32 num_q_id_filled = 0, i; + u32 start_q_id, num_q; + + while (num_chunks--) { + struct virtchnl2_queue_reg_chunk *chunk; + + chunk = &chunks->chunks[num_chunks]; + if (le32_to_cpu(chunk->type) != q_type) + continue; + + num_q = le32_to_cpu(chunk->num_queues); + start_q_id = le32_to_cpu(chunk->start_queue_id); + + for (i = 0; i < num_q; i++) { + if ((num_q_id_filled + i) < num_qids) { + qids[num_q_id_filled + i] = start_q_id; + start_q_id++; + } else { + break; + } + } + num_q_id_filled = num_q_id_filled + i; + } + + return num_q_id_filled; +} + +/** + * __idpf_vport_queue_ids_init - Initialize queue ids from Mailbox parameters + * @vport: virtual port for which the queues ids are initialized + * @qids: queue ids + * @num_qids: number of queue ids + * @q_type: type of queue + * + * Will initialize all queue ids with ids received as mailbox + * parameters. Returns number of queue ids initialized. + */ +static int __idpf_vport_queue_ids_init(struct idpf_vport *vport, + const u32 *qids, + int num_qids, + u32 q_type) +{ + struct idpf_queue *q; + int i, j, k = 0; + + switch (q_type) { + case VIRTCHNL2_QUEUE_TYPE_TX: + for (i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) { + tx_qgrp->txqs[j]->q_id = qids[k]; + tx_qgrp->txqs[j]->q_type = + VIRTCHNL2_QUEUE_TYPE_TX; + } + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u16 num_rxq; + + if (idpf_is_queue_model_split(vport->rxq_model)) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (j = 0; j < num_rxq && k < num_qids; j++, k++) { + if (idpf_is_queue_model_split(vport->rxq_model)) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + q->q_id = qids[k]; + q->q_type = VIRTCHNL2_QUEUE_TYPE_RX; + } + } + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + for (i = 0; i < vport->num_txq_grp && k < num_qids; i++, k++) { + struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + tx_qgrp->complq->q_id = qids[k]; + tx_qgrp->complq->q_type = + VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + } + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + for (i = 0; i < vport->num_rxq_grp; i++) { + struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u8 num_bufqs = vport->num_bufqs_per_qgrp; + + for (j = 0; j < num_bufqs && k < num_qids; j++, k++) { + q = &rx_qgrp->splitq.bufq_sets[j].bufq; + q->q_id = qids[k]; + q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + } + } + break; + default: + break; + } + + return k; +} + +/** + * idpf_vport_queue_ids_init - Initialize queue ids from Mailbox parameters + * @vport: virtual port for which the queues ids are initialized + * + * Will initialize all queue ids with ids received as mailbox parameters. + * Returns 0 on success, negative if all the queues are not initialized. + */ +int idpf_vport_queue_ids_init(struct idpf_vport *vport) +{ + struct virtchnl2_create_vport *vport_params; + struct virtchnl2_queue_reg_chunks *chunks; + struct idpf_vport_config *vport_config; + u16 vport_idx = vport->idx; + int num_ids, err = 0; + u16 q_type; + u32 *qids; + + vport_config = vport->adapter->vport_config[vport_idx]; + if (vport_config->req_qs_chunks) { + struct virtchnl2_add_queues *vc_aq = + (struct virtchnl2_add_queues *)vport_config->req_qs_chunks; + chunks = &vc_aq->chunks; + } else { + vport_params = vport->adapter->vport_params_recvd[vport_idx]; + chunks = &vport_params->chunks; + } + + qids = kcalloc(IDPF_MAX_QIDS, sizeof(u32), GFP_KERNEL); + if (!qids) + return -ENOMEM; + + num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, + VIRTCHNL2_QUEUE_TYPE_TX, + chunks); + if (num_ids < vport->num_txq) { + err = -EINVAL; + goto mem_rel; + } + num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, + VIRTCHNL2_QUEUE_TYPE_TX); + if (num_ids < vport->num_txq) { + err = -EINVAL; + goto mem_rel; + } + + num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, + VIRTCHNL2_QUEUE_TYPE_RX, + chunks); + if (num_ids < vport->num_rxq) { + err = -EINVAL; + goto mem_rel; + } + num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, + VIRTCHNL2_QUEUE_TYPE_RX); + if (num_ids < vport->num_rxq) { + err = -EINVAL; + goto mem_rel; + } + + if (!idpf_is_queue_model_split(vport->txq_model)) + goto check_rxq; + + q_type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, q_type, chunks); + if (num_ids < vport->num_complq) { + err = -EINVAL; + goto mem_rel; + } + num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, q_type); + if (num_ids < vport->num_complq) { + err = -EINVAL; + goto mem_rel; + } + +check_rxq: + if (!idpf_is_queue_model_split(vport->rxq_model)) + goto mem_rel; + + q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + num_ids = idpf_vport_get_queue_ids(qids, IDPF_MAX_QIDS, q_type, chunks); + if (num_ids < vport->num_bufq) { + err = -EINVAL; + goto mem_rel; + } + num_ids = __idpf_vport_queue_ids_init(vport, qids, num_ids, q_type); + if (num_ids < vport->num_bufq) + err = -EINVAL; + +mem_rel: + kfree(qids); + + return err; +} + +/** + * idpf_vport_adjust_qs - Adjust to new requested queues + * @vport: virtual port data struct + * + * Renegotiate queues. Returns 0 on success, negative on failure. + */ +int idpf_vport_adjust_qs(struct idpf_vport *vport) +{ + struct virtchnl2_create_vport vport_msg; + int err; + + vport_msg.txq_model = cpu_to_le16(vport->txq_model); + vport_msg.rxq_model = cpu_to_le16(vport->rxq_model); + err = idpf_vport_calc_total_qs(vport->adapter, vport->idx, &vport_msg, + NULL); + if (err) + return err; + + idpf_vport_init_num_qs(vport, &vport_msg); + idpf_vport_calc_num_q_groups(vport); + + return 0; +} + +/** + * idpf_is_capability_ena - Default implementation of capability checking + * @adapter: Private data struct + * @all: all or one flag + * @field: caps field to check for flags + * @flag: flag to check + * + * Return true if all capabilities are supported, false otherwise + */ +bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, + enum idpf_cap_field field, u64 flag) +{ + u8 *caps = (u8 *)&adapter->caps; + u32 *cap_field; + + if (!caps) + return false; + + if (field == IDPF_BASE_CAPS) + return false; + + cap_field = (u32 *)(caps + field); + + if (all) + return (*cap_field & flag) == flag; + else + return !!(*cap_field & flag); +} + +/** + * idpf_get_vport_id: Get vport id + * @vport: virtual port structure + * + * Return vport id from the adapter persistent data + */ +u32 idpf_get_vport_id(struct idpf_vport *vport) +{ + struct virtchnl2_create_vport *vport_msg; + + vport_msg = vport->adapter->vport_params_recvd[vport->idx]; + + return le32_to_cpu(vport_msg->vport_id); +} + +/** + * idpf_mac_filter_async_handler - Async callback for mac filters + * @adapter: private data struct + * @xn: transaction for message + * @ctlq_msg: received message + * + * In some scenarios driver can't sleep and wait for a reply (e.g.: stack is + * holding rtnl_lock) when adding a new mac filter. It puts us in a difficult + * situation to deal with errors returned on the reply. The best we can + * ultimately do is remove it from our list of mac filters and report the + * error. + */ +static int idpf_mac_filter_async_handler(struct idpf_adapter *adapter, + struct idpf_vc_xn *xn, + const struct idpf_ctlq_msg *ctlq_msg) +{ + struct virtchnl2_mac_addr_list *ma_list; + struct idpf_vport_config *vport_config; + struct virtchnl2_mac_addr *mac_addr; + struct idpf_mac_filter *f, *tmp; + struct list_head *ma_list_head; + struct idpf_vport *vport; + u16 num_entries; + int i; + + /* if success we're done, we're only here if something bad happened */ + if (!ctlq_msg->cookie.mbx.chnl_retval) + return 0; + + /* make sure at least struct is there */ + if (xn->reply_sz < sizeof(*ma_list)) + goto invalid_payload; + + ma_list = ctlq_msg->ctx.indirect.payload->va; + mac_addr = ma_list->mac_addr_list; + num_entries = le16_to_cpu(ma_list->num_mac_addr); + /* we should have received a buffer at least this big */ + if (xn->reply_sz < struct_size(ma_list, mac_addr_list, num_entries)) + goto invalid_payload; + + vport = idpf_vid_to_vport(adapter, le32_to_cpu(ma_list->vport_id)); + if (!vport) + goto invalid_payload; + + vport_config = adapter->vport_config[le32_to_cpu(ma_list->vport_id)]; + ma_list_head = &vport_config->user_config.mac_filter_list; + + /* We can't do much to reconcile bad filters at this point, however we + * should at least remove them from our list one way or the other so we + * have some idea what good filters we have. + */ + spin_lock_bh(&vport_config->mac_filter_list_lock); + list_for_each_entry_safe(f, tmp, ma_list_head, list) + for (i = 0; i < num_entries; i++) + if (ether_addr_equal(mac_addr[i].addr, f->macaddr)) + list_del(&f->list); + spin_unlock_bh(&vport_config->mac_filter_list_lock); + dev_err_ratelimited(&adapter->pdev->dev, "Received error sending MAC filter request (op %d)\n", + xn->vc_op); + + return 0; + +invalid_payload: + dev_err_ratelimited(&adapter->pdev->dev, "Received invalid MAC filter payload (op %d) (len %zd)\n", + xn->vc_op, xn->reply_sz); + + return -EINVAL; +} + +/** + * idpf_add_del_mac_filters - Add/del mac filters + * @vport: Virtual port data structure + * @np: Netdev private structure + * @add: Add or delete flag + * @async: Don't wait for return message + * + * Returns 0 on success, error on failure. + **/ +int idpf_add_del_mac_filters(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + bool add, bool async) +{ + struct virtchnl2_mac_addr_list *ma_list __free(kfree) = NULL; + struct virtchnl2_mac_addr *mac_addr __free(kfree) = NULL; + struct idpf_adapter *adapter = np->adapter; + struct idpf_vc_xn_params xn_params = {}; + struct idpf_vport_config *vport_config; + u32 num_msgs, total_filters = 0; + struct idpf_mac_filter *f; + ssize_t reply_sz; + int i = 0, k; + + xn_params.vc_op = add ? VIRTCHNL2_OP_ADD_MAC_ADDR : + VIRTCHNL2_OP_DEL_MAC_ADDR; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.async = async; + xn_params.async_handler = idpf_mac_filter_async_handler; + + vport_config = adapter->vport_config[np->vport_idx]; + spin_lock_bh(&vport_config->mac_filter_list_lock); + + /* Find the number of newly added filters */ + list_for_each_entry(f, &vport_config->user_config.mac_filter_list, + list) { + if (add && f->add) + total_filters++; + else if (!add && f->remove) + total_filters++; + } + + if (!total_filters) { + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return 0; + } + + /* Fill all the new filters into virtchannel message */ + mac_addr = kcalloc(total_filters, sizeof(struct virtchnl2_mac_addr), + GFP_ATOMIC); + if (!mac_addr) { + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + return -ENOMEM; + } + + list_for_each_entry(f, &vport_config->user_config.mac_filter_list, + list) { + if (add && f->add) { + ether_addr_copy(mac_addr[i].addr, f->macaddr); + i++; + f->add = false; + if (i == total_filters) + break; + } + if (!add && f->remove) { + ether_addr_copy(mac_addr[i].addr, f->macaddr); + i++; + f->remove = false; + if (i == total_filters) + break; + } + } + + spin_unlock_bh(&vport_config->mac_filter_list_lock); + + /* Chunk up the filters into multiple messages to avoid + * sending a control queue message buffer that is too large + */ + num_msgs = DIV_ROUND_UP(total_filters, IDPF_NUM_FILTERS_PER_MSG); + + for (i = 0, k = 0; i < num_msgs; i++) { + u32 entries_size, buf_size, num_entries; + + num_entries = min_t(u32, total_filters, + IDPF_NUM_FILTERS_PER_MSG); + entries_size = sizeof(struct virtchnl2_mac_addr) * num_entries; + buf_size = struct_size(ma_list, mac_addr_list, num_entries); + + if (!ma_list || num_entries != IDPF_NUM_FILTERS_PER_MSG) { + kfree(ma_list); + ma_list = kzalloc(buf_size, GFP_ATOMIC); + if (!ma_list) + return -ENOMEM; + } else { + memset(ma_list, 0, buf_size); + } + + ma_list->vport_id = cpu_to_le32(np->vport_id); + ma_list->num_mac_addr = cpu_to_le16(num_entries); + memcpy(ma_list->mac_addr_list, &mac_addr[k], entries_size); + + xn_params.send_buf.iov_base = ma_list; + xn_params.send_buf.iov_len = buf_size; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + k += num_entries; + total_filters -= num_entries; + } + + return 0; +} + +/** + * idpf_set_promiscuous - set promiscuous and send message to mailbox + * @adapter: Driver specific private structure + * @config_data: Vport specific config data + * @vport_id: Vport identifier + * + * Request to enable promiscuous mode for the vport. Message is sent + * asynchronously and won't wait for response. Returns 0 on success, negative + * on failure; + */ +int idpf_set_promiscuous(struct idpf_adapter *adapter, + struct idpf_vport_user_config_data *config_data, + u32 vport_id) +{ + struct idpf_vc_xn_params xn_params = {}; + struct virtchnl2_promisc_info vpi; + ssize_t reply_sz; + u16 flags = 0; + + if (test_bit(__IDPF_PROMISC_UC, config_data->user_flags)) + flags |= VIRTCHNL2_UNICAST_PROMISC; + if (test_bit(__IDPF_PROMISC_MC, config_data->user_flags)) + flags |= VIRTCHNL2_MULTICAST_PROMISC; + + vpi.vport_id = cpu_to_le32(vport_id); + vpi.flags = cpu_to_le16(flags); + + xn_params.vc_op = VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + xn_params.send_buf.iov_base = &vpi; + xn_params.send_buf.iov_len = sizeof(vpi); + /* setting promiscuous is only ever done asynchronously */ + xn_params.async = true; + reply_sz = idpf_vc_xn_exec(adapter, &xn_params); + + return reply_sz < 0 ? reply_sz : 0; +} diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h new file mode 100644 index 00000000000000..83da5d8da56bf2 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef _IDPF_VIRTCHNL_H_ +#define _IDPF_VIRTCHNL_H_ + +struct idpf_adapter; +struct idpf_netdev_priv; +struct idpf_vec_regs; +struct idpf_vport; +struct idpf_vport_max_q; +struct idpf_vport_user_config_data; + +int idpf_init_dflt_mbx(struct idpf_adapter *adapter); +void idpf_deinit_dflt_mbx(struct idpf_adapter *adapter); +int idpf_vc_core_init(struct idpf_adapter *adapter); +void idpf_vc_core_deinit(struct idpf_adapter *adapter); + +int idpf_get_reg_intr_vecs(struct idpf_vport *vport, + struct idpf_vec_regs *reg_vals); +int idpf_queue_reg_init(struct idpf_vport *vport); +int idpf_vport_queue_ids_init(struct idpf_vport *vport); + +int idpf_recv_mb_msg(struct idpf_adapter *adapter); +int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, + u16 msg_size, u8 *msg, u16 cookie); + +void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q); +u32 idpf_get_vport_id(struct idpf_vport *vport); +int idpf_send_create_vport_msg(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +int idpf_send_destroy_vport_msg(struct idpf_vport *vport); +int idpf_send_enable_vport_msg(struct idpf_vport *vport); +int idpf_send_disable_vport_msg(struct idpf_vport *vport); + +int idpf_vport_adjust_qs(struct idpf_vport *vport); +int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter, + struct idpf_vport_max_q *max_q); +int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, + u16 num_complq, u16 num_rx_q, u16 num_rx_bufq); +int idpf_send_delete_queues_msg(struct idpf_vport *vport); +int idpf_send_enable_queues_msg(struct idpf_vport *vport); +int idpf_send_disable_queues_msg(struct idpf_vport *vport); +int idpf_send_config_queues_msg(struct idpf_vport *vport); + +int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport); +int idpf_get_vec_ids(struct idpf_adapter *adapter, + u16 *vecids, int num_vecids, + struct virtchnl2_vector_chunks *chunks); +int idpf_send_alloc_vectors_msg(struct idpf_adapter *adapter, u16 num_vectors); +int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter); +int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map); + +int idpf_add_del_mac_filters(struct idpf_vport *vport, + struct idpf_netdev_priv *np, + bool add, bool async); +int idpf_set_promiscuous(struct idpf_adapter *adapter, + struct idpf_vport_user_config_data *config_data, + u32 vport_id); +int idpf_check_supported_desc_ids(struct idpf_vport *vport); +int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport); +int idpf_send_ena_dis_loopback_msg(struct idpf_vport *vport); +int idpf_send_get_stats_msg(struct idpf_vport *vport); +int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs); +int idpf_send_get_set_rss_key_msg(struct idpf_vport *vport, bool get); +int idpf_send_get_set_rss_lut_msg(struct idpf_vport *vport, bool get); + +#endif /* _IDPF_VIRTCHNL_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h new file mode 100644 index 00000000000000..4a3c4454d25aba --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -0,0 +1,1273 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _VIRTCHNL2_H_ +#define _VIRTCHNL2_H_ + +/* All opcodes associated with virtchnl2 are prefixed with virtchnl2 or + * VIRTCHNL2. Any future opcodes, offloads/capabilities, structures, + * and defines must be prefixed with virtchnl2 or VIRTCHNL2 to avoid confusion. + * + * PF/VF uses the virtchnl2 interface defined in this header file to communicate + * with device Control Plane (CP). Driver and the CP may run on different + * platforms with different endianness. To avoid byte order discrepancies, + * all the structures in this header follow little-endian format. + * + * This is an interface definition file where existing enums and their values + * must remain unchanged over time, so we specify explicit values for all enums. + */ + +#include "virtchnl2_lan_desc.h" + +/* This macro is used to generate compilation errors if a structure + * is not exactly the correct length. + */ +#define VIRTCHNL2_CHECK_STRUCT_LEN(n, X) \ + static_assert((n) == sizeof(struct X)) + +/* New major set of opcodes introduced and so leaving room for + * old misc opcodes to be added in future. Also these opcodes may only + * be used if both the PF and VF have successfully negotiated the + * VIRTCHNL version as 2.0 during VIRTCHNL2_OP_VERSION exchange. + */ +enum virtchnl2_op { + VIRTCHNL2_OP_UNKNOWN = 0, + VIRTCHNL2_OP_VERSION = 1, + VIRTCHNL2_OP_GET_CAPS = 500, + VIRTCHNL2_OP_CREATE_VPORT = 501, + VIRTCHNL2_OP_DESTROY_VPORT = 502, + VIRTCHNL2_OP_ENABLE_VPORT = 503, + VIRTCHNL2_OP_DISABLE_VPORT = 504, + VIRTCHNL2_OP_CONFIG_TX_QUEUES = 505, + VIRTCHNL2_OP_CONFIG_RX_QUEUES = 506, + VIRTCHNL2_OP_ENABLE_QUEUES = 507, + VIRTCHNL2_OP_DISABLE_QUEUES = 508, + VIRTCHNL2_OP_ADD_QUEUES = 509, + VIRTCHNL2_OP_DEL_QUEUES = 510, + VIRTCHNL2_OP_MAP_QUEUE_VECTOR = 511, + VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR = 512, + VIRTCHNL2_OP_GET_RSS_KEY = 513, + VIRTCHNL2_OP_SET_RSS_KEY = 514, + VIRTCHNL2_OP_GET_RSS_LUT = 515, + VIRTCHNL2_OP_SET_RSS_LUT = 516, + VIRTCHNL2_OP_GET_RSS_HASH = 517, + VIRTCHNL2_OP_SET_RSS_HASH = 518, + VIRTCHNL2_OP_SET_SRIOV_VFS = 519, + VIRTCHNL2_OP_ALLOC_VECTORS = 520, + VIRTCHNL2_OP_DEALLOC_VECTORS = 521, + VIRTCHNL2_OP_EVENT = 522, + VIRTCHNL2_OP_GET_STATS = 523, + VIRTCHNL2_OP_RESET_VF = 524, + VIRTCHNL2_OP_GET_EDT_CAPS = 525, + VIRTCHNL2_OP_GET_PTYPE_INFO = 526, + /* Opcode 527 and 528 are reserved for VIRTCHNL2_OP_GET_PTYPE_ID and + * VIRTCHNL2_OP_GET_PTYPE_INFO_RAW. + * Opcodes 529, 530, 531, 532 and 533 are reserved. + */ + VIRTCHNL2_OP_LOOPBACK = 534, + VIRTCHNL2_OP_ADD_MAC_ADDR = 535, + VIRTCHNL2_OP_DEL_MAC_ADDR = 536, + VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE = 537, +}; + +/** + * enum virtchnl2_vport_type - Type of virtual port. + * @VIRTCHNL2_VPORT_TYPE_DEFAULT: Default virtual port type. + */ +enum virtchnl2_vport_type { + VIRTCHNL2_VPORT_TYPE_DEFAULT = 0, +}; + +/** + * enum virtchnl2_queue_model - Type of queue model. + * @VIRTCHNL2_QUEUE_MODEL_SINGLE: Single queue model. + * @VIRTCHNL2_QUEUE_MODEL_SPLIT: Split queue model. + * + * In the single queue model, the same transmit descriptor queue is used by + * software to post descriptors to hardware and by hardware to post completed + * descriptors to software. + * Likewise, the same receive descriptor queue is used by hardware to post + * completions to software and by software to post buffers to hardware. + * + * In the split queue model, hardware uses transmit completion queues to post + * descriptor/buffer completions to software, while software uses transmit + * descriptor queues to post descriptors to hardware. + * Likewise, hardware posts descriptor completions to the receive descriptor + * queue, while software uses receive buffer queues to post buffers to hardware. + */ +enum virtchnl2_queue_model { + VIRTCHNL2_QUEUE_MODEL_SINGLE = 0, + VIRTCHNL2_QUEUE_MODEL_SPLIT = 1, +}; + +/* Checksum offload capability flags */ +enum virtchnl2_cap_txrx_csum { + VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 = BIT(0), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP = BIT(1), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP = BIT(2), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP = BIT(3), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP = BIT(4), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP = BIT(5), + VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP = BIT(6), + VIRTCHNL2_CAP_TX_CSUM_GENERIC = BIT(7), + VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 = BIT(8), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP = BIT(9), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP = BIT(10), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP = BIT(11), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP = BIT(12), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP = BIT(13), + VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP = BIT(14), + VIRTCHNL2_CAP_RX_CSUM_GENERIC = BIT(15), + VIRTCHNL2_CAP_TX_CSUM_L3_SINGLE_TUNNEL = BIT(16), + VIRTCHNL2_CAP_TX_CSUM_L3_DOUBLE_TUNNEL = BIT(17), + VIRTCHNL2_CAP_RX_CSUM_L3_SINGLE_TUNNEL = BIT(18), + VIRTCHNL2_CAP_RX_CSUM_L3_DOUBLE_TUNNEL = BIT(19), + VIRTCHNL2_CAP_TX_CSUM_L4_SINGLE_TUNNEL = BIT(20), + VIRTCHNL2_CAP_TX_CSUM_L4_DOUBLE_TUNNEL = BIT(21), + VIRTCHNL2_CAP_RX_CSUM_L4_SINGLE_TUNNEL = BIT(22), + VIRTCHNL2_CAP_RX_CSUM_L4_DOUBLE_TUNNEL = BIT(23), +}; + +/* Segmentation offload capability flags */ +enum virtchnl2_cap_seg { + VIRTCHNL2_CAP_SEG_IPV4_TCP = BIT(0), + VIRTCHNL2_CAP_SEG_IPV4_UDP = BIT(1), + VIRTCHNL2_CAP_SEG_IPV4_SCTP = BIT(2), + VIRTCHNL2_CAP_SEG_IPV6_TCP = BIT(3), + VIRTCHNL2_CAP_SEG_IPV6_UDP = BIT(4), + VIRTCHNL2_CAP_SEG_IPV6_SCTP = BIT(5), + VIRTCHNL2_CAP_SEG_GENERIC = BIT(6), + VIRTCHNL2_CAP_SEG_TX_SINGLE_TUNNEL = BIT(7), + VIRTCHNL2_CAP_SEG_TX_DOUBLE_TUNNEL = BIT(8), +}; + +/* Receive Side Scaling Flow type capability flags */ +enum virtchnl2_cap_rss { + VIRTCHNL2_CAP_RSS_IPV4_TCP = BIT(0), + VIRTCHNL2_CAP_RSS_IPV4_UDP = BIT(1), + VIRTCHNL2_CAP_RSS_IPV4_SCTP = BIT(2), + VIRTCHNL2_CAP_RSS_IPV4_OTHER = BIT(3), + VIRTCHNL2_CAP_RSS_IPV6_TCP = BIT(4), + VIRTCHNL2_CAP_RSS_IPV6_UDP = BIT(5), + VIRTCHNL2_CAP_RSS_IPV6_SCTP = BIT(6), + VIRTCHNL2_CAP_RSS_IPV6_OTHER = BIT(7), + VIRTCHNL2_CAP_RSS_IPV4_AH = BIT(8), + VIRTCHNL2_CAP_RSS_IPV4_ESP = BIT(9), + VIRTCHNL2_CAP_RSS_IPV4_AH_ESP = BIT(10), + VIRTCHNL2_CAP_RSS_IPV6_AH = BIT(11), + VIRTCHNL2_CAP_RSS_IPV6_ESP = BIT(12), + VIRTCHNL2_CAP_RSS_IPV6_AH_ESP = BIT(13), +}; + +/* Header split capability flags */ +enum virtchnl2_cap_rx_hsplit_at { + /* for prepended metadata */ + VIRTCHNL2_CAP_RX_HSPLIT_AT_L2 = BIT(0), + /* all VLANs go into header buffer */ + VIRTCHNL2_CAP_RX_HSPLIT_AT_L3 = BIT(1), + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V4 = BIT(2), + VIRTCHNL2_CAP_RX_HSPLIT_AT_L4V6 = BIT(3), +}; + +/* Receive Side Coalescing offload capability flags */ +enum virtchnl2_cap_rsc { + VIRTCHNL2_CAP_RSC_IPV4_TCP = BIT(0), + VIRTCHNL2_CAP_RSC_IPV4_SCTP = BIT(1), + VIRTCHNL2_CAP_RSC_IPV6_TCP = BIT(2), + VIRTCHNL2_CAP_RSC_IPV6_SCTP = BIT(3), +}; + +/* Other capability flags */ +enum virtchnl2_cap_other { + VIRTCHNL2_CAP_RDMA = BIT_ULL(0), + VIRTCHNL2_CAP_SRIOV = BIT_ULL(1), + VIRTCHNL2_CAP_MACFILTER = BIT_ULL(2), + VIRTCHNL2_CAP_FLOW_DIRECTOR = BIT_ULL(3), + /* Queue based scheduling using split queue model */ + VIRTCHNL2_CAP_SPLITQ_QSCHED = BIT_ULL(4), + VIRTCHNL2_CAP_CRC = BIT_ULL(5), + VIRTCHNL2_CAP_ADQ = BIT_ULL(6), + VIRTCHNL2_CAP_WB_ON_ITR = BIT_ULL(7), + VIRTCHNL2_CAP_PROMISC = BIT_ULL(8), + VIRTCHNL2_CAP_LINK_SPEED = BIT_ULL(9), + VIRTCHNL2_CAP_INLINE_IPSEC = BIT_ULL(10), + VIRTCHNL2_CAP_LARGE_NUM_QUEUES = BIT_ULL(11), + VIRTCHNL2_CAP_VLAN = BIT_ULL(12), + VIRTCHNL2_CAP_PTP = BIT_ULL(13), + /* EDT: Earliest Departure Time capability used for Timing Wheel */ + VIRTCHNL2_CAP_EDT = BIT_ULL(14), + VIRTCHNL2_CAP_ADV_RSS = BIT_ULL(15), + VIRTCHNL2_CAP_FDIR = BIT_ULL(16), + VIRTCHNL2_CAP_RX_FLEX_DESC = BIT_ULL(17), + VIRTCHNL2_CAP_PTYPE = BIT_ULL(18), + VIRTCHNL2_CAP_LOOPBACK = BIT_ULL(19), + /* Other capability 20 is reserved */ + + /* this must be the last capability */ + VIRTCHNL2_CAP_OEM = BIT_ULL(63), +}; + +/* underlying device type */ +enum virtchl2_device_type { + VIRTCHNL2_MEV_DEVICE = 0, +}; + +/** + * enum virtchnl2_txq_sched_mode - Transmit Queue Scheduling Modes. + * @VIRTCHNL2_TXQ_SCHED_MODE_QUEUE: Queue mode is the legacy mode i.e. inorder + * completions where descriptors and buffers + * are completed at the same time. + * @VIRTCHNL2_TXQ_SCHED_MODE_FLOW: Flow scheduling mode allows for out of order + * packet processing where descriptors are + * cleaned in order, but buffers can be + * completed out of order. + */ +enum virtchnl2_txq_sched_mode { + VIRTCHNL2_TXQ_SCHED_MODE_QUEUE = 0, + VIRTCHNL2_TXQ_SCHED_MODE_FLOW = 1, +}; + +/** + * enum virtchnl2_rxq_flags - Receive Queue Feature flags. + * @VIRTCHNL2_RXQ_RSC: Rx queue RSC flag. + * @VIRTCHNL2_RXQ_HDR_SPLIT: Rx queue header split flag. + * @VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK: When set, packet descriptors are flushed + * by hardware immediately after processing + * each packet. + * @VIRTCHNL2_RX_DESC_SIZE_16BYTE: Rx queue 16 byte descriptor size. + * @VIRTCHNL2_RX_DESC_SIZE_32BYTE: Rx queue 32 byte descriptor size. + */ +enum virtchnl2_rxq_flags { + VIRTCHNL2_RXQ_RSC = BIT(0), + VIRTCHNL2_RXQ_HDR_SPLIT = BIT(1), + VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK = BIT(2), + VIRTCHNL2_RX_DESC_SIZE_16BYTE = BIT(3), + VIRTCHNL2_RX_DESC_SIZE_32BYTE = BIT(4), +}; + +/* Type of RSS algorithm */ +enum virtchnl2_rss_alg { + VIRTCHNL2_RSS_ALG_TOEPLITZ_ASYMMETRIC = 0, + VIRTCHNL2_RSS_ALG_R_ASYMMETRIC = 1, + VIRTCHNL2_RSS_ALG_TOEPLITZ_SYMMETRIC = 2, + VIRTCHNL2_RSS_ALG_XOR_SYMMETRIC = 3, +}; + +/* Type of event */ +enum virtchnl2_event_codes { + VIRTCHNL2_EVENT_UNKNOWN = 0, + VIRTCHNL2_EVENT_LINK_CHANGE = 1, + /* Event type 2, 3 are reserved */ +}; + +/* Transmit and Receive queue types are valid in legacy as well as split queue + * models. With Split Queue model, 2 additional types are introduced - + * TX_COMPLETION and RX_BUFFER. In split queue model, receive corresponds to + * the queue where hardware posts completions. + */ +enum virtchnl2_queue_type { + VIRTCHNL2_QUEUE_TYPE_TX = 0, + VIRTCHNL2_QUEUE_TYPE_RX = 1, + VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION = 2, + VIRTCHNL2_QUEUE_TYPE_RX_BUFFER = 3, + VIRTCHNL2_QUEUE_TYPE_CONFIG_TX = 4, + VIRTCHNL2_QUEUE_TYPE_CONFIG_RX = 5, + /* Queue types 6, 7, 8, 9 are reserved */ + VIRTCHNL2_QUEUE_TYPE_MBX_TX = 10, + VIRTCHNL2_QUEUE_TYPE_MBX_RX = 11, +}; + +/* Interrupt throttling rate index */ +enum virtchnl2_itr_idx { + VIRTCHNL2_ITR_IDX_0 = 0, + VIRTCHNL2_ITR_IDX_1 = 1, +}; + +/** + * enum virtchnl2_mac_addr_type - MAC address types. + * @VIRTCHNL2_MAC_ADDR_PRIMARY: PF/VF driver should set this type for the + * primary/device unicast MAC address filter for + * VIRTCHNL2_OP_ADD_MAC_ADDR and + * VIRTCHNL2_OP_DEL_MAC_ADDR. This allows for the + * underlying control plane function to accurately + * track the MAC address and for VM/function reset. + * + * @VIRTCHNL2_MAC_ADDR_EXTRA: PF/VF driver should set this type for any extra + * unicast and/or multicast filters that are being + * added/deleted via VIRTCHNL2_OP_ADD_MAC_ADDR or + * VIRTCHNL2_OP_DEL_MAC_ADDR. + */ +enum virtchnl2_mac_addr_type { + VIRTCHNL2_MAC_ADDR_PRIMARY = 1, + VIRTCHNL2_MAC_ADDR_EXTRA = 2, +}; + +/* Flags used for promiscuous mode */ +enum virtchnl2_promisc_flags { + VIRTCHNL2_UNICAST_PROMISC = BIT(0), + VIRTCHNL2_MULTICAST_PROMISC = BIT(1), +}; + +/* Protocol header type within a packet segment. A segment consists of one or + * more protocol headers that make up a logical group of protocol headers. Each + * logical group of protocol headers encapsulates or is encapsulated using/by + * tunneling or encapsulation protocols for network virtualization. + */ +enum virtchnl2_proto_hdr_type { + /* VIRTCHNL2_PROTO_HDR_ANY is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_ANY = 0, + VIRTCHNL2_PROTO_HDR_PRE_MAC = 1, + /* VIRTCHNL2_PROTO_HDR_MAC is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_MAC = 2, + VIRTCHNL2_PROTO_HDR_POST_MAC = 3, + VIRTCHNL2_PROTO_HDR_ETHERTYPE = 4, + VIRTCHNL2_PROTO_HDR_VLAN = 5, + VIRTCHNL2_PROTO_HDR_SVLAN = 6, + VIRTCHNL2_PROTO_HDR_CVLAN = 7, + VIRTCHNL2_PROTO_HDR_MPLS = 8, + VIRTCHNL2_PROTO_HDR_UMPLS = 9, + VIRTCHNL2_PROTO_HDR_MMPLS = 10, + VIRTCHNL2_PROTO_HDR_PTP = 11, + VIRTCHNL2_PROTO_HDR_CTRL = 12, + VIRTCHNL2_PROTO_HDR_LLDP = 13, + VIRTCHNL2_PROTO_HDR_ARP = 14, + VIRTCHNL2_PROTO_HDR_ECP = 15, + VIRTCHNL2_PROTO_HDR_EAPOL = 16, + VIRTCHNL2_PROTO_HDR_PPPOD = 17, + VIRTCHNL2_PROTO_HDR_PPPOE = 18, + /* VIRTCHNL2_PROTO_HDR_IPV4 is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_IPV4 = 19, + /* IPv4 and IPv6 Fragment header types are only associated to + * VIRTCHNL2_PROTO_HDR_IPV4 and VIRTCHNL2_PROTO_HDR_IPV6 respectively, + * cannot be used independently. + */ + /* VIRTCHNL2_PROTO_HDR_IPV4_FRAG is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_IPV4_FRAG = 20, + /* VIRTCHNL2_PROTO_HDR_IPV6 is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_IPV6 = 21, + /* VIRTCHNL2_PROTO_HDR_IPV6_FRAG is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_IPV6_FRAG = 22, + VIRTCHNL2_PROTO_HDR_IPV6_EH = 23, + /* VIRTCHNL2_PROTO_HDR_UDP is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_UDP = 24, + /* VIRTCHNL2_PROTO_HDR_TCP is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_TCP = 25, + /* VIRTCHNL2_PROTO_HDR_SCTP is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_SCTP = 26, + /* VIRTCHNL2_PROTO_HDR_ICMP is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_ICMP = 27, + /* VIRTCHNL2_PROTO_HDR_ICMPV6 is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_ICMPV6 = 28, + VIRTCHNL2_PROTO_HDR_IGMP = 29, + VIRTCHNL2_PROTO_HDR_AH = 30, + VIRTCHNL2_PROTO_HDR_ESP = 31, + VIRTCHNL2_PROTO_HDR_IKE = 32, + VIRTCHNL2_PROTO_HDR_NATT_KEEP = 33, + /* VIRTCHNL2_PROTO_HDR_PAY is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_PAY = 34, + VIRTCHNL2_PROTO_HDR_L2TPV2 = 35, + VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL = 36, + VIRTCHNL2_PROTO_HDR_L2TPV3 = 37, + VIRTCHNL2_PROTO_HDR_GTP = 38, + VIRTCHNL2_PROTO_HDR_GTP_EH = 39, + VIRTCHNL2_PROTO_HDR_GTPCV2 = 40, + VIRTCHNL2_PROTO_HDR_GTPC_TEID = 41, + VIRTCHNL2_PROTO_HDR_GTPU = 42, + VIRTCHNL2_PROTO_HDR_GTPU_UL = 43, + VIRTCHNL2_PROTO_HDR_GTPU_DL = 44, + VIRTCHNL2_PROTO_HDR_ECPRI = 45, + VIRTCHNL2_PROTO_HDR_VRRP = 46, + VIRTCHNL2_PROTO_HDR_OSPF = 47, + /* VIRTCHNL2_PROTO_HDR_TUN is a mandatory protocol id */ + VIRTCHNL2_PROTO_HDR_TUN = 48, + VIRTCHNL2_PROTO_HDR_GRE = 49, + VIRTCHNL2_PROTO_HDR_NVGRE = 50, + VIRTCHNL2_PROTO_HDR_VXLAN = 51, + VIRTCHNL2_PROTO_HDR_VXLAN_GPE = 52, + VIRTCHNL2_PROTO_HDR_GENEVE = 53, + VIRTCHNL2_PROTO_HDR_NSH = 54, + VIRTCHNL2_PROTO_HDR_QUIC = 55, + VIRTCHNL2_PROTO_HDR_PFCP = 56, + VIRTCHNL2_PROTO_HDR_PFCP_NODE = 57, + VIRTCHNL2_PROTO_HDR_PFCP_SESSION = 58, + VIRTCHNL2_PROTO_HDR_RTP = 59, + VIRTCHNL2_PROTO_HDR_ROCE = 60, + VIRTCHNL2_PROTO_HDR_ROCEV1 = 61, + VIRTCHNL2_PROTO_HDR_ROCEV2 = 62, + /* Protocol ids up to 32767 are reserved. + * 32768 - 65534 are used for user defined protocol ids. + * VIRTCHNL2_PROTO_HDR_NO_PROTO is a mandatory protocol id. + */ + VIRTCHNL2_PROTO_HDR_NO_PROTO = 65535, +}; + +enum virtchl2_version { + VIRTCHNL2_VERSION_MINOR_0 = 0, + VIRTCHNL2_VERSION_MAJOR_2 = 2, +}; + +/** + * struct virtchnl2_edt_caps - Get EDT granularity and time horizon. + * @tstamp_granularity_ns: Timestamp granularity in nanoseconds. + * @time_horizon_ns: Total time window in nanoseconds. + * + * Associated with VIRTCHNL2_OP_GET_EDT_CAPS. + */ +struct virtchnl2_edt_caps { + __le64 tstamp_granularity_ns; + __le64 time_horizon_ns; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_edt_caps); + +/** + * struct virtchnl2_version_info - Version information. + * @major: Major version. + * @minor: Minor version. + * + * PF/VF posts its version number to the CP. CP responds with its version number + * in the same format, along with a return code. + * If there is a major version mismatch, then the PF/VF cannot operate. + * If there is a minor version mismatch, then the PF/VF can operate but should + * add a warning to the system log. + * + * This version opcode MUST always be specified as == 1, regardless of other + * changes in the API. The CP must always respond to this message without + * error regardless of version mismatch. + * + * Associated with VIRTCHNL2_OP_VERSION. + */ +struct virtchnl2_version_info { + __le32 major; + __le32 minor; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_version_info); + +/** + * struct virtchnl2_get_capabilities - Capabilities info. + * @csum_caps: See enum virtchnl2_cap_txrx_csum. + * @seg_caps: See enum virtchnl2_cap_seg. + * @hsplit_caps: See enum virtchnl2_cap_rx_hsplit_at. + * @rsc_caps: See enum virtchnl2_cap_rsc. + * @rss_caps: See enum virtchnl2_cap_rss. + * @other_caps: See enum virtchnl2_cap_other. + * @mailbox_dyn_ctl: DYN_CTL register offset and vector id for mailbox + * provided by CP. + * @mailbox_vector_id: Mailbox vector id. + * @num_allocated_vectors: Maximum number of allocated vectors for the device. + * @max_rx_q: Maximum number of supported Rx queues. + * @max_tx_q: Maximum number of supported Tx queues. + * @max_rx_bufq: Maximum number of supported buffer queues. + * @max_tx_complq: Maximum number of supported completion queues. + * @max_sriov_vfs: The PF sends the maximum VFs it is requesting. The CP + * responds with the maximum VFs granted. + * @max_vports: Maximum number of vports that can be supported. + * @default_num_vports: Default number of vports driver should allocate on load. + * @max_tx_hdr_size: Max header length hardware can parse/checksum, in bytes. + * @max_sg_bufs_per_tx_pkt: Max number of scatter gather buffers that can be + * sent per transmit packet without needing to be + * linearized. + * @pad: Padding. + * @reserved: Reserved. + * @device_type: See enum virtchl2_device_type. + * @min_sso_packet_len: Min packet length supported by device for single + * segment offload. + * @max_hdr_buf_per_lso: Max number of header buffers that can be used for + * an LSO. + * @pad1: Padding for future extensions. + * + * Dataplane driver sends this message to CP to negotiate capabilities and + * provides a virtchnl2_get_capabilities structure with its desired + * capabilities, max_sriov_vfs and num_allocated_vectors. + * CP responds with a virtchnl2_get_capabilities structure updated + * with allowed capabilities and the other fields as below. + * If PF sets max_sriov_vfs as 0, CP will respond with max number of VFs + * that can be created by this PF. For any other value 'n', CP responds + * with max_sriov_vfs set to min(n, x) where x is the max number of VFs + * allowed by CP's policy. max_sriov_vfs is not applicable for VFs. + * If dataplane driver sets num_allocated_vectors as 0, CP will respond with 1 + * which is default vector associated with the default mailbox. For any other + * value 'n', CP responds with a value <= n based on the CP's policy of + * max number of vectors for a PF. + * CP will respond with the vector ID of mailbox allocated to the PF in + * mailbox_vector_id and the number of itr index registers in itr_idx_map. + * It also responds with default number of vports that the dataplane driver + * should comeup with in default_num_vports and maximum number of vports that + * can be supported in max_vports. + * + * Associated with VIRTCHNL2_OP_GET_CAPS. + */ +struct virtchnl2_get_capabilities { + __le32 csum_caps; + __le32 seg_caps; + __le32 hsplit_caps; + __le32 rsc_caps; + __le64 rss_caps; + __le64 other_caps; + __le32 mailbox_dyn_ctl; + __le16 mailbox_vector_id; + __le16 num_allocated_vectors; + __le16 max_rx_q; + __le16 max_tx_q; + __le16 max_rx_bufq; + __le16 max_tx_complq; + __le16 max_sriov_vfs; + __le16 max_vports; + __le16 default_num_vports; + __le16 max_tx_hdr_size; + u8 max_sg_bufs_per_tx_pkt; + u8 pad[3]; + u8 reserved[4]; + __le32 device_type; + u8 min_sso_packet_len; + u8 max_hdr_buf_per_lso; + u8 pad1[10]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(80, virtchnl2_get_capabilities); + +/** + * struct virtchnl2_queue_reg_chunk - Single queue chunk. + * @type: See enum virtchnl2_queue_type. + * @start_queue_id: Start Queue ID. + * @num_queues: Number of queues in the chunk. + * @pad: Padding. + * @qtail_reg_start: Queue tail register offset. + * @qtail_reg_spacing: Queue tail register spacing. + * @pad1: Padding for future extensions. + */ +struct virtchnl2_queue_reg_chunk { + __le32 type; + __le32 start_queue_id; + __le32 num_queues; + __le32 pad; + __le64 qtail_reg_start; + __le32 qtail_reg_spacing; + u8 pad1[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_queue_reg_chunk); + +/** + * struct virtchnl2_queue_reg_chunks - Specify several chunks of contiguous + * queues. + * @num_chunks: Number of chunks. + * @pad: Padding. + * @chunks: Chunks of queue info. + */ +struct virtchnl2_queue_reg_chunks { + __le16 num_chunks; + u8 pad[6]; + struct virtchnl2_queue_reg_chunk chunks[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_reg_chunks); + +/** + * struct virtchnl2_create_vport - Create vport config info. + * @vport_type: See enum virtchnl2_vport_type. + * @txq_model: See virtchnl2_queue_model. + * @rxq_model: See virtchnl2_queue_model. + * @num_tx_q: Number of Tx queues. + * @num_tx_complq: Valid only if txq_model is split queue. + * @num_rx_q: Number of Rx queues. + * @num_rx_bufq: Valid only if rxq_model is split queue. + * @default_rx_q: Relative receive queue index to be used as default. + * @vport_index: Used to align PF and CP in case of default multiple vports, + * it is filled by the PF and CP returns the same value, to + * enable the driver to support multiple asynchronous parallel + * CREATE_VPORT requests and associate a response to a specific + * request. + * @max_mtu: Max MTU. CP populates this field on response. + * @vport_id: Vport id. CP populates this field on response. + * @default_mac_addr: Default MAC address. + * @pad: Padding. + * @rx_desc_ids: See VIRTCHNL2_RX_DESC_IDS definitions. + * @tx_desc_ids: See VIRTCHNL2_TX_DESC_IDS definitions. + * @pad1: Padding. + * @rss_algorithm: RSS algorithm. + * @rss_key_size: RSS key size. + * @rss_lut_size: RSS LUT size. + * @rx_split_pos: See enum virtchnl2_cap_rx_hsplit_at. + * @pad2: Padding. + * @chunks: Chunks of contiguous queues. + * + * PF sends this message to CP to create a vport by filling in required + * fields of virtchnl2_create_vport structure. + * CP responds with the updated virtchnl2_create_vport structure containing the + * necessary fields followed by chunks which in turn will have an array of + * num_chunks entries of virtchnl2_queue_chunk structures. + * + * Associated with VIRTCHNL2_OP_CREATE_VPORT. + */ +struct virtchnl2_create_vport { + __le16 vport_type; + __le16 txq_model; + __le16 rxq_model; + __le16 num_tx_q; + __le16 num_tx_complq; + __le16 num_rx_q; + __le16 num_rx_bufq; + __le16 default_rx_q; + __le16 vport_index; + /* CP populates the following fields on response */ + __le16 max_mtu; + __le32 vport_id; + u8 default_mac_addr[ETH_ALEN]; + __le16 pad; + __le64 rx_desc_ids; + __le64 tx_desc_ids; + u8 pad1[72]; + __le32 rss_algorithm; + __le16 rss_key_size; + __le16 rss_lut_size; + __le32 rx_split_pos; + u8 pad2[20]; + struct virtchnl2_queue_reg_chunks chunks; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(160, virtchnl2_create_vport); + +/** + * struct virtchnl2_vport - Vport ID info. + * @vport_id: Vport id. + * @pad: Padding for future extensions. + * + * PF sends this message to CP to destroy, enable or disable a vport by filling + * in the vport_id in virtchnl2_vport structure. + * CP responds with the status of the requested operation. + * + * Associated with VIRTCHNL2_OP_DESTROY_VPORT, VIRTCHNL2_OP_ENABLE_VPORT, + * VIRTCHNL2_OP_DISABLE_VPORT. + */ +struct virtchnl2_vport { + __le32 vport_id; + u8 pad[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_vport); + +/** + * struct virtchnl2_txq_info - Transmit queue config info + * @dma_ring_addr: DMA address. + * @type: See enum virtchnl2_queue_type. + * @queue_id: Queue ID. + * @relative_queue_id: Valid only if queue model is split and type is transmit + * queue. Used in many to one mapping of transmit queues to + * completion queue. + * @model: See enum virtchnl2_queue_model. + * @sched_mode: See enum virtchnl2_txq_sched_mode. + * @qflags: TX queue feature flags. + * @ring_len: Ring length. + * @tx_compl_queue_id: Valid only if queue model is split and type is transmit + * queue. + * @peer_type: Valid only if queue type is VIRTCHNL2_QUEUE_TYPE_MAILBOX_TX + * @peer_rx_queue_id: Valid only if queue type is CONFIG_TX and used to deliver + * messages for the respective CONFIG_TX queue. + * @pad: Padding. + * @egress_pasid: Egress PASID info. + * @egress_hdr_pasid: Egress HDR passid. + * @egress_buf_pasid: Egress buf passid. + * @pad1: Padding for future extensions. + */ +struct virtchnl2_txq_info { + __le64 dma_ring_addr; + __le32 type; + __le32 queue_id; + __le16 relative_queue_id; + __le16 model; + __le16 sched_mode; + __le16 qflags; + __le16 ring_len; + __le16 tx_compl_queue_id; + __le16 peer_type; + __le16 peer_rx_queue_id; + u8 pad[4]; + __le32 egress_pasid; + __le32 egress_hdr_pasid; + __le32 egress_buf_pasid; + u8 pad1[8]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(56, virtchnl2_txq_info); + +/** + * struct virtchnl2_config_tx_queues - TX queue config. + * @vport_id: Vport id. + * @num_qinfo: Number of virtchnl2_txq_info structs. + * @pad: Padding. + * @qinfo: Tx queues config info. + * + * PF sends this message to set up parameters for one or more transmit queues. + * This message contains an array of num_qinfo instances of virtchnl2_txq_info + * structures. CP configures requested queues and returns a status code. If + * num_qinfo specified is greater than the number of queues associated with the + * vport, an error is returned and no queues are configured. + * + * Associated with VIRTCHNL2_OP_CONFIG_TX_QUEUES. + */ +struct virtchnl2_config_tx_queues { + __le32 vport_id; + __le16 num_qinfo; + u8 pad[10]; + struct virtchnl2_txq_info qinfo[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_config_tx_queues); + +/** + * struct virtchnl2_rxq_info - Receive queue config info. + * @desc_ids: See VIRTCHNL2_RX_DESC_IDS definitions. + * @dma_ring_addr: See VIRTCHNL2_RX_DESC_IDS definitions. + * @type: See enum virtchnl2_queue_type. + * @queue_id: Queue id. + * @model: See enum virtchnl2_queue_model. + * @hdr_buffer_size: Header buffer size. + * @data_buffer_size: Data buffer size. + * @max_pkt_size: Max packet size. + * @ring_len: Ring length. + * @buffer_notif_stride: Buffer notification stride in units of 32-descriptors. + * This field must be a power of 2. + * @pad: Padding. + * @dma_head_wb_addr: Applicable only for receive buffer queues. + * @qflags: Applicable only for receive completion queues. + * See enum virtchnl2_rxq_flags. + * @rx_buffer_low_watermark: Rx buffer low watermark. + * @rx_bufq1_id: Buffer queue index of the first buffer queue associated with + * the Rx queue. Valid only in split queue model. + * @rx_bufq2_id: Buffer queue index of the second buffer queue associated with + * the Rx queue. Valid only in split queue model. + * @bufq2_ena: It indicates if there is a second buffer, rx_bufq2_id is valid + * only if this field is set. + * @pad1: Padding. + * @ingress_pasid: Ingress PASID. + * @ingress_hdr_pasid: Ingress PASID header. + * @ingress_buf_pasid: Ingress PASID buffer. + * @pad2: Padding for future extensions. + */ +struct virtchnl2_rxq_info { + __le64 desc_ids; + __le64 dma_ring_addr; + __le32 type; + __le32 queue_id; + __le16 model; + __le16 hdr_buffer_size; + __le32 data_buffer_size; + __le32 max_pkt_size; + __le16 ring_len; + u8 buffer_notif_stride; + u8 pad; + __le64 dma_head_wb_addr; + __le16 qflags; + __le16 rx_buffer_low_watermark; + __le16 rx_bufq1_id; + __le16 rx_bufq2_id; + u8 bufq2_ena; + u8 pad1[3]; + __le32 ingress_pasid; + __le32 ingress_hdr_pasid; + __le32 ingress_buf_pasid; + u8 pad2[16]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(88, virtchnl2_rxq_info); + +/** + * struct virtchnl2_config_rx_queues - Rx queues config. + * @vport_id: Vport id. + * @num_qinfo: Number of instances. + * @pad: Padding. + * @qinfo: Rx queues config info. + * + * PF sends this message to set up parameters for one or more receive queues. + * This message contains an array of num_qinfo instances of virtchnl2_rxq_info + * structures. CP configures requested queues and returns a status code. + * If the number of queues specified is greater than the number of queues + * associated with the vport, an error is returned and no queues are configured. + * + * Associated with VIRTCHNL2_OP_CONFIG_RX_QUEUES. + */ +struct virtchnl2_config_rx_queues { + __le32 vport_id; + __le16 num_qinfo; + u8 pad[18]; + struct virtchnl2_rxq_info qinfo[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_config_rx_queues); + +/** + * struct virtchnl2_add_queues - data for VIRTCHNL2_OP_ADD_QUEUES. + * @vport_id: Vport id. + * @num_tx_q: Number of Tx qieues. + * @num_tx_complq: Number of Tx completion queues. + * @num_rx_q: Number of Rx queues. + * @num_rx_bufq: Number of Rx buffer queues. + * @pad: Padding. + * @chunks: Chunks of contiguous queues. + * + * PF sends this message to request additional transmit/receive queues beyond + * the ones that were assigned via CREATE_VPORT request. virtchnl2_add_queues + * structure is used to specify the number of each type of queues. + * CP responds with the same structure with the actual number of queues assigned + * followed by num_chunks of virtchnl2_queue_chunk structures. + * + * Associated with VIRTCHNL2_OP_ADD_QUEUES. + */ +struct virtchnl2_add_queues { + __le32 vport_id; + __le16 num_tx_q; + __le16 num_tx_complq; + __le16 num_rx_q; + __le16 num_rx_bufq; + u8 pad[4]; + struct virtchnl2_queue_reg_chunks chunks; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_add_queues); + +/** + * struct virtchnl2_vector_chunk - Structure to specify a chunk of contiguous + * interrupt vectors. + * @start_vector_id: Start vector id. + * @start_evv_id: Start EVV id. + * @num_vectors: Number of vectors. + * @pad: Padding. + * @dynctl_reg_start: DYN_CTL register offset. + * @dynctl_reg_spacing: register spacing between DYN_CTL registers of 2 + * consecutive vectors. + * @itrn_reg_start: ITRN register offset. + * @itrn_reg_spacing: Register spacing between dynctl registers of 2 + * consecutive vectors. + * @itrn_index_spacing: Register spacing between itrn registers of the same + * vector where n=0..2. + * @pad1: Padding for future extensions. + * + * Register offsets and spacing provided by CP. + * Dynamic control registers are used for enabling/disabling/re-enabling + * interrupts and updating interrupt rates in the hotpath. Any changes + * to interrupt rates in the dynamic control registers will be reflected + * in the interrupt throttling rate registers. + * itrn registers are used to update interrupt rates for specific + * interrupt indices without modifying the state of the interrupt. + */ +struct virtchnl2_vector_chunk { + __le16 start_vector_id; + __le16 start_evv_id; + __le16 num_vectors; + __le16 pad; + __le32 dynctl_reg_start; + __le32 dynctl_reg_spacing; + __le32 itrn_reg_start; + __le32 itrn_reg_spacing; + __le32 itrn_index_spacing; + u8 pad1[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_vector_chunk); + +/** + * struct virtchnl2_vector_chunks - chunks of contiguous interrupt vectors. + * @num_vchunks: number of vector chunks. + * @pad: Padding. + * @vchunks: Chunks of contiguous vector info. + * + * PF sends virtchnl2_vector_chunks struct to specify the vectors it is giving + * away. CP performs requested action and returns status. + * + * Associated with VIRTCHNL2_OP_DEALLOC_VECTORS. + */ +struct virtchnl2_vector_chunks { + __le16 num_vchunks; + u8 pad[14]; + struct virtchnl2_vector_chunk vchunks[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_vector_chunks); + +/** + * struct virtchnl2_alloc_vectors - vector allocation info. + * @num_vectors: Number of vectors. + * @pad: Padding. + * @vchunks: Chunks of contiguous vector info. + * + * PF sends this message to request additional interrupt vectors beyond the + * ones that were assigned via GET_CAPS request. virtchnl2_alloc_vectors + * structure is used to specify the number of vectors requested. CP responds + * with the same structure with the actual number of vectors assigned followed + * by virtchnl2_vector_chunks structure identifying the vector ids. + * + * Associated with VIRTCHNL2_OP_ALLOC_VECTORS. + */ +struct virtchnl2_alloc_vectors { + __le16 num_vectors; + u8 pad[14]; + struct virtchnl2_vector_chunks vchunks; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(32, virtchnl2_alloc_vectors); + +/** + * struct virtchnl2_rss_lut - RSS LUT info. + * @vport_id: Vport id. + * @lut_entries_start: Start of LUT entries. + * @lut_entries: Number of LUT entrties. + * @pad: Padding. + * @lut: RSS lookup table. + * + * PF sends this message to get or set RSS lookup table. Only supported if + * both PF and CP drivers set the VIRTCHNL2_CAP_RSS bit during configuration + * negotiation. + * + * Associated with VIRTCHNL2_OP_GET_RSS_LUT and VIRTCHNL2_OP_SET_RSS_LUT. + */ +struct virtchnl2_rss_lut { + __le32 vport_id; + __le16 lut_entries_start; + __le16 lut_entries; + u8 pad[4]; + __le32 lut[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(12, virtchnl2_rss_lut); + +/** + * struct virtchnl2_rss_hash - RSS hash info. + * @ptype_groups: Packet type groups bitmap. + * @vport_id: Vport id. + * @pad: Padding for future extensions. + * + * PF sends these messages to get and set the hash filter enable bits for RSS. + * By default, the CP sets these to all possible traffic types that the + * hardware supports. The PF can query this value if it wants to change the + * traffic types that are hashed by the hardware. + * Only supported if both PF and CP drivers set the VIRTCHNL2_CAP_RSS bit + * during configuration negotiation. + * + * Associated with VIRTCHNL2_OP_GET_RSS_HASH and VIRTCHNL2_OP_SET_RSS_HASH + */ +struct virtchnl2_rss_hash { + __le64 ptype_groups; + __le32 vport_id; + u8 pad[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_rss_hash); + +/** + * struct virtchnl2_sriov_vfs_info - VFs info. + * @num_vfs: Number of VFs. + * @pad: Padding for future extensions. + * + * This message is used to set number of SRIOV VFs to be created. The actual + * allocation of resources for the VFs in terms of vport, queues and interrupts + * is done by CP. When this call completes, the IDPF driver calls + * pci_enable_sriov to let the OS instantiate the SRIOV PCIE devices. + * The number of VFs set to 0 will destroy all the VFs of this function. + * + * Associated with VIRTCHNL2_OP_SET_SRIOV_VFS. + */ +struct virtchnl2_sriov_vfs_info { + __le16 num_vfs; + __le16 pad; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(4, virtchnl2_sriov_vfs_info); + +/** + * struct virtchnl2_ptype - Packet type info. + * @ptype_id_10: 10-bit packet type. + * @ptype_id_8: 8-bit packet type. + * @proto_id_count: Number of protocol ids the packet supports, maximum of 32 + * protocol ids are supported. + * @pad: Padding. + * @proto_id: proto_id_count decides the allocation of protocol id array. + * See enum virtchnl2_proto_hdr_type. + * + * Based on the descriptor type the PF supports, CP fills ptype_id_10 or + * ptype_id_8 for flex and base descriptor respectively. If ptype_id_10 value + * is set to 0xFFFF, PF should consider this ptype as dummy one and it is the + * last ptype. + */ +struct virtchnl2_ptype { + __le16 ptype_id_10; + u8 ptype_id_8; + u8 proto_id_count; + __le16 pad; + __le16 proto_id[]; +} __packed __aligned(2); +VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); + +/** + * struct virtchnl2_get_ptype_info - Packet type info. + * @start_ptype_id: Starting ptype ID. + * @num_ptypes: Number of packet types from start_ptype_id. + * @pad: Padding for future extensions. + * + * The total number of supported packet types is based on the descriptor type. + * For the flex descriptor, it is 1024 (10-bit ptype), and for the base + * descriptor, it is 256 (8-bit ptype). Send this message to the CP by + * populating the 'start_ptype_id' and the 'num_ptypes'. CP responds with the + * 'start_ptype_id', 'num_ptypes', and the array of ptype (virtchnl2_ptype) that + * are added at the end of the 'virtchnl2_get_ptype_info' message (Note: There + * is no specific field for the ptypes but are added at the end of the + * ptype info message. PF/VF is expected to extract the ptypes accordingly. + * Reason for doing this is because compiler doesn't allow nested flexible + * array fields). + * + * If all the ptypes don't fit into one mailbox buffer, CP splits the + * ptype info into multiple messages, where each message will have its own + * 'start_ptype_id', 'num_ptypes', and the ptype array itself. When CP is done + * updating all the ptype information extracted from the package (the number of + * ptypes extracted might be less than what PF/VF expects), it will append a + * dummy ptype (which has 'ptype_id_10' of 'struct virtchnl2_ptype' as 0xFFFF) + * to the ptype array. + * + * PF/VF is expected to receive multiple VIRTCHNL2_OP_GET_PTYPE_INFO messages. + * + * Associated with VIRTCHNL2_OP_GET_PTYPE_INFO. + */ +struct virtchnl2_get_ptype_info { + __le16 start_ptype_id; + __le16 num_ptypes; + __le32 pad; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_get_ptype_info); + +/** + * struct virtchnl2_vport_stats - Vport statistics. + * @vport_id: Vport id. + * @pad: Padding. + * @rx_bytes: Received bytes. + * @rx_unicast: Received unicast packets. + * @rx_multicast: Received multicast packets. + * @rx_broadcast: Received broadcast packets. + * @rx_discards: Discarded packets on receive. + * @rx_errors: Receive errors. + * @rx_unknown_protocol: Unlnown protocol. + * @tx_bytes: Transmitted bytes. + * @tx_unicast: Transmitted unicast packets. + * @tx_multicast: Transmitted multicast packets. + * @tx_broadcast: Transmitted broadcast packets. + * @tx_discards: Discarded packets on transmit. + * @tx_errors: Transmit errors. + * @rx_invalid_frame_length: Packets with invalid frame length. + * @rx_overflow_drop: Packets dropped on buffer overflow. + * + * PF/VF sends this message to CP to get the update stats by specifying the + * vport_id. CP responds with stats in struct virtchnl2_vport_stats. + * + * Associated with VIRTCHNL2_OP_GET_STATS. + */ +struct virtchnl2_vport_stats { + __le32 vport_id; + u8 pad[4]; + __le64 rx_bytes; + __le64 rx_unicast; + __le64 rx_multicast; + __le64 rx_broadcast; + __le64 rx_discards; + __le64 rx_errors; + __le64 rx_unknown_protocol; + __le64 tx_bytes; + __le64 tx_unicast; + __le64 tx_multicast; + __le64 tx_broadcast; + __le64 tx_discards; + __le64 tx_errors; + __le64 rx_invalid_frame_length; + __le64 rx_overflow_drop; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(128, virtchnl2_vport_stats); + +/** + * struct virtchnl2_event - Event info. + * @event: Event opcode. See enum virtchnl2_event_codes. + * @link_speed: Link_speed provided in Mbps. + * @vport_id: Vport ID. + * @link_status: Link status. + * @pad: Padding. + * @reserved: Reserved. + * + * CP sends this message to inform the PF/VF driver of events that may affect + * it. No direct response is expected from the driver, though it may generate + * other messages in response to this one. + * + * Associated with VIRTCHNL2_OP_EVENT. + */ +struct virtchnl2_event { + __le32 event; + __le32 link_speed; + __le32 vport_id; + u8 link_status; + u8 pad; + __le16 reserved; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_event); + +/** + * struct virtchnl2_rss_key - RSS key info. + * @vport_id: Vport id. + * @key_len: Length of RSS key. + * @pad: Padding. + * @key_flex: RSS hash key, packed bytes. + * PF/VF sends this message to get or set RSS key. Only supported if both + * PF/VF and CP drivers set the VIRTCHNL2_CAP_RSS bit during configuration + * negotiation. + * + * Associated with VIRTCHNL2_OP_GET_RSS_KEY and VIRTCHNL2_OP_SET_RSS_KEY. + */ +struct virtchnl2_rss_key { + __le32 vport_id; + __le16 key_len; + u8 pad; + u8 key_flex[]; +} __packed; +VIRTCHNL2_CHECK_STRUCT_LEN(7, virtchnl2_rss_key); + +/** + * struct virtchnl2_queue_chunk - chunk of contiguous queues + * @type: See enum virtchnl2_queue_type. + * @start_queue_id: Starting queue id. + * @num_queues: Number of queues. + * @pad: Padding for future extensions. + */ +struct virtchnl2_queue_chunk { + __le32 type; + __le32 start_queue_id; + __le32 num_queues; + u8 pad[4]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_chunk); + +/* struct virtchnl2_queue_chunks - chunks of contiguous queues + * @num_chunks: Number of chunks. + * @pad: Padding. + * @chunks: Chunks of contiguous queues info. + */ +struct virtchnl2_queue_chunks { + __le16 num_chunks; + u8 pad[6]; + struct virtchnl2_queue_chunk chunks[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_queue_chunks); + +/** + * struct virtchnl2_del_ena_dis_queues - Enable/disable queues info. + * @vport_id: Vport id. + * @pad: Padding. + * @chunks: Chunks of contiguous queues info. + * + * PF sends these messages to enable, disable or delete queues specified in + * chunks. PF sends virtchnl2_del_ena_dis_queues struct to specify the queues + * to be enabled/disabled/deleted. Also applicable to single queue receive or + * transmit. CP performs requested action and returns status. + * + * Associated with VIRTCHNL2_OP_ENABLE_QUEUES, VIRTCHNL2_OP_DISABLE_QUEUES and + * VIRTCHNL2_OP_DISABLE_QUEUES. + */ +struct virtchnl2_del_ena_dis_queues { + __le32 vport_id; + u8 pad[4]; + struct virtchnl2_queue_chunks chunks; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_del_ena_dis_queues); + +/** + * struct virtchnl2_queue_vector - Queue to vector mapping. + * @queue_id: Queue id. + * @vector_id: Vector id. + * @pad: Padding. + * @itr_idx: See enum virtchnl2_itr_idx. + * @queue_type: See enum virtchnl2_queue_type. + * @pad1: Padding for future extensions. + */ +struct virtchnl2_queue_vector { + __le32 queue_id; + __le16 vector_id; + u8 pad[2]; + __le32 itr_idx; + __le32 queue_type; + u8 pad1[8]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(24, virtchnl2_queue_vector); + +/** + * struct virtchnl2_queue_vector_maps - Map/unmap queues info. + * @vport_id: Vport id. + * @num_qv_maps: Number of queue vector maps. + * @pad: Padding. + * @qv_maps: Queue to vector maps. + * + * PF sends this message to map or unmap queues to vectors and interrupt + * throttling rate index registers. External data buffer contains + * virtchnl2_queue_vector_maps structure that contains num_qv_maps of + * virtchnl2_queue_vector structures. CP maps the requested queue vector maps + * after validating the queue and vector ids and returns a status code. + * + * Associated with VIRTCHNL2_OP_MAP_QUEUE_VECTOR and + * VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR. + */ +struct virtchnl2_queue_vector_maps { + __le32 vport_id; + __le16 num_qv_maps; + u8 pad[10]; + struct virtchnl2_queue_vector qv_maps[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(16, virtchnl2_queue_vector_maps); + +/** + * struct virtchnl2_loopback - Loopback info. + * @vport_id: Vport id. + * @enable: Enable/disable. + * @pad: Padding for future extensions. + * + * PF/VF sends this message to transition to/from the loopback state. Setting + * the 'enable' to 1 enables the loopback state and setting 'enable' to 0 + * disables it. CP configures the state to loopback and returns status. + * + * Associated with VIRTCHNL2_OP_LOOPBACK. + */ +struct virtchnl2_loopback { + __le32 vport_id; + u8 enable; + u8 pad[3]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_loopback); + +/* struct virtchnl2_mac_addr - MAC address info. + * @addr: MAC address. + * @type: MAC type. See enum virtchnl2_mac_addr_type. + * @pad: Padding for future extensions. + */ +struct virtchnl2_mac_addr { + u8 addr[ETH_ALEN]; + u8 type; + u8 pad; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr); + +/** + * struct virtchnl2_mac_addr_list - List of MAC addresses. + * @vport_id: Vport id. + * @num_mac_addr: Number of MAC addresses. + * @pad: Padding. + * @mac_addr_list: List with MAC address info. + * + * PF/VF driver uses this structure to send list of MAC addresses to be + * added/deleted to the CP where as CP performs the action and returns the + * status. + * + * Associated with VIRTCHNL2_OP_ADD_MAC_ADDR and VIRTCHNL2_OP_DEL_MAC_ADDR. + */ +struct virtchnl2_mac_addr_list { + __le32 vport_id; + __le16 num_mac_addr; + u8 pad[2]; + struct virtchnl2_mac_addr mac_addr_list[]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_mac_addr_list); + +/** + * struct virtchnl2_promisc_info - Promisc type info. + * @vport_id: Vport id. + * @flags: See enum virtchnl2_promisc_flags. + * @pad: Padding for future extensions. + * + * PF/VF sends vport id and flags to the CP where as CP performs the action + * and returns the status. + * + * Associated with VIRTCHNL2_OP_CONFIG_PROMISCUOUS_MODE. + */ +struct virtchnl2_promisc_info { + __le32 vport_id; + /* See VIRTCHNL2_PROMISC_FLAGS definitions */ + __le16 flags; + u8 pad[2]; +}; +VIRTCHNL2_CHECK_STRUCT_LEN(8, virtchnl2_promisc_info); + +#endif /* _VIRTCHNL_2_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h b/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h new file mode 100644 index 00000000000000..f1b577f1c452f3 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/virtchnl2_lan_desc.h @@ -0,0 +1,451 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _VIRTCHNL2_LAN_DESC_H_ +#define _VIRTCHNL2_LAN_DESC_H_ + +#include + +/* This is an interface definition file where existing enums and their values + * must remain unchanged over time, so we specify explicit values for all enums. + */ + +/* Transmit descriptor ID flags + */ +enum virtchnl2_tx_desc_ids { + VIRTCHNL2_TXDID_DATA = BIT(0), + VIRTCHNL2_TXDID_CTX = BIT(1), + /* TXDID bit 2 is reserved + * TXDID bit 3 is free for future use + * TXDID bit 4 is reserved + */ + VIRTCHNL2_TXDID_FLEX_TSO_CTX = BIT(5), + /* TXDID bit 6 is reserved */ + VIRTCHNL2_TXDID_FLEX_L2TAG1_L2TAG2 = BIT(7), + /* TXDID bits 8 and 9 are free for future use + * TXDID bit 10 is reserved + * TXDID bit 11 is free for future use + */ + VIRTCHNL2_TXDID_FLEX_FLOW_SCHED = BIT(12), + /* TXDID bits 13 and 14 are free for future use */ + VIRTCHNL2_TXDID_DESC_DONE = BIT(15), +}; + +/* Receive descriptor IDs */ +enum virtchnl2_rx_desc_ids { + VIRTCHNL2_RXDID_1_32B_BASE = 1, + /* FLEX_SQ_NIC and FLEX_SPLITQ share desc ids because they can be + * differentiated based on queue model; e.g. single queue model can + * only use FLEX_SQ_NIC and split queue model can only use FLEX_SPLITQ + * for DID 2. + */ + VIRTCHNL2_RXDID_2_FLEX_SPLITQ = 2, + VIRTCHNL2_RXDID_2_FLEX_SQ_NIC = VIRTCHNL2_RXDID_2_FLEX_SPLITQ, + /* 3 through 6 are reserved */ + VIRTCHNL2_RXDID_7_HW_RSVD = 7, + /* 8 through 15 are free */ +}; + +/* Receive descriptor ID bitmasks */ +#define VIRTCHNL2_RXDID_M(bit) BIT_ULL(VIRTCHNL2_RXDID_##bit) + +enum virtchnl2_rx_desc_id_bitmasks { + VIRTCHNL2_RXDID_1_32B_BASE_M = VIRTCHNL2_RXDID_M(1_32B_BASE), + VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M = VIRTCHNL2_RXDID_M(2_FLEX_SPLITQ), + VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M = VIRTCHNL2_RXDID_M(2_FLEX_SQ_NIC), + VIRTCHNL2_RXDID_7_HW_RSVD_M = VIRTCHNL2_RXDID_M(7_HW_RSVD), +}; + +/* For splitq virtchnl2_rx_flex_desc_adv_nic_3 desc members */ +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M GENMASK(3, 0) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_M GENMASK(7, 6) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M GENMASK(9, 0) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_S 12 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_RAW_CSUM_INV_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_M GENMASK(15, 13) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M GENMASK(13, 0) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S 14 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S 15 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M GENMASK(9, 0) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S 10 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S 11 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_S 12 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_M GENMASK(14, 12) +#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S 15 +#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_M \ + BIT_ULL(VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S) + +/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */ +enum virtchl2_rx_flex_desc_adv_status_error_0_qw1_bits { + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_DD_M = BIT(0), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M = BIT(1), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_M = BIT(2), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_M = BIT(3), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_M = BIT(5), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_M = BIT(6), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_M = BIT(7), +}; + +/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */ +enum virtchnl2_rx_flex_desc_adv_status_error_0_qw0_bits { + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_LPBK_M = BIT(0), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_M = BIT(1), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RXE_M = BIT(2), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_CRCP_M = BIT(3), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RSS_VALID_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L2TAG1P_M = BIT(5), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XTRMD0_VALID_M = BIT(6), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XTRMD1_VALID_M = BIT(7), +}; + +/* Bitmasks for splitq virtchnl2_rx_flex_desc_adv_nic_3 */ +enum virtchnl2_rx_flex_desc_adv_status_error_1_bits { + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_RSVD_M = GENMASK(1, 0), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_ATRAEFAIL_M = BIT(2), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_L2TAG2P_M = BIT(3), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD2_VALID_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD3_VALID_M = BIT(5), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD4_VALID_M = BIT(6), + VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_XTRMD5_VALID_M = BIT(7), +}; + +/* For singleq (flex) virtchnl2_rx_flex_desc fields + * For virtchnl2_rx_flex_desc.ptype_flex_flags0 member + */ +#define VIRTCHNL2_RX_FLEX_DESC_PTYPE_M GENMASK(9, 0) + +/* For virtchnl2_rx_flex_desc.pkt_len member */ +#define VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M GENMASK(13, 0) + +/* Bitmasks for singleq (flex) virtchnl2_rx_flex_desc */ +enum virtchnl2_rx_flex_desc_status_error_0_bits { + VIRTCHNL2_RX_FLEX_DESC_STATUS0_DD_M = BIT(0), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_EOF_M = BIT(1), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_HBO_M = BIT(2), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_M = BIT(3), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_M = BIT(5), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_M = BIT(6), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_M = BIT(7), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_LPBK_M = BIT(8), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_IPV6EXADD_M = BIT(9), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_RXE_M = BIT(10), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_CRCP_M = BIT(11), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M = BIT(12), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_L2TAG1P_M = BIT(13), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_M = BIT(14), + VIRTCHNL2_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_M = BIT(15), +}; + +/* Bitmasks for singleq (flex) virtchnl2_rx_flex_desc */ +enum virtchnl2_rx_flex_desc_status_error_1_bits { + VIRTCHNL2_RX_FLEX_DESC_STATUS1_CPM_M = GENMASK(3, 0), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_NAT_M = BIT(4), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_CRYPTO_M = BIT(5), + /* [10:6] reserved */ + VIRTCHNL2_RX_FLEX_DESC_STATUS1_L2TAG2P_M = BIT(11), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD2_VALID_M = BIT(12), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD3_VALID_M = BIT(13), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD4_VALID_M = BIT(14), + VIRTCHNL2_RX_FLEX_DESC_STATUS1_XTRMD5_VALID_M = BIT(15), +}; + +/* For virtchnl2_rx_flex_desc.ts_low member */ +#define VIRTCHNL2_RX_FLEX_TSTAMP_VALID BIT(0) + +/* For singleq (non flex) virtchnl2_singleq_base_rx_desc legacy desc members */ +#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M GENMASK_ULL(51, 38) +#define VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M GENMASK_ULL(37, 30) +#define VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M GENMASK_ULL(26, 19) +#define VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M GENMASK_ULL(18, 0) + +/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */ +enum virtchnl2_rx_base_desc_status_bits { + VIRTCHNL2_RX_BASE_DESC_STATUS_DD_M = BIT(0), + VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M = BIT(1), + VIRTCHNL2_RX_BASE_DESC_STATUS_L2TAG1P_M = BIT(2), + VIRTCHNL2_RX_BASE_DESC_STATUS_L3L4P_M = BIT(3), + VIRTCHNL2_RX_BASE_DESC_STATUS_CRCP_M = BIT(4), + VIRTCHNL2_RX_BASE_DESC_STATUS_RSVD_M = GENMASK(7, 5), + VIRTCHNL2_RX_BASE_DESC_STATUS_EXT_UDP_0_M = BIT(8), + VIRTCHNL2_RX_BASE_DESC_STATUS_UMBCAST_M = GENMASK(10, 9), + VIRTCHNL2_RX_BASE_DESC_STATUS_FLM_M = BIT(11), + VIRTCHNL2_RX_BASE_DESC_STATUS_FLTSTAT_M = GENMASK(13, 12), + VIRTCHNL2_RX_BASE_DESC_STATUS_LPBK_M = BIT(14), + VIRTCHNL2_RX_BASE_DESC_STATUS_IPV6EXADD_M = BIT(15), + VIRTCHNL2_RX_BASE_DESC_STATUS_RSVD1_M = GENMASK(17, 16), + VIRTCHNL2_RX_BASE_DESC_STATUS_INT_UDP_0_M = BIT(18), +}; + +/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */ +enum virtchnl2_rx_base_desc_error_bits { + VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M = BIT(0), + VIRTCHNL2_RX_BASE_DESC_ERROR_ATRAEFAIL_M = BIT(1), + VIRTCHNL2_RX_BASE_DESC_ERROR_HBO_M = BIT(2), + VIRTCHNL2_RX_BASE_DESC_ERROR_L3L4E_M = GENMASK(5, 3), + VIRTCHNL2_RX_BASE_DESC_ERROR_IPE_M = BIT(3), + VIRTCHNL2_RX_BASE_DESC_ERROR_L4E_M = BIT(4), + VIRTCHNL2_RX_BASE_DESC_ERROR_EIPE_M = BIT(5), + VIRTCHNL2_RX_BASE_DESC_ERROR_OVERSIZE_M = BIT(6), + VIRTCHNL2_RX_BASE_DESC_ERROR_PPRS_M = BIT(7), +}; + +/* Bitmasks for singleq (base) virtchnl2_rx_base_desc */ +#define VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M GENMASK(13, 12) + +/** + * struct virtchnl2_splitq_rx_buf_desc - SplitQ RX buffer descriptor format + * @qword0: RX buffer struct. + * @qword0.buf_id: Buffer identifier. + * @qword0.rsvd0: Reserved. + * @qword0.rsvd1: Reserved. + * @pkt_addr: Packet buffer address. + * @hdr_addr: Header buffer address. + * @rsvd2: Rerserved. + * + * Receive Descriptors + * SplitQ buffer + * | 16| 0| + * ---------------------------------------------------------------- + * | RSV | Buffer ID | + * ---------------------------------------------------------------- + * | Rx packet buffer address | + * ---------------------------------------------------------------- + * | Rx header buffer address | + * ---------------------------------------------------------------- + * | RSV | + * ---------------------------------------------------------------- + * | 0| + */ +struct virtchnl2_splitq_rx_buf_desc { + struct { + __le16 buf_id; + __le16 rsvd0; + __le32 rsvd1; + } qword0; + __le64 pkt_addr; + __le64 hdr_addr; + __le64 rsvd2; +}; + +/** + * struct virtchnl2_singleq_rx_buf_desc - SingleQ RX buffer descriptor format. + * @pkt_addr: Packet buffer address. + * @hdr_addr: Header buffer address. + * @rsvd1: Reserved. + * @rsvd2: Reserved. + * + * SingleQ buffer + * | 0| + * ---------------------------------------------------------------- + * | Rx packet buffer address | + * ---------------------------------------------------------------- + * | Rx header buffer address | + * ---------------------------------------------------------------- + * | RSV | + * ---------------------------------------------------------------- + * | RSV | + * ---------------------------------------------------------------- + * | 0| + */ +struct virtchnl2_singleq_rx_buf_desc { + __le64 pkt_addr; + __le64 hdr_addr; + __le64 rsvd1; + __le64 rsvd2; +}; + +/** + * struct virtchnl2_singleq_base_rx_desc - RX descriptor writeback format. + * @qword0: First quad word struct. + * @qword0.lo_dword: Lower dual word struct. + * @qword0.lo_dword.mirroring_status: Mirrored packet status. + * @qword0.lo_dword.l2tag1: Stripped L2 tag from the received packet. + * @qword0.hi_dword: High dual word union. + * @qword0.hi_dword.rss: RSS hash. + * @qword0.hi_dword.fd_id: Flow director filter id. + * @qword1: Second quad word struct. + * @qword1.status_error_ptype_len: Status/error/PTYPE/length. + * @qword2: Third quad word struct. + * @qword2.ext_status: Extended status. + * @qword2.rsvd: Reserved. + * @qword2.l2tag2_1: Extracted L2 tag 2 from the packet. + * @qword2.l2tag2_2: Reserved. + * @qword3: Fourth quad word struct. + * @qword3.reserved: Reserved. + * @qword3.fd_id: Flow director filter id. + * + * Profile ID 0x1, SingleQ, base writeback format + */ +struct virtchnl2_singleq_base_rx_desc { + struct { + struct { + __le16 mirroring_status; + __le16 l2tag1; + } lo_dword; + union { + __le32 rss; + __le32 fd_id; + } hi_dword; + } qword0; + struct { + __le64 status_error_ptype_len; + } qword1; + struct { + __le16 ext_status; + __le16 rsvd; + __le16 l2tag2_1; + __le16 l2tag2_2; + } qword2; + struct { + __le32 reserved; + __le32 fd_id; + } qword3; +}; + +/** + * struct virtchnl2_rx_flex_desc_nic - RX descriptor writeback format. + * + * @rxdid: Descriptor builder profile id. + * @mir_id_umb_cast: umb_cast=[7:6], mirror=[5:0] + * @ptype_flex_flags0: ff0=[15:10], ptype=[9:0] + * @pkt_len: Packet length, [15:14] are reserved. + * @hdr_len_sph_flex_flags1: ff1/ext=[15:12], sph=[11], header=[10:0]. + * @status_error0: Status/Error section 0. + * @l2tag1: Stripped L2 tag from the received packet + * @rss_hash: RSS hash. + * @status_error1: Status/Error section 1. + * @flexi_flags2: Flexible flags section 2. + * @ts_low: Lower word of timestamp value. + * @l2tag2_1st: First L2TAG2. + * @l2tag2_2nd: Second L2TAG2. + * @flow_id: Flow id. + * @flex_ts: Timestamp and flexible flow id union. + * @flex_ts.ts_high: Timestamp higher word of the timestamp value. + * @flex_ts.flex.rsvd: Reserved. + * @flex_ts.flex.flow_id_ipv6: IPv6 flow id. + * + * Profile ID 0x2, SingleQ, flex writeback format + */ +struct virtchnl2_rx_flex_desc_nic { + /* Qword 0 */ + u8 rxdid; + u8 mir_id_umb_cast; + __le16 ptype_flex_flags0; + __le16 pkt_len; + __le16 hdr_len_sph_flex_flags1; + /* Qword 1 */ + __le16 status_error0; + __le16 l2tag1; + __le32 rss_hash; + /* Qword 2 */ + __le16 status_error1; + u8 flexi_flags2; + u8 ts_low; + __le16 l2tag2_1st; + __le16 l2tag2_2nd; + /* Qword 3 */ + __le32 flow_id; + union { + struct { + __le16 rsvd; + __le16 flow_id_ipv6; + } flex; + __le32 ts_high; + } flex_ts; +}; + +/** + * struct virtchnl2_rx_flex_desc_adv_nic_3 - RX descriptor writeback format. + * @rxdid_ucast: ucast=[7:6], rsvd=[5:4], profile_id=[3:0]. + * @status_err0_qw0: Status/Error section 0 in quad word 0. + * @ptype_err_fflags0: ff0=[15:12], udp_len_err=[11], ip_hdr_err=[10], + * ptype=[9:0]. + * @pktlen_gen_bufq_id: bufq_id=[15] only in splitq, gen=[14] only in splitq, + * plen=[13:0]. + * @hdrlen_flags: miss_prepend=[15], trunc_mirr=[14], int_udp_0=[13], + * ext_udp0=[12], sph=[11] only in splitq, rsc=[10] + * only in splitq, header=[9:0]. + * @status_err0_qw1: Status/Error section 0 in quad word 1. + * @status_err1: Status/Error section 1. + * @fflags1: Flexible flags section 1. + * @ts_low: Lower word of timestamp value. + * @buf_id: Buffer identifier. Only in splitq mode. + * @misc: Union. + * @misc.raw_cs: Raw checksum. + * @misc.l2tag1: Stripped L2 tag from the received packet + * @misc.rscseglen: + * @hash1: Lower bits of Rx hash value. + * @ff2_mirrid_hash2: Union. + * @ff2_mirrid_hash2.fflags2: Flexible flags section 2. + * @ff2_mirrid_hash2.mirrorid: Mirror id. + * @ff2_mirrid_hash2.rscseglen: RSC segment length. + * @hash3: Upper bits of Rx hash value. + * @l2tag2: Extracted L2 tag 2 from the packet. + * @fmd4: Flexible metadata container 4. + * @l2tag1: Stripped L2 tag from the received packet + * @fmd6: Flexible metadata container 6. + * @ts_high: Timestamp higher word of the timestamp value. + * + * Profile ID 0x2, SplitQ, flex writeback format + * + * Flex-field 0: BufferID + * Flex-field 1: Raw checksum/L2TAG1/RSC Seg Len (determined by HW) + * Flex-field 2: Hash[15:0] + * Flex-flags 2: Hash[23:16] + * Flex-field 3: L2TAG2 + * Flex-field 5: L2TAG1 + * Flex-field 7: Timestamp (upper 32 bits) + */ +struct virtchnl2_rx_flex_desc_adv_nic_3 { + /* Qword 0 */ + u8 rxdid_ucast; + u8 status_err0_qw0; + __le16 ptype_err_fflags0; + __le16 pktlen_gen_bufq_id; + __le16 hdrlen_flags; + /* Qword 1 */ + u8 status_err0_qw1; + u8 status_err1; + u8 fflags1; + u8 ts_low; + __le16 buf_id; + union { + __le16 raw_cs; + __le16 l2tag1; + __le16 rscseglen; + } misc; + /* Qword 2 */ + __le16 hash1; + union { + u8 fflags2; + u8 mirrorid; + u8 hash2; + } ff2_mirrid_hash2; + u8 hash3; + __le16 l2tag2; + __le16 fmd4; + /* Qword 3 */ + __le16 l2tag1; + __le16 fmd6; + __le32 ts_high; +}; + +/* Common union for accessing descriptor format structs */ +union virtchnl2_rx_desc { + struct virtchnl2_singleq_base_rx_desc base_wb; + struct virtchnl2_rx_flex_desc_nic flex_nic_wb; + struct virtchnl2_rx_flex_desc_adv_nic_3 flex_adv_nic_3_wb; +}; + +#endif /* _VIRTCHNL_LAN_DESC_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 329d8c90facdd5..bda5ff5db3ad5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -45,6 +45,10 @@ struct arfs_table { struct hlist_head rules_hash[ARFS_HASH_SIZE]; }; +enum { + MLX5E_ARFS_STATE_ENABLED, +}; + enum arfs_type { ARFS_IPV4_TCP, ARFS_IPV6_TCP, @@ -59,6 +63,7 @@ struct mlx5e_arfs_tables { spinlock_t arfs_lock; int last_filter_id; struct workqueue_struct *wq; + unsigned long state; }; struct arfs_tuple { @@ -169,6 +174,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs) return err; } } + set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); + return 0; } @@ -448,6 +455,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs) int i; int j; + clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state); + spin_lock_bh(&arfs->arfs_lock); mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) { hlist_del_init(&rule->hlist); @@ -615,17 +624,8 @@ static void arfs_handle_work(struct work_struct *work) struct mlx5_flow_handle *rule; arfs = mlx5e_fs_get_arfs(priv->fs); - mutex_lock(&priv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - spin_lock_bh(&arfs->arfs_lock); - hlist_del(&arfs_rule->hlist); - spin_unlock_bh(&arfs->arfs_lock); - - mutex_unlock(&priv->state_lock); - kfree(arfs_rule); - goto out; - } - mutex_unlock(&priv->state_lock); + if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) + return; if (!arfs_rule->rule) { rule = arfs_add_rule(priv, arfs_rule); @@ -738,6 +738,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, return -EPROTONOSUPPORT; spin_lock_bh(&arfs->arfs_lock); + if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) { + spin_unlock_bh(&arfs->arfs_lock); + return -EPERM; + } + arfs_rule = arfs_find_rule(arfs_t, &fk); if (arfs_rule) { if (arfs_rule->rxq == rxq_index) { diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 17b7d6b2883b92..e16317aadbca42 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -597,7 +597,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; - *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); + *datasize = mtu + ETH_HLEN; } static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index b41c22bff26d97..ba5f277b5fee70 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1449,6 +1449,7 @@ int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q, { struct emac_tpd tpd; u32 prod_idx; + int len; memset(&tpd, 0, sizeof(tpd)); @@ -1468,9 +1469,10 @@ int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q, if (skb_network_offset(skb) != ETH_HLEN) TPD_TYP_SET(&tpd, 1); + len = skb->len; emac_tx_fill_tpd(adpt, tx_q, skb, &tpd); - netdev_sent_queue(adpt->netdev, skb->len); + netdev_sent_queue(adpt->netdev, len); /* Make sure the are enough free descriptors to hold one * maximum-sized SKB. We need one desc for each fragment, diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index cf9d179c082a0e..645fa87b3fb6f7 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -102,7 +102,9 @@ static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index, ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, index, &buf, 4); - if (unlikely(ret < 0)) { + if (unlikely(ret < 4)) { + ret = ret < 0 ? ret : -ENODATA; + netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n", index, ret); return ret; @@ -1495,7 +1497,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) ret = smsc75xx_wait_ready(dev, 0); if (ret < 0) { netdev_warn(dev->net, "device not ready in smsc75xx_bind\n"); - return ret; + goto free_pdata; } smsc75xx_init_mac_address(dev); @@ -1504,7 +1506,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) ret = smsc75xx_reset(dev); if (ret < 0) { netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret); - return ret; + goto cancel_work; } dev->net->netdev_ops = &smsc75xx_netdev_ops; @@ -1514,6 +1516,13 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf) dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE; return 0; + +cancel_work: + cancel_work_sync(&pdata->set_multicast); +free_pdata: + kfree(pdata); + dev->data[0] = 0; + return ret; } static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) @@ -1523,7 +1532,6 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) cancel_work_sync(&pdata->set_multicast); netif_dbg(dev, ifdown, dev->net, "free pdata\n"); kfree(pdata); - pdata = NULL; dev->data[0] = 0; } } diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index f6230531e7e54a..1c7c0f2f9cc82f 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -306,7 +306,6 @@ struct ath9k_htc_tx { DECLARE_BITMAP(tx_slot, MAX_TX_BUF_NUM); struct timer_list cleanup_timer; spinlock_t tx_lock; - bool initialized; }; struct ath9k_htc_tx_ctl { @@ -515,6 +514,7 @@ struct ath9k_htc_priv { unsigned long ps_usecount; bool ps_enabled; bool ps_idle; + bool initialized; #ifdef CONFIG_MAC80211_LEDS enum led_brightness brightness; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index fcba0e96b9052e..7ee1530ecb0d81 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -968,6 +968,10 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev, htc_handle->drv_priv = priv; + /* Allow ath9k_wmi_event_tasklet() to operate. */ + smp_wmb(); + priv->initialized = true; + return 0; err_init: diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 820dd64a577f67..3e5f24c89ca071 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -652,9 +652,10 @@ void ath9k_htc_txstatus(struct ath9k_htc_priv *priv, void *wmi_event) struct ath9k_htc_tx_event *tx_pend; int i; - for (i = 0; i < txs->cnt; i++) { - WARN_ON(txs->cnt > HTC_MAX_TX_STATUS); + if (WARN_ON_ONCE(txs->cnt > HTC_MAX_TX_STATUS)) + return; + for (i = 0; i < txs->cnt; i++) { __txs = &txs->txstatus[i]; skb = ath9k_htc_tx_get_packet(priv, __txs); @@ -814,10 +815,6 @@ int ath9k_tx_init(struct ath9k_htc_priv *priv) skb_queue_head_init(&priv->tx.data_vo_queue); skb_queue_head_init(&priv->tx.tx_failed); - /* Allow ath9k_wmi_event_tasklet(WMI_TXSTATUS_EVENTID) to operate. */ - smp_wmb(); - priv->tx.initialized = true; - return 0; } diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c index 567d84be0e1ce4..a136bcd1dd9d85 100644 --- a/drivers/net/wireless/ath/ath9k/wmi.c +++ b/drivers/net/wireless/ath/ath9k/wmi.c @@ -156,6 +156,12 @@ void ath9k_wmi_event_tasklet(unsigned long data) } spin_unlock_irqrestore(&wmi->wmi_lock, flags); + /* Check if ath9k_htc_probe_device() completed. */ + if (!data_race(priv->initialized)) { + kfree_skb(skb); + continue; + } + hdr = (struct wmi_cmd_hdr *) skb->data; cmd_id = be16_to_cpu(hdr->command_id); wmi_event = skb_pull(skb, sizeof(struct wmi_cmd_hdr)); @@ -170,10 +176,6 @@ void ath9k_wmi_event_tasklet(unsigned long data) &wmi->drv_priv->fatal_work); break; case WMI_TXSTATUS_EVENTID: - /* Check if ath9k_tx_init() completed. */ - if (!data_race(priv->tx.initialized)) - break; - spin_lock_bh(&priv->tx.tx_lock); if (priv->tx.flags & ATH9K_HTC_OP_TX_DRAIN) { spin_unlock_bh(&priv->tx.tx_lock); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index de8a2e27f49c73..88613c034cf266 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4316,6 +4316,9 @@ brcmf_pmksa_v3_op(struct brcmf_if *ifp, struct cfg80211_pmksa *pmksa, int ret; pmk_op = kzalloc(sizeof(*pmk_op), GFP_KERNEL); + if (!pmk_op) + return -ENOMEM; + pmk_op->version = cpu_to_le16(BRCMF_PMKSA_VER_3); if (!pmksa) { diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index bed6f942ab1a97..8194048d04329a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -1085,7 +1085,7 @@ static int iwl_dbg_tlv_override_trig_node(struct iwl_fw_runtime *fwrt, node_trig = (void *)node_tlv->data; } - memcpy(node_trig->data + offset, trig->data, trig_data_len); + memcpy((u8 *)node_trig->data + offset, trig->data, trig_data_len); node_tlv->length = cpu_to_le32(size); if (policy & IWL_FW_INI_APPLY_POLICY_OVERRIDE_CFG) { diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 34feb4d29adc8f..88623609b49b77 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -128,6 +128,7 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv) kfree(drv->fw.ucode_capa.cmd_versions); kfree(drv->fw.phy_integration_ver); kfree(drv->trans->dbg.pc_data); + drv->trans->dbg.pc_data = NULL; for (i = 0; i < IWL_UCODE_TYPE_MAX; i++) iwl_free_fw_img(drv, drv->fw.img + i); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 6d1007f24b4aa9..3532d2f536aaff 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1286,7 +1286,9 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, mvm->net_detect = true; } else { - struct iwl_wowlan_config_cmd wowlan_config_cmd = {}; + struct iwl_wowlan_config_cmd wowlan_config_cmd = { + .offloading_tid = 0, + }; wowlan_config_cmd.sta_id = mvmvif->deflink.ap_sta_id; @@ -1298,6 +1300,11 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw, goto out_noreset; } + ret = iwl_mvm_sta_ensure_queue( + mvm, ap_sta->txq[wowlan_config_cmd.offloading_tid]); + if (ret) + goto out_noreset; + ret = iwl_mvm_get_wowlan_config(mvm, wowlan, &wowlan_config_cmd, vif, mvmvif, ap_sta); if (ret) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index fea5ae4661feae..db70a002775232 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3698,6 +3698,9 @@ iwl_mvm_sta_state_notexist_to_none(struct iwl_mvm *mvm, NL80211_TDLS_SETUP); } + if (ret) + return ret; + for_each_sta_active_link(vif, sta, link_sta, i) link_sta->agg.max_rc_amsdu_len = 1; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 9d229c333c3bf1..8e1d09e7923f54 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -500,6 +500,10 @@ static bool iwl_mvm_is_dup(struct ieee80211_sta *sta, int queue, return false; mvm_sta = iwl_mvm_sta_from_mac80211(sta); + + if (WARN_ON_ONCE(!mvm_sta->dup_data)) + return false; + dup_data = &mvm_sta->dup_data[queue]; /* diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 7eebca1947185f..e0bb0a2fc9500a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1493,6 +1493,34 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, return ret; } +int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, + struct ieee80211_txq *txq) +{ + struct iwl_mvm_txq *mvmtxq = iwl_mvm_txq_from_mac80211(txq); + int ret = -EINVAL; + + lockdep_assert_held(&mvm->mutex); + + if (likely(test_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state)) || + !txq->sta) { + return 0; + } + + if (!iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, txq->tid)) { + set_bit(IWL_MVM_TXQ_STATE_READY, &mvmtxq->state); + ret = 0; + } + + local_bh_disable(); + spin_lock(&mvm->add_stream_lock); + if (!list_empty(&mvmtxq->list)) + list_del_init(&mvmtxq->list); + spin_unlock(&mvm->add_stream_lock); + local_bh_enable(); + + return ret; +} + void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) { struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index a61d4f88125f02..40fa374cb26783 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -574,6 +574,7 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm, bool disable); void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif); +int iwl_mvm_sta_ensure_queue(struct iwl_mvm *mvm, struct ieee80211_txq *txq); void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk); int iwl_mvm_add_pasn_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_int_sta *sta, u8 *addr, u32 cipher, diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c index 95610a117d2f0e..8082fc07988132 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c @@ -43,6 +43,8 @@ static irqreturn_t mt7921_irq_handler(int irq, void *dev_instance) { struct mt7921_dev *dev = dev_instance; + if (test_bit(MT76_REMOVED, &dev->mt76.phy.state)) + return IRQ_NONE; mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0); if (!test_bit(MT76_STATE_INITIALIZED, &dev->mphy.state)) @@ -376,6 +378,7 @@ static void mt7921_pci_remove(struct pci_dev *pdev) struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76); mt7921e_unregister_device(dev); + set_bit(MT76_REMOVED, &mdev->phy.state); devm_free_irq(&pdev->dev, pdev->irq, dev); mt76_free_device(&dev->mt76); pci_free_irq_vectors(pdev); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c index 3a8148f99720af..7c4b26ef499c41 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00dev.c @@ -112,6 +112,7 @@ void rt2x00lib_disable_radio(struct rt2x00_dev *rt2x00dev) rt2x00link_stop_tuner(rt2x00dev); rt2x00queue_stop_queues(rt2x00dev); rt2x00queue_flush_queues(rt2x00dev, true); + rt2x00queue_stop_queue(rt2x00dev->bcn); /* * Disable radio. @@ -1298,6 +1299,7 @@ int rt2x00lib_start(struct rt2x00_dev *rt2x00dev) rt2x00dev->intf_ap_count = 0; rt2x00dev->intf_sta_count = 0; rt2x00dev->intf_associated = 0; + rt2x00dev->intf_beaconing = 0; /* Enable the radio */ retval = rt2x00lib_enable_radio(rt2x00dev); @@ -1324,6 +1326,7 @@ void rt2x00lib_stop(struct rt2x00_dev *rt2x00dev) rt2x00dev->intf_ap_count = 0; rt2x00dev->intf_sta_count = 0; rt2x00dev->intf_associated = 0; + rt2x00dev->intf_beaconing = 0; } static inline void rt2x00lib_set_if_combinations(struct rt2x00_dev *rt2x00dev) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 4780d5f953a2bd..5730b22eb6b864 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -609,6 +609,17 @@ void rt2x00mac_bss_info_changed(struct ieee80211_hw *hw, */ if (changes & BSS_CHANGED_BEACON_ENABLED) { mutex_lock(&intf->beacon_skb_mutex); + + /* + * Clear the 'enable_beacon' flag and clear beacon because + * the beacon queue has been stopped after hardware reset. + */ + if (test_bit(DEVICE_STATE_RESET, &rt2x00dev->flags) && + intf->enable_beacon) { + intf->enable_beacon = false; + rt2x00queue_clear_beacon(rt2x00dev, vif); + } + if (!bss_conf->enable_beacon && intf->enable_beacon) { rt2x00dev->intf_beaconing--; intf->enable_beacon = false; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 831639d73657be..c0b30d42ebc271 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -7081,6 +7081,7 @@ static void rtl8xxxu_stop(struct ieee80211_hw *hw) if (priv->usb_interrupts) rtl8xxxu_write32(priv, REG_USB_HIMR, 0); + cancel_work_sync(&priv->c2hcmd_work); cancel_delayed_work_sync(&priv->ra_watchdog); rtl8xxxu_free_rx_resources(priv); diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index f27840b0931fe4..24a552e8eb93b6 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -755,6 +755,24 @@ static void tlmi_release_attr(void) kset_unregister(tlmi_priv.authentication_kset); } +static int tlmi_validate_setting_name(struct kset *attribute_kset, char *name) +{ + struct kobject *duplicate; + + if (!strcmp(name, "Reserved")) + return -EINVAL; + + duplicate = kset_find_obj(attribute_kset, name); + if (duplicate) { + pr_debug("Duplicate attribute name found - %s\n", name); + /* kset_find_obj() returns a reference */ + kobject_put(duplicate); + return -EBUSY; + } + + return 0; +} + static int tlmi_sysfs_init(void) { int i, ret; @@ -783,10 +801,8 @@ static int tlmi_sysfs_init(void) continue; /* check for duplicate or reserved values */ - if (kset_find_obj(tlmi_priv.attribute_kset, tlmi_priv.setting[i]->display_name) || - !strcmp(tlmi_priv.setting[i]->display_name, "Reserved")) { - pr_debug("duplicate or reserved attribute name found - %s\n", - tlmi_priv.setting[i]->display_name); + if (tlmi_validate_setting_name(tlmi_priv.attribute_kset, + tlmi_priv.setting[i]->display_name) < 0) { kfree(tlmi_priv.setting[i]->possible_values); kfree(tlmi_priv.setting[i]); tlmi_priv.setting[i] = NULL; diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 8c52445c73fb3c..682dadab7dae7f 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -508,6 +508,9 @@ static void flush_to_ldisc(struct work_struct *work) if (!count) break; head->read += count; + + if (need_resched()) + cond_resched(); } mutex_unlock(&buf->lock); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index f2eb9a021973f7..2bfa16bcfc92b9 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -713,7 +713,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr) unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_delete(vc, nr); - scr_memcpyw(p, p + nr, (vc->vc_cols - vc->vc_x - nr) * 2); + scr_memmovew(p, p + nr, (vc->vc_cols - vc->vc_x - nr) * 2); scr_memsetw(p + vc->vc_cols - vc->vc_x - nr, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 7d402e736ccf78..217acdc1a23b47 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -464,13 +464,13 @@ static int uio_open(struct inode *inode, struct file *filep) mutex_lock(&minor_lock); idev = idr_find(&uio_idr, iminor(inode)); - mutex_unlock(&minor_lock); if (!idev) { ret = -ENODEV; + mutex_unlock(&minor_lock); goto out; } - get_device(&idev->dev); + mutex_unlock(&minor_lock); if (!try_module_get(idev->owner)) { ret = -ENODEV; @@ -1024,9 +1024,8 @@ void uio_unregister_device(struct uio_info *info) wake_up_interruptible(&idev->wait); kill_fasync(&idev->async_queue, SIGIO, POLL_HUP); - device_unregister(&idev->dev); - uio_free_minor(minor); + device_unregister(&idev->dev); return; } diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 32490871720d52..e6b4f6e6d10a32 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -303,7 +303,7 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; } - root = debugfs_create_dir(dev_name(dev), ulpi_root); + root = debugfs_create_dir(dev_name(&ulpi->dev), ulpi_root); debugfs_create_file("regs", 0444, root, ulpi, &ulpi_regs_fops); dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index e0bef73ef34b5e..2e89a429782c52 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -116,7 +116,6 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 -static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, @@ -150,6 +149,10 @@ int usb_device_supports_lpm(struct usb_device *udev) if (udev->quirks & USB_QUIRK_NO_LPM) return 0; + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return 0; + /* USB 2.1 (and greater) devices indicate LPM support through * their USB 2.0 Extended Capabilities BOS descriptor. */ @@ -326,6 +329,10 @@ static void usb_set_lpm_parameters(struct usb_device *udev) if (!udev->lpm_capable || udev->speed < USB_SPEED_SUPER) return; + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return; + hub = usb_hub_to_struct_hub(udev->parent); /* It doesn't take time to transition the roothub into U0, since it * doesn't have an upstream link. @@ -670,14 +677,14 @@ static void kick_hub_wq(struct usb_hub *hub) */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); - kref_get(&hub->kref); + hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ usb_autopm_put_interface_async(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) @@ -1045,7 +1052,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) goto init2; goto init3; } - kref_get(&hub->kref); + hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits @@ -1293,7 +1300,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_unlock(&hdev->dev); } - kref_put(&hub->kref, hub_release); + hub_put(hub); } /* Implement the continuations for the delays above */ @@ -1709,6 +1716,16 @@ static void hub_release(struct kref *kref) kfree(hub); } +void hub_get(struct usb_hub *hub) +{ + kref_get(&hub->kref); +} + +void hub_put(struct usb_hub *hub) +{ + kref_put(&hub->kref, hub_release); +} + static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) @@ -1755,7 +1772,7 @@ static void hub_disconnect(struct usb_interface *intf) if (hub->quirk_disable_autosuspend) usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) @@ -2698,13 +2715,17 @@ int usb_authorize_device(struct usb_device *usb_dev) static enum usb_ssp_rate get_port_ssp_rate(struct usb_device *hdev, u32 ext_portstatus) { - struct usb_ssp_cap_descriptor *ssp_cap = hdev->bos->ssp_cap; + struct usb_ssp_cap_descriptor *ssp_cap; u32 attr; u8 speed_id; u8 ssac; u8 lanes; int i; + if (!hdev->bos) + goto out; + + ssp_cap = hdev->bos->ssp_cap; if (!ssp_cap) goto out; @@ -4222,8 +4243,15 @@ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { int timeout; - __u8 u1_mel = udev->bos->ss_cap->bU1devExitLat; - __le16 u2_mel = udev->bos->ss_cap->bU2DevExitLat; + __u8 u1_mel; + __le16 u2_mel; + + /* Skip if the device BOS descriptor couldn't be read */ + if (!udev->bos) + return; + + u1_mel = udev->bos->ss_cap->bU1devExitLat; + u2_mel = udev->bos->ss_cap->bU2DevExitLat; /* If the device says it doesn't have *any* exit latency to come out of * U1 or U2, it's probably lying. Assume it doesn't implement that link @@ -5882,7 +5910,7 @@ static void hub_event(struct work_struct *work) /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); - kref_put(&hub->kref, hub_release); + hub_put(hub); kcov_remote_stop(); } diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 0de57e0efa1950..a79e901a420cac 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -121,6 +121,8 @@ extern void usb_hub_remove_port_device(struct usb_hub *hub, extern int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set); extern struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev); +extern void hub_get(struct usb_hub *hub); +extern void hub_put(struct usb_hub *hub); extern int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected); extern int usb_clear_port_feature(struct usb_device *hdev, @@ -148,7 +150,7 @@ static inline int hub_is_superspeedplus(struct usb_device *hdev) { return (hdev->descriptor.bDeviceProtocol == USB_HUB_PR_SS && le16_to_cpu(hdev->descriptor.bcdUSB) >= 0x0310 && - hdev->bos->ssp_cap); + hdev->bos && hdev->bos->ssp_cap); } static inline unsigned hub_power_on_good_delay(struct usb_hub *hub) diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 06a8f1f84f6f87..d10f1082260231 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -55,11 +55,22 @@ static ssize_t disable_show(struct device *dev, u16 portstatus, unused; bool disabled; int rc; + struct kernfs_node *kn; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -69,9 +80,13 @@ static ssize_t disable_show(struct device *dev, usb_hub_port_status(hub, port1, &portstatus, &unused); disabled = !usb_port_is_power_on(hub, portstatus); -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); if (rc) return rc; @@ -89,15 +104,26 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, int port1 = port_dev->portnum; bool disabled; int rc; + struct kernfs_node *kn; rc = kstrtobool(buf, &disabled); if (rc) return rc; + hub_get(hub); rc = usb_autopm_get_interface(intf); if (rc < 0) - return rc; + goto out_hub_get; + /* + * Prevent deadlock if another process is concurrently + * trying to unregister hdev. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (!kn) { + rc = -ENODEV; + goto out_autopm; + } usb_lock_device(hdev); if (hub->disconnected) { rc = -ENODEV; @@ -118,9 +144,13 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr, if (!rc) rc = count; -out_hdev_lock: + out_hdev_lock: usb_unlock_device(hdev); + sysfs_unbreak_active_protection(kn); + out_autopm: usb_autopm_put_interface(intf); + out_hub_get: + hub_put(hub); return rc; } diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index 323dc02becbe25..cd79cab1f4f620 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -1171,14 +1171,24 @@ static ssize_t interface_authorized_store(struct device *dev, { struct usb_interface *intf = to_usb_interface(dev); bool val; + struct kernfs_node *kn; if (kstrtobool(buf, &val) != 0) return -EINVAL; - if (val) + if (val) { usb_authorize_interface(intf); - else - usb_deauthorize_interface(intf); + } else { + /* + * Prevent deadlock if another process is concurrently + * trying to unregister intf. + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + if (kn) { + usb_deauthorize_interface(intf); + sysfs_unbreak_active_protection(kn); + } + } return count; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index e24a62198255d6..03638f15837640 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2410,6 +2410,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2425,9 +2428,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: @@ -2435,8 +2442,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2456,6 +2464,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2464,6 +2475,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2843,17 +2862,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. + * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2865,9 +2918,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 490e4b71b5ad94..d8b58b996e26a8 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1168,6 +1168,8 @@ static int xhci_map_temp_buffer(struct usb_hcd *hcd, struct urb *urb) temp = kzalloc_node(buf_len, GFP_ATOMIC, dev_to_node(hcd->self.sysdev)); + if (!temp) + return -ENOMEM; if (usb_urb_dir_out(urb)) sg_pcopy_to_buffer(urb->sg, urb->num_sgs, diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 7faa0f8d6332da..5302cd4ca3a413 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1573,6 +1573,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c index 1dd5959295af06..7825f122a74a94 100644 --- a/drivers/usb/storage/isd200.c +++ b/drivers/usb/storage/isd200.c @@ -1104,7 +1104,7 @@ static void isd200_dump_driveid(struct us_data *us, u16 *id) static int isd200_get_inquiry_data( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; - int retStatus = ISD200_GOOD; + int retStatus; u16 *id = info->id; usb_stor_dbg(us, "Entering isd200_get_inquiry_data\n"); @@ -1136,6 +1136,13 @@ static int isd200_get_inquiry_data( struct us_data *us ) isd200_fix_driveid(id); isd200_dump_driveid(us, id); + /* Prevent division by 0 in isd200_scsi_to_ata() */ + if (id[ATA_ID_HEADS] == 0 || id[ATA_ID_SECTORS] == 0) { + usb_stor_dbg(us, " Invalid ATA Identify data\n"); + retStatus = ISD200_ERROR; + goto Done; + } + memset(&info->InquiryData, 0, sizeof(info->InquiryData)); /* Standard IDE interface only supports disks */ @@ -1201,6 +1208,7 @@ static int isd200_get_inquiry_data( struct us_data *us ) } } + Done: usb_stor_dbg(us, "Leaving isd200_get_inquiry_data %08X\n", retStatus); return(retStatus); @@ -1480,22 +1488,27 @@ static int isd200_init_info(struct us_data *us) static int isd200_Initialization(struct us_data *us) { + int rc = 0; + usb_stor_dbg(us, "ISD200 Initialization...\n"); /* Initialize ISD200 info struct */ - if (isd200_init_info(us) == ISD200_ERROR) { + if (isd200_init_info(us) < 0) { usb_stor_dbg(us, "ERROR Initializing ISD200 Info struct\n"); + rc = -ENOMEM; } else { /* Get device specific data */ - if (isd200_get_inquiry_data(us) != ISD200_GOOD) + if (isd200_get_inquiry_data(us) != ISD200_GOOD) { usb_stor_dbg(us, "ISD200 Initialization Failure\n"); - else + rc = -EINVAL; + } else { usb_stor_dbg(us, "ISD200 Initialization complete\n"); + } } - return 0; + return rc; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ac2ef7305cc27d..98cfa42fe4d75c 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2551,12 +2551,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { - struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + /* Make sure all padding within the structure is initialized. */ + struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; - /* Make sure all padding within the structure is initialized. */ - memset(&node->msg, 0, sizeof node->msg); node->vq = vq; node->msg.type = type; return node; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 4c565850e8315b..d80718fa3c665b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4499,13 +4499,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode) ceph_inode_to_client(inode)->mdsc; dout("%p %llx.%llx\n", inode, ceph_vinop(inode)); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_unlink_delay_list); - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); /* * Fire the work immediately, because the MDS maybe diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index fb9ec81e3efbb4..448a51eb60e5fa 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1043,6 +1043,9 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, struct ceph_mds_request *req; int err; + if (dentry->d_flags & DCACHE_DISCONNECTED) + return -EINVAL; + err = ceph_wait_on_conflict_unlink(dentry); if (err) return err; @@ -1050,8 +1053,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, if (ceph_snap(dir) != CEPH_NOSNAP) return -EROFS; - dout("link in dir %p old_dentry %p dentry %p\n", dir, - old_dentry, dentry); + dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n", + dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry); req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS); if (IS_ERR(req)) { d_drop(dentry); @@ -1060,6 +1063,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); + /* + * The old_dentry maybe a DCACHE_DISCONNECTED dentry, then we + * will just pass the ino# to MDSs. + */ + if (old_dentry->d_flags & DCACHE_DISCONNECTED) + req->r_ino2 = ceph_vino(d_inode(old_dentry)); req->r_parent = dir; ihold(dir); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b7a32a20504d4c..d1ac6368a47f8c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2270,7 +2270,7 @@ static void ceph_cap_unlink_work(struct work_struct *work) container_of(work, struct ceph_mds_client, cap_unlink_work); dout("begin\n"); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); while (!list_empty(&mdsc->cap_unlink_delay_list)) { struct ceph_inode_info *ci; struct inode *inode; @@ -2282,14 +2282,14 @@ static void ceph_cap_unlink_work(struct work_struct *work) inode = igrab(&ci->vfs_inode); if (inode) { - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); dout("on %p %llx.%llx\n", inode, ceph_vinop(inode)); ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL); iput(inode); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); } } - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); dout("done\n"); } @@ -2584,6 +2584,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, u64 ino1 = 0, ino2 = 0; int pathlen1 = 0, pathlen2 = 0; bool freepath1 = false, freepath2 = false; + struct dentry *old_dentry = NULL; int len; u16 releases; void *p, *end; @@ -2601,7 +2602,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, } /* If r_old_dentry is set, then assume that its parent is locked */ - ret = set_request_path_attr(NULL, req->r_old_dentry, + if (req->r_old_dentry && + !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) + old_dentry = req->r_old_dentry; + ret = set_request_path_attr(NULL, old_dentry, req->r_old_dentry_dir, req->r_path2, req->r_ino2.ino, &path2, &pathlen2, &ino2, &freepath2, true); @@ -4831,7 +4835,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->cap_wait_list); spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); - spin_lock_init(&mdsc->cap_unlink_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); mdsc->last_cap_flush_tid = 1; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 29691b786d4b32..b79bc8576f4fbd 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -420,9 +420,8 @@ struct ceph_mds_client { struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; /* last time we renewed our caps */ struct list_head cap_delay_list; /* caps with delayed release */ - spinlock_t cap_delay_lock; /* protects cap_delay_list */ struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */ - spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */ + spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */ struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock; diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 8d0a6d2c2da433..a3370be2711bf2 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -379,10 +379,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ceph_decode_skip_8(p, end, bad_ext); /* required_client_features */ ceph_decode_skip_set(p, end, 64, bad_ext); + /* bal_rank_mask */ + ceph_decode_skip_string(p, end, bad_ext); + } + if (mdsmap_ev >= 18) { ceph_decode_64_safe(p, end, m->m_max_xattr_size, bad_ext); - } else { - /* This forces the usage of the (sync) SETXATTR Op */ - m->m_max_xattr_size = 0; } bad_ext: dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n", diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c index eb192656fba278..f442a8d2f114b8 100644 --- a/fs/fat/nfs.c +++ b/fs/fat/nfs.c @@ -139,6 +139,12 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp, fid->parent_i_gen = parent->i_generation; type = FILEID_FAT_WITH_PARENT; *lenp = FAT_FID_SIZE_WITH_PARENT; + } else { + /* + * We need to initialize this field because the fh is actually + * 12 bytes long + */ + fid->parent_i_pos_hi = 0; } return type; diff --git a/fs/fhandle.c b/fs/fhandle.c index 0ee727485615b6..cb78dbfd7cd511 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -37,7 +37,7 @@ static long do_sys_name_to_handle(struct path *path, if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; - handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, GFP_KERNEL); if (!handle) return -ENOMEM; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 0fb8366e631310..7f33b35968e169 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1715,7 +1715,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; - u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift; + unsigned int bsize = 1 << bsize_shift; + u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, uninitialized_var(end_aligned); @@ -1726,7 +1727,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) u64 prev_bnr = 0; __be64 *start, *end; - if (offset >= maxsize) { + if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated meta-data; * there are no blocks do deallocate. diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index b9504926559459..0e6bdad97bd1fc 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -447,6 +447,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, delegation->cred = get_cred(cred); delegation->inode = inode; delegation->flags = 1<test_gen = 0; spin_lock_init(&delegation->lock); spin_lock(&clp->cl_lock); @@ -1293,6 +1294,8 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, struct inode *inode; const struct cred *cred; nfs4_stateid stateid; + unsigned long gen = ++server->delegation_gen; + restart: rcu_read_lock(); restart_locked: @@ -1302,7 +1305,8 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_TEST_EXPIRED, - &delegation->flags) == 0) + &delegation->flags) == 0 || + delegation->test_gen == gen) continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) @@ -1311,6 +1315,7 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); spin_unlock(&delegation->lock); + delegation->test_gen = gen; clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); rcu_read_unlock(); nfs_delegation_test_free_expired(inode, &stateid, cred); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 1c378992b7c0fc..a6f495d012cf11 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -21,6 +21,7 @@ struct nfs_delegation { fmode_t type; unsigned long pagemod_limit; __u64 change_attr; + unsigned long test_gen; unsigned long flags; refcount_t refcount; spinlock_t lock; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 198f2f891cb27a..26325b1fb7e4e2 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -2073,6 +2073,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = ofs; sb->s_flags |= SB_POSIXACL; sb->s_iflags |= SB_I_SKIP_SYNC; + sb->s_iflags |= SB_I_EVM_UNSUPPORTED; err = -ENOMEM; root_dentry = ovl_get_root(sb, upperpath.dentry, oe); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index fffaa3e2d94abc..41a9d7dda56541 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -429,6 +429,8 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj, kn = kernfs_find_and_get(kobj->sd, attr->name); if (kn) kernfs_break_active_protection(kn); + else + kobject_put(kobj); return kn; } EXPORT_SYMBOL_GPL(sysfs_break_active_protection); diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 4c3e0648dc2775..fcc95bff72a571 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,7 +25,11 @@ struct ceph_mdsmap { u32 m_session_timeout; /* seconds */ u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; - u64 m_max_xattr_size; /* maximum size for xattrs blob */ + /* + * maximum size for xattrs blob. + * Zeroed by default to force the usage of the (sync) SETXATTR Op. + */ + u64 m_max_xattr_size; u32 m_max_mds; /* expected up:active mds number */ u32 m_num_active_mds; /* actual up:active mds number */ u32 possible_max_rank; /* possible max rank index */ diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 990a61a7e977b9..af610c80af1e02 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -51,6 +51,7 @@ void vmcore_cleanup(void); #define vmcore_elf64_check_arch(x) (elf_check_arch(x) || vmcore_elf_check_arch_cross(x)) #endif +#ifndef is_kdump_kernel /* * is_kdump_kernel() checks whether this kernel is booting after a panic of * previous kernel or not. This is determined by checking if previous kernel @@ -65,6 +66,7 @@ static inline bool is_kdump_kernel(void) { return elfcorehdr_addr != ELFCORE_ADDR_MAX; } +#endif /* is_vmcore_usable() checks if the kernel is booting after a panic and * the vmcore region is usable. @@ -76,7 +78,8 @@ static inline bool is_kdump_kernel(void) static inline int is_vmcore_usable(void) { - return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0; + return elfcorehdr_addr != ELFCORE_ADDR_ERR && + elfcorehdr_addr != ELFCORE_ADDR_MAX ? 1 : 0; } /* vmcore_unusable() marks the vmcore as unusable, @@ -85,8 +88,7 @@ static inline int is_vmcore_usable(void) static inline void vmcore_unusable(void) { - if (is_kdump_kernel()) - elfcorehdr_addr = ELFCORE_ADDR_ERR; + elfcorehdr_addr = ELFCORE_ADDR_ERR; } /** diff --git a/include/linux/evm.h b/include/linux/evm.h index 39bb17a8236b1a..beee7dfccc795b 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -30,6 +30,7 @@ extern void evm_inode_post_setxattr(struct dentry *dentry, const void *xattr_value, size_t xattr_value_len); extern int evm_inode_removexattr(struct dentry *dentry, const char *xattr_name); +extern int evm_inode_copy_up_xattr(const char *name); extern void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name); extern int evm_inode_init_security(struct inode *inode, @@ -92,6 +93,11 @@ static inline int evm_inode_removexattr(struct dentry *dentry, return 0; } +static inline int evm_inode_copy_up_xattr(const char *name) +{ + return 0; +} + static inline void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name) { diff --git a/include/linux/fs.h b/include/linux/fs.h index f010cc46d832ae..8aae96b246e520 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1413,6 +1413,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 +#define SB_I_EVM_UNSUPPORTED 0x00000080 #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 80b00a090931da..db557d9d9ffb83 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -171,31 +171,39 @@ static inline bool dev_xmit_complete(int rc) * (unsigned long) so they can be read and written atomically. */ +#define NET_DEV_STAT(FIELD) \ + RH_KABI_REPLACE(unsigned long FIELD, \ + union { \ + unsigned long FIELD; \ + atomic_long_t __##FIELD; \ + }) + struct net_device_stats { - unsigned long rx_packets; - unsigned long tx_packets; - unsigned long rx_bytes; - unsigned long tx_bytes; - unsigned long rx_errors; - unsigned long tx_errors; - unsigned long rx_dropped; - unsigned long tx_dropped; - unsigned long multicast; - unsigned long collisions; - unsigned long rx_length_errors; - unsigned long rx_over_errors; - unsigned long rx_crc_errors; - unsigned long rx_frame_errors; - unsigned long rx_fifo_errors; - unsigned long rx_missed_errors; - unsigned long tx_aborted_errors; - unsigned long tx_carrier_errors; - unsigned long tx_fifo_errors; - unsigned long tx_heartbeat_errors; - unsigned long tx_window_errors; - unsigned long rx_compressed; - unsigned long tx_compressed; + NET_DEV_STAT(rx_packets) + NET_DEV_STAT(tx_packets) + NET_DEV_STAT(rx_bytes) + NET_DEV_STAT(tx_bytes) + NET_DEV_STAT(rx_errors) + NET_DEV_STAT(tx_errors) + NET_DEV_STAT(rx_dropped) + NET_DEV_STAT(tx_dropped) + NET_DEV_STAT(multicast) + NET_DEV_STAT(collisions) + NET_DEV_STAT(rx_length_errors) + NET_DEV_STAT(rx_over_errors) + NET_DEV_STAT(rx_crc_errors) + NET_DEV_STAT(rx_frame_errors) + NET_DEV_STAT(rx_fifo_errors) + NET_DEV_STAT(rx_missed_errors) + NET_DEV_STAT(tx_aborted_errors) + NET_DEV_STAT(tx_carrier_errors) + NET_DEV_STAT(tx_fifo_errors) + NET_DEV_STAT(tx_heartbeat_errors) + NET_DEV_STAT(tx_window_errors) + NET_DEV_STAT(rx_compressed) + NET_DEV_STAT(tx_compressed) }; +#undef NET_DEV_STAT /* per-cpu stats, allocated on demand. * Try to fit them in a single cache line, for dev_get_stats() sake. @@ -3492,6 +3500,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { + /* Must be an atomic op see netif_txq_try_stop() */ set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -3650,7 +3659,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever */ - smp_mb(); + smp_mb(); /* NOTE: netdev_txq_completed_mb() assumes this exists */ if (dql_avail(&dev_queue->dql) < 0) return; @@ -5405,4 +5414,9 @@ do { \ #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) +/* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. */ +#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) +#define DEV_STATS_ADD(DEV, FIELD, VAL) \ + atomic_long_add((VAL), &(DEV)->stats.__##FIELD) + #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 3796f46960b12c..eac49c6558ec50 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -124,6 +124,7 @@ struct ip_set_ext { u32 timeout; u8 packets_op; u8 bytes_op; + bool target; }; struct ip_set; @@ -190,6 +191,16 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); + /* Region-locking is used */ + bool region_lock; +}; + +struct ip_set_region { + spinlock_t lock; /* Region lock */ + size_t ext_size; /* Size of the dynamic extensions */ + u32 elements; /* Number of elements vs timeout */ }; /* Max range where every element is added/deleted in one step */ @@ -233,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ @@ -464,7 +477,7 @@ bitmap_bytes(u32 a, u32 b) #include #define IP_SET_INIT_KEXT(skb, opt, set) \ - { .bytes = (skb)->len, .packets = 1, \ + { .bytes = (skb)->len, .packets = 1, .target = true,\ .timeout = ip_set_adt_opt_timeout(opt, set) } #define IP_SET_INIT_UEXT(set) \ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index befb9c2a59f38c..3a8329a71a343a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -241,6 +241,7 @@ struct nfs_server { struct list_head delegations; struct list_head ss_copies; + unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; #define NFS_MIG_IN_TRANSITION (1) diff --git a/include/net/dst.h b/include/net/dst.h index d0bd6c671a9ff3..2e3f79ea8bede1 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -383,9 +383,8 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, struct net *net) { - /* TODO : stats should be SMP safe */ - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + DEV_STATS_INC(dev, rx_packets); + DEV_STATS_ADD(dev, rx_bytes, skb->len); __skb_tunnel_rx(skb, dev, net); } diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index d4f2a9fbb30469..5da352adf1d684 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -37,7 +37,6 @@ enum TRI_STATE { #define COMP_ENTRY_SIZE 64 #define RX_BUFFERS_PER_QUEUE 512 -#define MANA_RX_DATA_ALIGN 64 #define MAX_SEND_BUFFERS_PER_QUEUE 256 diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h new file mode 100644 index 00000000000000..8fc9693228e6ce --- /dev/null +++ b/include/net/netdev_queues.h @@ -0,0 +1,163 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NET_QUEUES_H +#define _LINUX_NET_QUEUES_H + +#include + +/** + * DOC: Lockless queue stopping / waking helpers. + * + * The netif_txq_maybe_stop() and __netif_txq_completed_wake() + * macros are designed to safely implement stopping + * and waking netdev queues without full lock protection. + * + * We assume that there can be no concurrent stop attempts and no concurrent + * wake attempts. The try-stop should happen from the xmit handler, + * while wake up should be triggered from NAPI poll context. + * The two may run concurrently (single producer, single consumer). + * + * The try-stop side is expected to run from the xmit handler and therefore + * it does not reschedule Tx (netif_tx_start_queue() instead of + * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit + * handler may lead to xmit queue being enabled but not run. + * The waking side does not have similar context restrictions. + * + * The macros guarantee that rings will not remain stopped if there's + * space available, but they do *not* prevent false wake ups when + * the ring is full! Drivers should check for ring full at the start + * for the xmit handler. + * + * All descriptor ring indexes (and other relevant shared state) must + * be updated before invoking the macros. + */ + +#define netif_txq_try_stop(txq, get_desc, start_thrs) \ + ({ \ + int _res; \ + \ + netif_tx_stop_queue(txq); \ + /* Producer index and stop bit must be visible \ + * to consumer before we recheck. \ + * Pairs with a barrier in __netif_txq_completed_wake(). \ + */ \ + smp_mb__after_atomic(); \ + \ + /* We need to check again in a case another \ + * CPU has just made room available. \ + */ \ + _res = 0; \ + if (unlikely(get_desc >= start_thrs)) { \ + netif_tx_start_queue(txq); \ + _res = -1; \ + } \ + _res; \ + }) \ + +/** + * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed + * @txq: struct netdev_queue to stop/start + * @get_desc: get current number of free descriptors (see requirements below!) + * @stop_thrs: minimal number of available descriptors for queue to be left + * enabled + * @start_thrs: minimal number of descriptors to re-enable the queue, can be + * equal to @stop_thrs or higher to avoid frequent waking + * + * All arguments may be evaluated multiple times, beware of side effects. + * @get_desc must be a formula or a function call, it must always + * return up-to-date information when evaluated! + * Expected to be used from ndo_start_xmit, see the comment on top of the file. + * + * Returns: + * 0 if the queue was stopped + * 1 if the queue was left enabled + * -1 if the queue was re-enabled (raced with waking) + */ +#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs) \ + ({ \ + int _res; \ + \ + _res = 1; \ + if (unlikely(get_desc < stop_thrs)) \ + _res = netif_txq_try_stop(txq, get_desc, start_thrs); \ + _res; \ + }) \ + +/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if + * @bytes != 0, regardless of kernel config. + */ +static inline void +netdev_txq_completed_mb(struct netdev_queue *dev_queue, + unsigned int pkts, unsigned int bytes) +{ + if (IS_ENABLED(CONFIG_BQL)) + netdev_tx_completed_queue(dev_queue, pkts, bytes); + else if (bytes) + smp_mb(); +} + +/** + * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed + * @txq: struct netdev_queue to stop/start + * @pkts: number of packets completed + * @bytes: number of bytes completed + * @get_desc: get current number of free descriptors (see requirements below!) + * @start_thrs: minimal number of descriptors to re-enable the queue + * @down_cond: down condition, predicate indicating that the queue should + * not be woken up even if descriptors are available + * + * All arguments may be evaluated multiple times. + * @get_desc must be a formula or a function call, it must always + * return up-to-date information when evaluated! + * Reports completed pkts/bytes to BQL. + * + * Returns: + * 0 if the queue was woken up + * 1 if the queue was already enabled (or disabled but @down_cond is true) + * -1 if the queue was left unchanged (@start_thrs not reached) + */ +#define __netif_txq_completed_wake(txq, pkts, bytes, \ + get_desc, start_thrs, down_cond) \ + ({ \ + int _res; \ + \ + /* Report to BQL and piggy back on its barrier. \ + * Barrier makes sure that anybody stopping the queue \ + * after this point sees the new consumer index. \ + * Pairs with barrier in netif_txq_try_stop(). \ + */ \ + netdev_txq_completed_mb(txq, pkts, bytes); \ + \ + _res = -1; \ + if (pkts && likely(get_desc >= start_thrs)) { \ + _res = 1; \ + if (unlikely(netif_tx_queue_stopped(txq)) && \ + !(down_cond)) { \ + netif_tx_wake_queue(txq); \ + _res = 0; \ + } \ + } \ + _res; \ + }) + +#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \ + __netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false) + +/* subqueue variants follow */ + +#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs) \ + ({ \ + struct netdev_queue *txq; \ + \ + txq = netdev_get_tx_queue(dev, idx); \ + netif_txq_try_stop(txq, get_desc, start_thrs); \ + }) + +#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \ + ({ \ + struct netdev_queue *txq; \ + \ + txq = netdev_get_tx_queue(dev, idx); \ + netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \ + }) + +#endif diff --git a/init/main.c b/init/main.c index 5a9e8bc3b01696..7f62f603e70d07 100644 --- a/init/main.c +++ b/init/main.c @@ -1157,7 +1157,7 @@ static noinline void __init kernel_init_freeable(void) */ set_mems_allowed(node_states[N_MEMORY]); - cad_pid = task_pid(current); + cad_pid = get_pid(task_pid(current)); smp_prepare_cpus(setup_max_cpus); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 87ba0668cb7def..b5d050dfc9af88 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4349,13 +4349,18 @@ static int init_rescuer(struct workqueue_struct *wq) return 0; rescuer = alloc_worker(NUMA_NO_NODE); - if (!rescuer) + if (!rescuer) { + pr_err("workqueue: Failed to allocate a rescuer for wq \"%s\"\n", + wq->name); return -ENOMEM; + } rescuer->rescue_wq = wq; rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name); if (IS_ERR(rescuer->task)) { ret = PTR_ERR(rescuer->task); + pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe", + wq->name, ERR_PTR(ret)); kfree(rescuer); return ret; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ec646656dbf14d..31c1b3c412f052 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -124,7 +124,7 @@ static int deliver_clone(const struct net_bridge_port *prev, skb = skb_clone(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return -ENOMEM; } @@ -263,7 +263,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, skb = skb_copy(skb, GFP_ATOMIC); if (!skb) { - dev->stats.tx_dropped++; + DEV_STATS_INC(dev, tx_dropped); return; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 5dd920e52660b3..3a5df5f528eb21 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -164,12 +164,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if ((mdst && mdst->host_joined) || br_multicast_is_router(brmctx, skb)) { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } mcast_hit = true; } else { local_rcv = true; - br->dev->stats.multicast++; + DEV_STATS_INC(br->dev, multicast); } break; case BR_PKT_UNICAST: diff --git a/net/core/dev.c b/net/core/dev.c index 100c5eed4fbdf8..f6a94d846053f9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10536,24 +10536,16 @@ void netdev_run_todo(void) void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats) { -#if BITS_PER_LONG == 64 - BUILD_BUG_ON(sizeof_rtnl_link_stats64 < sizeof(*netdev_stats)); - memcpy(stats64, netdev_stats, sizeof(*netdev_stats)); - /* zero out counters that only exist in rtnl_link_stats64 */ - memset((char *)stats64 + sizeof(*netdev_stats), 0, - sizeof_rtnl_link_stats64 - sizeof(*netdev_stats)); -#else - size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long); - const unsigned long *src = (const unsigned long *)netdev_stats; + size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t); + const atomic_long_t *src = (atomic_long_t *)netdev_stats; u64 *dst = (u64 *)stats64; BUILD_BUG_ON(n > sizeof_rtnl_link_stats64 / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = src[i]; + dst[i] = (unsigned long)atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof_rtnl_link_stats64 - n * sizeof(u64)); -#endif } EXPORT_SYMBOL(netdev_stats_to_stats64); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 1400512e0dde5d..349f7d0c9969d4 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -910,6 +910,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: if (sta->sta.valid_links) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ac7bf71fad000c..f832afcf23339e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3038,7 +3038,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 02b342201fde14..5fdddb0fb4d991 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2831,11 +2831,54 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } + +static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) +{ + const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; + struct ipv6_pinfo *newnp; + + newnp = inet6_sk(newsk); + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + if (!opt) + net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__); + } + RCU_INIT_POINTER(newnp->opt, opt); + rcu_read_unlock(); +} #endif +static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk) +{ + struct ip_options_rcu *inet_opt, *newopt = NULL; + const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; + + newinet = inet_sk(newsk); + + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); + if (inet_opt) { + newopt = sock_kmalloc(newsk, sizeof(*inet_opt) + + inet_opt->opt.optlen, GFP_ATOMIC); + if (newopt) + memcpy(newopt, inet_opt, sizeof(*inet_opt) + + inet_opt->opt.optlen); + else + net_warn_ratelimited("%s: Failed to copy ip options\n", __func__); + } + RCU_INIT_POINTER(newinet->inet_opt, newopt); + rcu_read_unlock(); +} + struct sock *mptcp_sk_clone(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct request_sock *req) + { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); @@ -2852,6 +2895,13 @@ struct sock *mptcp_sk_clone(const struct sock *sk, __mptcp_init_sock(nsk); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (nsk->sk_family == AF_INET6) + mptcp_copy_ip6_options(nsk, sk); + else +#endif + mptcp_copy_ip_options(nsk, sk); + msk = mptcp_sk(nsk); msk->local_key = subflow_req->local_key; msk->token = subflow_req->token; diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 98beaf9d67e2c0..4bc0a8de36c04a 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -32,6 +32,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -61,9 +62,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -292,6 +290,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -305,6 +312,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 17c14ad58809a9..7c4674a718e417 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -64,6 +64,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); ip_set_dereference((inst)->ip_set_list)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] +#define ip_set_dereference_nfnl(p) \ + rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is @@ -547,15 +549,24 @@ __ip_set_put_netlink(struct ip_set *set) static inline struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { - struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - rcu_read_lock(); - /* ip_set_list itself needs to be protected */ - set = rcu_dereference(inst->ip_set_list)[index]; - rcu_read_unlock(); + /* ip_set_list and the set pointer need to be protected */ + return ip_set_dereference_nfnl(inst->ip_set_list)[index]; +} + +static inline void +ip_set_lock(struct ip_set *set) +{ + if (!set->variant->region_lock) + spin_lock_bh(&set->lock); +} - return set; +static inline void +ip_set_unlock(struct ip_set *set) +{ + if (!set->variant->region_lock) + spin_unlock_bh(&set->lock); } int @@ -579,9 +590,9 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); - spin_lock_bh(&set->lock); + ip_set_lock(set); set->variant->kadt(set, skb, par, IPSET_ADD, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); ret = 1; } else { /* --return-nomatch: invert matched element */ @@ -610,9 +621,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); return ret; } @@ -632,9 +643,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); return ret; } @@ -996,6 +1007,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -1024,6 +1036,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[], @@ -1037,8 +1057,6 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1050,8 +1068,10 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1065,12 +1085,17 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1080,10 +1105,16 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1098,9 +1129,9 @@ ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); - spin_lock_bh(&set->lock); + ip_set_lock(set); set->variant->flush(set); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); } static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, @@ -1515,9 +1546,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); retried = true; } while (ret == -ERANGE || (ret == -EAGAIN && @@ -2221,6 +2252,7 @@ ip_set_net_exit(struct net *net) set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; + set->variant->cancel_gc(set); ip_set_destroy_set(set); } } @@ -2268,8 +2300,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 89838b48ad8513..7a22e4e65e265f 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -11,13 +11,21 @@ #include #include #include +#include #include -#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) -#define ipset_dereference_protected(p, set) \ - __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock)) - -#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) +#define __ipset_dereference(p) \ + rcu_dereference_protected(p, 1) +#define ipset_dereference_nfnl(p) \ + rcu_dereference_protected(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) +#define ipset_dereference_set(p, set) \ + rcu_dereference_protected(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ + lockdep_is_held(&(set)->lock)) +#define ipset_dereference_bh_nfnl(p) \ + rcu_dereference_bh_check(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. @@ -76,11 +84,35 @@ struct hbucket { __aligned(__alignof__(u64)); }; +/* Region size for locking == 2^HTABLE_REGION_BITS */ +#define HTABLE_REGION_BITS 10 +#define ahash_numof_locks(htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ? 1 \ + : jhash_size((htable_bits) - HTABLE_REGION_BITS)) +#define ahash_sizeof_regions(htable_bits) \ + (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) +#define ahash_region(n, htable_bits) \ + ((n) % ahash_numof_locks(htable_bits)) +#define ahash_bucket_start(h, htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ? 0 \ + : (h) * jhash_size(HTABLE_REGION_BITS)) +#define ahash_bucket_end(h, htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \ + : ((h) + 1) * jhash_size(HTABLE_REGION_BITS)) + +struct htable_gc { + struct delayed_work dwork; + struct ip_set *set; /* Set the gc belongs to */ + u32 region; /* Last gc run position */ +}; + /* The hash table: the table size stored here in order to make resizing easy */ struct htable { atomic_t ref; /* References for resizing */ - atomic_t uref; /* References for dumping */ + atomic_t uref; /* References for dumping and gc */ u8 htable_bits; /* size of hash table == 2^htable_bits */ + u32 maxelem; /* Maxelem per region */ + struct ip_set_region *hregion; /* Region locks and ext sizes */ struct hbucket __rcu *bucket[0]; /* hashtable buckets */ }; @@ -152,6 +184,10 @@ htable_size(u8 hbits) #define NLEN 0 #endif /* IP_SET_HASH_WITH_NETS */ +#define SET_ELEM_EXPIRED(set, d) \ + (SET_WITH_TIMEOUT(set) && \ + ip_set_timeout_expired(ext_timeout(d, set))) + #endif /* _IP_SET_HASH_GEN_H */ #ifndef MTYPE @@ -195,12 +231,15 @@ htable_size(u8 hbits) #undef mtype_test_cidrs #undef mtype_test #undef mtype_uref -#undef mtype_expire #undef mtype_resize +#undef mtype_ext_size +#undef mtype_resize_ad #undef mtype_head #undef mtype_list +#undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -237,12 +276,15 @@ htable_size(u8 hbits) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) #define mtype_test IPSET_TOKEN(MTYPE, _test) #define mtype_uref IPSET_TOKEN(MTYPE, _uref) -#define mtype_expire IPSET_TOKEN(MTYPE, _expire) #define mtype_resize IPSET_TOKEN(MTYPE, _resize) +#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size) +#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad) #define mtype_head IPSET_TOKEN(MTYPE, _head) #define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -265,8 +307,7 @@ htable_size(u8 hbits) /* The generic hash structure */ struct htype { struct htable __rcu *table; /* the hash table */ - struct timer_list gc; /* garbage collection when timeout enabled */ - struct ip_set *set; /* attached to this ip_set */ + struct htable_gc gc; /* gc workqueue */ u32 maxelem; /* max elements in the hash */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK @@ -278,21 +319,33 @@ struct htype { #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; /* netmask value for subnets to store */ #endif + struct list_head ad; /* Resize add|del backlist */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_NETS struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ #endif }; +/* ADD|DEL entries saved during resize */ +struct mtype_resize_ad { + struct list_head list; + enum ipset_adt ad; /* ADD|DEL element */ + struct mtype_elem d; /* Element value */ + struct ip_set_ext ext; /* Extensions for ADD */ + struct ip_set_ext mext; /* Target extensions for ADD */ + u32 flags; /* Flags for ADD */ +}; + #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different * sized networks. cidr == real cidr + 1 to support /0. */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 n) +mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) { int i, j; + spin_lock_bh(&set->lock); /* Add in increasing prefix order, so larger cidr first */ for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { if (j != -1) { @@ -301,7 +354,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n) j = i; } else if (h->nets[i].cidr[n] == cidr) { h->nets[CIDR_POS(cidr)].nets[n]++; - return; + goto unlock; } } if (j != -1) { @@ -310,24 +363,29 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n) } h->nets[i].cidr[n] = cidr; h->nets[CIDR_POS(cidr)].nets[n] = 1; +unlock: + spin_unlock_bh(&set->lock); } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 n) +mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) { u8 i, j, net_end = NLEN - 1; + spin_lock_bh(&set->lock); for (i = 0; i < NLEN; i++) { if (h->nets[i].cidr[n] != cidr) continue; h->nets[CIDR_POS(cidr)].nets[n]--; if (h->nets[CIDR_POS(cidr)].nets[n] > 0) - return; + goto unlock; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; h->nets[j].cidr[n] = 0; - return; + goto unlock; } +unlock: + spin_unlock_bh(&set->lock); } #endif @@ -335,7 +393,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 n) static size_t mtype_ahash_memsize(const struct htype *h, const struct htable *t) { - return sizeof(*h) + sizeof(*t); + return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits); } /* Get the ith element from the array block n */ @@ -359,24 +417,29 @@ mtype_flush(struct ip_set *set) struct htype *h = set->data; struct htable *t; struct hbucket *n; - u32 i; - - t = ipset_dereference_protected(h->table, set); - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); - if (!n) - continue; - if (set->extensions & IPSET_EXT_DESTROY) - mtype_ext_cleanup(set, n); - /* FIXME: use slab cache */ - rcu_assign_pointer(hbucket(t, i), NULL); - kfree_rcu(n, rcu); + u32 r, i; + + t = ipset_dereference_nfnl(h->table); + for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { + spin_lock_bh(&t->hregion[r].lock); + for (i = ahash_bucket_start(r, t->htable_bits); + i < ahash_bucket_end(r, t->htable_bits); i++) { + n = __ipset_dereference(hbucket(t, i)); + if (!n) + continue; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + rcu_assign_pointer(hbucket(t, i), NULL); + kfree_rcu(n, rcu); + } + t->hregion[r].ext_size = 0; + t->hregion[r].elements = 0; + spin_unlock_bh(&t->hregion[r].lock); } #ifdef IP_SET_HASH_WITH_NETS memset(h->nets, 0, sizeof(h->nets)); #endif - set->elements = 0; - set->ext_size = 0; } /* Destroy the hashtable part of the set */ @@ -387,7 +450,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); + n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -396,6 +459,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) kfree(n); } + ip_set_free(t->hregion); ip_set_free(t); } @@ -404,28 +468,18 @@ static void mtype_destroy(struct ip_set *set) { struct htype *h = set->data; + struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&h->gc); - - mtype_ahash_destroy(set, - __ipset_dereference_protected(h->table, 1), true); + mtype_ahash_destroy(set, (__force struct htable *)h->table, true); + list_for_each_safe(l, lt, &h->ad) { + list_del(l); + kfree(l); + } kfree(h); set->data = NULL; } -static void -mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) -{ - struct htype *h = set->data; - - timer_setup(&h->gc, gc, 0); - mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); - pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(set->timeout)); -} - static bool mtype_same_set(const struct ip_set *a, const struct ip_set *b) { @@ -444,11 +498,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } -/* Delete expired elements from the hashtable */ static void -mtype_expire(struct ip_set *set, struct htype *h) +mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) { - struct htable *t; struct hbucket *n, *tmp; struct mtype_elem *data; u32 i, j, d; @@ -456,10 +508,12 @@ mtype_expire(struct ip_set *set, struct htype *h) #ifdef IP_SET_HASH_WITH_NETS u8 k; #endif + u8 htable_bits = t->htable_bits; - t = ipset_dereference_protected(h->table, set); - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); + spin_lock_bh(&t->hregion[r].lock); + for (i = ahash_bucket_start(r, htable_bits); + i < ahash_bucket_end(r, htable_bits); i++) { + n = __ipset_dereference(hbucket(t, i)); if (!n) continue; for (j = 0, d = 0; j < n->pos; j++) { @@ -475,58 +529,109 @@ mtype_expire(struct ip_set *set, struct htype *h) smp_mb__after_atomic(); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, + mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(data->cidr, k)), k); #endif + t->hregion[r].elements--; ip_set_ext_destroy(set, data); - set->elements--; d++; } if (d >= AHASH_INIT_SIZE) { if (d >= n->size) { + t->hregion[r].ext_size -= + ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, i), NULL); kfree_rcu(n, rcu); continue; } tmp = kzalloc(sizeof(*tmp) + - (n->size - AHASH_INIT_SIZE) * dsize, - GFP_ATOMIC); + (n->size - AHASH_INIT_SIZE) * dsize, + GFP_ATOMIC); if (!tmp) - /* Still try to delete expired elements */ + /* Still try to delete expired elements. */ continue; tmp->size = n->size - AHASH_INIT_SIZE; for (j = 0, d = 0; j < n->pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); - memcpy(tmp->value + d * dsize, data, dsize); + memcpy(tmp->value + d * dsize, + data, dsize); set_bit(d, tmp->used); d++; } tmp->pos = d; - set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); + t->hregion[r].ext_size -= + ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, i), tmp); kfree_rcu(n, rcu); } } + spin_unlock_bh(&t->hregion[r].lock); } static void -mtype_gc(struct timer_list *t) +mtype_gc(struct work_struct *work) { - struct htype *h = from_timer(h, t, gc); - struct ip_set *set = h->set; + struct htable_gc *gc; + struct ip_set *set; + struct htype *h; + struct htable *t; + u32 r, numof_locks; + unsigned int next_run; + + gc = container_of(work, struct htable_gc, dwork.work); + set = gc->set; + h = set->data; - pr_debug("called\n"); spin_lock_bh(&set->lock); - mtype_expire(set, h); + t = ipset_dereference_set(h->table, set); + atomic_inc(&t->uref); + numof_locks = ahash_numof_locks(t->htable_bits); + r = gc->region++; + if (r >= numof_locks) { + r = gc->region = 0; + } + next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks; + if (next_run < HZ/10) + next_run = HZ/10; spin_unlock_bh(&set->lock); - h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&h->gc); + mtype_gc_do(set, h, t, r); + + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by expire: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + + queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run); + +} + +static void +mtype_gc_init(struct htable_gc *gc) +{ + INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc); + queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + +static int +mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags); +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags); + /* Resize a hash: create a new hash table with doubling the hashsize * and inserting the elements to it. Repeat until we succeed or * fail due to memory pressures. @@ -537,7 +642,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t extsize, dsize = set->dsize; + size_t dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -545,7 +650,9 @@ mtype_resize(struct ip_set *set, bool retried) struct mtype_elem *data; struct mtype_elem *d; struct hbucket *n, *m; - u32 i, j, key; + struct list_head *l, *lt; + struct mtype_resize_ad *x; + u32 i, j, r, nr, key; int ret; #ifdef IP_SET_HASH_WITH_NETS @@ -553,10 +660,8 @@ mtype_resize(struct ip_set *set, bool retried) if (!tmp) return -ENOMEM; #endif - rcu_read_lock_bh(); - orig = rcu_dereference_bh_nfnl(h->table); + orig = ipset_dereference_bh_nfnl(h->table); htable_bits = orig->htable_bits; - rcu_read_unlock_bh(); retry: ret = 0; @@ -573,88 +678,124 @@ mtype_resize(struct ip_set *set, bool retried) ret = -ENOMEM; goto out; } + t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); + if (!t->hregion) { + kfree(t); + ret = -ENOMEM; + goto out; + } t->htable_bits = htable_bits; + t->maxelem = h->maxelem / ahash_numof_locks(htable_bits); + for (i = 0; i < ahash_numof_locks(htable_bits); i++) + spin_lock_init(&t->hregion[i].lock); - spin_lock_bh(&set->lock); - orig = __ipset_dereference_protected(h->table, 1); - /* There can't be another parallel resizing, but dumping is possible */ + /* There can't be another parallel resizing, + * but dumping, gc, kernel side add/del are possible + */ + orig = ipset_dereference_bh_nfnl(h->table); atomic_set(&orig->ref, 1); atomic_inc(&orig->uref); - extsize = 0; pr_debug("attempt to resize set %s from %u to %u, t %p\n", set->name, orig->htable_bits, htable_bits, orig); - for (i = 0; i < jhash_size(orig->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(orig, i), 1); - if (!n) - continue; - for (j = 0; j < n->pos; j++) { - if (!test_bit(j, n->used)) + for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) { + /* Expire may replace a hbucket with another one */ + rcu_read_lock_bh(); + for (i = ahash_bucket_start(r, orig->htable_bits); + i < ahash_bucket_end(r, orig->htable_bits); i++) { + n = __ipset_dereference(hbucket(orig, i)); + if (!n) continue; - data = ahash_data(n, j, dsize); + for (j = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, dsize); + if (SET_ELEM_EXPIRED(set, data)) + continue; #ifdef IP_SET_HASH_WITH_NETS - /* We have readers running parallel with us, - * so the live data cannot be modified. - */ - flags = 0; - memcpy(tmp, data, dsize); - data = tmp; - mtype_data_reset_flags(data, &flags); + /* We have readers running parallel with us, + * so the live data cannot be modified. + */ + flags = 0; + memcpy(tmp, data, dsize); + data = tmp; + mtype_data_reset_flags(data, &flags); #endif - key = HKEY(data, h->initval, htable_bits); - m = __ipset_dereference_protected(hbucket(t, key), 1); - if (!m) { - m = kzalloc(sizeof(*m) + + key = HKEY(data, h->initval, htable_bits); + m = __ipset_dereference(hbucket(t, key)); + nr = ahash_region(key, htable_bits); + if (!m) { + m = kzalloc(sizeof(*m) + AHASH_INIT_SIZE * dsize, GFP_ATOMIC); - if (!m) { - ret = -ENOMEM; - goto cleanup; - } - m->size = AHASH_INIT_SIZE; - extsize += ext_size(AHASH_INIT_SIZE, dsize); - RCU_INIT_POINTER(hbucket(t, key), m); - } else if (m->pos >= m->size) { - struct hbucket *ht; - - if (m->size >= AHASH_MAX(h)) { - ret = -EAGAIN; - } else { - ht = kzalloc(sizeof(*ht) + + if (!m) { + ret = -ENOMEM; + goto cleanup; + } + m->size = AHASH_INIT_SIZE; + t->hregion[nr].ext_size += + ext_size(AHASH_INIT_SIZE, + dsize); + RCU_INIT_POINTER(hbucket(t, key), m); + } else if (m->pos >= m->size) { + struct hbucket *ht; + + if (m->size >= AHASH_MAX(h)) { + ret = -EAGAIN; + } else { + ht = kzalloc(sizeof(*ht) + (m->size + AHASH_INIT_SIZE) * dsize, GFP_ATOMIC); - if (!ht) - ret = -ENOMEM; + if (!ht) + ret = -ENOMEM; + } + if (ret < 0) + goto cleanup; + memcpy(ht, m, sizeof(struct hbucket) + + m->size * dsize); + ht->size = m->size + AHASH_INIT_SIZE; + t->hregion[nr].ext_size += + ext_size(AHASH_INIT_SIZE, + dsize); + kfree(m); + m = ht; + RCU_INIT_POINTER(hbucket(t, key), ht); } - if (ret < 0) - goto cleanup; - memcpy(ht, m, sizeof(struct hbucket) + - m->size * dsize); - ht->size = m->size + AHASH_INIT_SIZE; - extsize += ext_size(AHASH_INIT_SIZE, dsize); - kfree(m); - m = ht; - RCU_INIT_POINTER(hbucket(t, key), ht); - } - d = ahash_data(m, m->pos, dsize); - memcpy(d, data, dsize); - set_bit(m->pos++, m->used); + d = ahash_data(m, m->pos, dsize); + memcpy(d, data, dsize); + set_bit(m->pos++, m->used); + t->hregion[nr].elements++; #ifdef IP_SET_HASH_WITH_NETS - mtype_data_reset_flags(d, &flags); + mtype_data_reset_flags(d, &flags); #endif + } } + rcu_read_unlock_bh(); } - rcu_assign_pointer(h->table, t); - set->ext_size = extsize; - spin_unlock_bh(&set->lock); + /* There can't be any other writer. */ + rcu_assign_pointer(h->table, t); /* Give time to other readers of the set */ synchronize_rcu(); pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - /* If there's nobody else dumping the table, destroy it */ + /* Add/delete elements processed by the SET target during resize. + * Kernel-side add cannot trigger a resize and userspace actions + * are serialized by the mutex. + */ + list_for_each_safe(l, lt, &h->ad) { + x = list_entry(l, struct mtype_resize_ad, list); + if (x->ad == IPSET_ADD) { + mtype_add(set, &x->d, &x->ext, &x->mext, x->flags); + } else { + mtype_del(set, &x->d, NULL, NULL, 0); + } + list_del(l); + kfree(l); + } + /* If there's nobody else using the table, destroy it */ if (atomic_dec_and_test(&orig->uref)) { pr_debug("Table destroy by resize %p\n", orig); mtype_ahash_destroy(set, orig, false); @@ -667,15 +808,44 @@ mtype_resize(struct ip_set *set, bool retried) return ret; cleanup: + rcu_read_unlock_bh(); atomic_set(&orig->ref, 0); atomic_dec(&orig->uref); - spin_unlock_bh(&set->lock); mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) goto retry; goto out; } +/* Get the current number of elements and ext_size in the set */ +static void +mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) +{ + struct htype *h = set->data; + const struct htable *t; + u32 i, j, r; + struct hbucket *n; + struct mtype_elem *data; + + t = rcu_dereference_bh(h->table); + for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { + for (i = ahash_bucket_start(r, t->htable_bits); + i < ahash_bucket_end(r, t->htable_bits); i++) { + n = rcu_dereference_bh(hbucket(t, i)); + if (!n) + continue; + for (j = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, set->dsize); + if (!SET_ELEM_EXPIRED(set, data)) + (*elements)++; + } + } + *ext_size += t->hregion[r].ext_size; + } +} + /* Add an element to a hash and update the internal counters when succeeded, * otherwise report the proper error code. */ @@ -688,32 +858,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n, *old = ERR_PTR(-ENOENT); - int i, j = -1; + int i, j = -1, ret; bool flag_exist = flags & IPSET_FLAG_EXIST; bool deleted = false, forceadd = false, reuse = false; - u32 key, multi = 0; + u32 r, key, multi = 0, elements, maxelem; - if (set->elements >= h->maxelem) { - if (SET_WITH_TIMEOUT(set)) - /* FIXME: when set is full, we slow down here */ - mtype_expire(set, h); - if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set)) + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + r = ahash_region(key, t->htable_bits); + atomic_inc(&t->uref); + elements = t->hregion[r].elements; + maxelem = t->maxelem; + if (elements >= maxelem) { + u32 e; + if (SET_WITH_TIMEOUT(set)) { + rcu_read_unlock_bh(); + mtype_gc_do(set, h, t, r); + rcu_read_lock_bh(); + } + maxelem = h->maxelem; + elements = 0; + for (e = 0; e < ahash_numof_locks(t->htable_bits); e++) + elements += t->hregion[e].elements; + if (elements >= maxelem && SET_WITH_FORCEADD(set)) forceadd = true; } + rcu_read_unlock_bh(); - t = ipset_dereference_protected(h->table, set); - key = HKEY(value, h->initval, t->htable_bits); - n = __ipset_dereference_protected(hbucket(t, key), 1); + spin_lock_bh(&t->hregion[r].lock); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) { - if (forceadd || set->elements >= h->maxelem) + if (forceadd || elements >= maxelem) goto set_full; old = NULL; n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, GFP_ATOMIC); - if (!n) - return -ENOMEM; + if (!n) { + ret = -ENOMEM; + goto unlock; + } n->size = AHASH_INIT_SIZE; - set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); + t->hregion[r].ext_size += + ext_size(AHASH_INIT_SIZE, set->dsize); goto copy_elem; } for (i = 0; i < n->pos; i++) { @@ -727,19 +914,16 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { - if (flag_exist || - (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set)))) { + if (flag_exist || SET_ELEM_EXPIRED(set, data)) { /* Just the extensions could be overwritten */ j = i; goto overwrite_extensions; } - return -IPSET_ERR_EXIST; + ret = -IPSET_ERR_EXIST; + goto unlock; } /* Reuse first timed out entry */ - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set)) && - j == -1) { + if (SET_ELEM_EXPIRED(set, data) && j == -1) { j = i; reuse = true; } @@ -751,16 +935,16 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!deleted) { #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_del_cidr(h, + mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), i); #endif ip_set_ext_destroy(set, data); - set->elements--; + t->hregion[r].elements--; } goto copy_data; } - if (set->elements >= h->maxelem) + if (elements >= maxelem) goto set_full; /* Create a new slot */ if (n->pos >= n->size) { @@ -768,28 +952,32 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (n->size >= AHASH_MAX(h)) { /* Trigger rehashing */ mtype_data_next(&h->next, d); - return -EAGAIN; + ret = -EAGAIN; + goto resize; } old = n; n = kzalloc(sizeof(*n) + (old->size + AHASH_INIT_SIZE) * set->dsize, GFP_ATOMIC); - if (!n) - return -ENOMEM; + if (!n) { + ret = -ENOMEM; + goto unlock; + } memcpy(n, old, sizeof(struct hbucket) + old->size * set->dsize); n->size = old->size + AHASH_INIT_SIZE; - set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); + t->hregion[r].ext_size += + ext_size(AHASH_INIT_SIZE, set->dsize); } copy_elem: j = n->pos++; data = ahash_data(n, j, set->dsize); copy_data: - set->elements++; + t->hregion[r].elements++; #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); + mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); #endif memcpy(data, d, sizeof(struct mtype_elem)); overwrite_extensions: @@ -812,13 +1000,41 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (old) kfree_rcu(old, rcu); } + ret = 0; +resize: + spin_unlock_bh(&t->hregion[r].lock); + if (atomic_read(&t->ref) && ext->target) { + /* Resize is in process and kernel side add, save values */ + struct mtype_resize_ad *x; + + x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC); + if (!x) + /* Don't bother */ + goto out; + x->ad = IPSET_ADD; + memcpy(&x->d, value, sizeof(struct mtype_elem)); + memcpy(&x->ext, ext, sizeof(struct ip_set_ext)); + memcpy(&x->mext, mext, sizeof(struct ip_set_ext)); + x->flags = flags; + spin_lock_bh(&set->lock); + list_add_tail(&x->list, &h->ad); + spin_unlock_bh(&set->lock); + } + goto out; - return 0; set_full: if (net_ratelimit()) pr_warn("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - return -IPSET_ERR_HASH_FULL; + set->name, maxelem); + ret = -IPSET_ERR_HASH_FULL; +unlock: + spin_unlock_bh(&t->hregion[r].lock); +out: + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by add: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + return ret; } /* Delete an element from the hash and free up space if possible. @@ -832,13 +1048,23 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; - int i, j, k, ret = -IPSET_ERR_EXIST; + struct mtype_resize_ad *x = NULL; + int i, j, k, r, ret = -IPSET_ERR_EXIST; u32 key, multi = 0; size_t dsize = set->dsize; - t = ipset_dereference_protected(h->table, set); + /* Userspace add and resize is excluded by the mutex. + * Kernespace add does not trigger resize. + */ + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); - n = __ipset_dereference_protected(hbucket(t, key), 1); + r = ahash_region(key, t->htable_bits); + atomic_inc(&t->uref); + rcu_read_unlock_bh(); + + spin_lock_bh(&t->hregion[r].lock); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) goto out; for (i = 0, k = 0; i < n->pos; i++) { @@ -849,8 +1075,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, data = ahash_data(n, i, dsize); if (!mtype_data_equal(data, d, &multi)) continue; - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set))) + if (SET_ELEM_EXPIRED(set, data)) goto out; ret = 0; @@ -858,20 +1083,33 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, smp_mb__after_atomic(); if (i + 1 == n->pos) n->pos--; - set->elements--; + t->hregion[r].elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) - mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), - j); + mtype_del_cidr(set, h, + NCIDR_PUT(DCIDR_GET(d->cidr, j)), j); #endif ip_set_ext_destroy(set, data); + if (atomic_read(&t->ref) && ext->target) { + /* Resize is in process and kernel side del, + * save values + */ + x = kzalloc(sizeof(struct mtype_resize_ad), + GFP_ATOMIC); + if (x) { + x->ad = IPSET_DEL; + memcpy(&x->d, value, + sizeof(struct mtype_elem)); + x->flags = flags; + } + } for (; i < n->pos; i++) { if (!test_bit(i, n->used)) k++; } if (n->pos == 0 && k == 0) { - set->ext_size -= ext_size(n->size, dsize); + t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); } else if (k >= AHASH_INIT_SIZE) { @@ -890,7 +1128,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } tmp->pos = k; - set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); + t->hregion[r].ext_size -= + ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, key), tmp); kfree_rcu(n, rcu); } @@ -898,6 +1137,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, } out: + spin_unlock_bh(&t->hregion[r].lock); + if (x) { + spin_lock_bh(&set->lock); + list_add(&x->list, &h->ad); + spin_unlock_bh(&set->lock); + } + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by del: %p\n", t); + mtype_ahash_destroy(set, t, false); + } return ret; } @@ -983,6 +1232,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, int i, ret = 0; u32 key, multi = 0; + rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, @@ -1014,6 +1264,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, goto out; } out: + rcu_read_unlock_bh(); return ret; } @@ -1025,23 +1276,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) const struct htable *t; struct nlattr *nested; size_t memsize; + u32 elements = 0; + size_t ext_size = 0; u8 htable_bits; - /* If any members have expired, set->elements will be wrong - * mytype_expire function will update it with the right count. - * we do not hold set->lock here, so grab it first. - * set->elements can still be incorrect in the case of a huge set, - * because elements might time out during the listing. - */ - if (SET_WITH_TIMEOUT(set)) { - spin_lock_bh(&set->lock); - mtype_expire(set, h); - spin_unlock_bh(&set->lock); - } - rcu_read_lock_bh(); - t = rcu_dereference_bh_nfnl(h->table); - memsize = mtype_ahash_memsize(h, t) + set->ext_size; + t = rcu_dereference_bh(h->table); + mtype_ext_size(set, &elements, &ext_size); + memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size; htable_bits = t->htable_bits; rcu_read_unlock_bh(); @@ -1063,7 +1305,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || - nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements))) + nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; @@ -1083,15 +1325,15 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) if (start) { rcu_read_lock_bh(); - t = rcu_dereference_bh_nfnl(h->table); + t = ipset_dereference_bh_nfnl(h->table); atomic_inc(&t->uref); cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; rcu_read_unlock_bh(); } else if (cb->args[IPSET_CB_PRIVATE]) { t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { - /* Resizing didn't destroy the hash table */ - pr_debug("Table destroy by dump: %p\n", t); + pr_debug("Table destroy after resize " + " by dump: %p\n", t); mtype_ahash_destroy(set, t, false); } cb->args[IPSET_CB_PRIVATE] = 0; @@ -1133,8 +1375,7 @@ mtype_list(const struct ip_set *set, if (!test_bit(i, n->used)) continue; e = ahash_data(n, i, set->dsize); - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) + if (SET_ELEM_EXPIRED(set, e)) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[IPSET_CB_ARG0], n, i, e); @@ -1200,6 +1441,8 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, + .region_lock = true, }; #ifdef IP_SET_EMIT_CREATE @@ -1218,6 +1461,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, size_t hsize; struct htype *h; struct htable *t; + u32 i; pr_debug("Create set %s with family %s\n", set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); @@ -1290,6 +1534,15 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, kfree(h); return -ENOMEM; } + t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); + if (!t->hregion) { + kfree(t); + kfree(h); + return -ENOMEM; + } + h->gc.set = set; + for (i = 0; i < ahash_numof_locks(hbits); i++) + spin_lock_init(&t->hregion[i].lock); h->maxelem = maxelem; #ifdef IP_SET_HASH_WITH_NETMASK h->netmask = netmask; @@ -1300,9 +1553,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, get_random_bytes(&h->initval, sizeof(h->initval)); t->htable_bits = hbits; + t->maxelem = h->maxelem / ahash_numof_locks(hbits); RCU_INIT_POINTER(h->table, t); - h->set = set; + INIT_LIST_HEAD(&h->ad); set->data = h; #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) { @@ -1325,12 +1579,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) #endif - IPSET_TOKEN(HTYPE, 4_gc_init)(set, - IPSET_TOKEN(HTYPE, 4_gc)); + IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc); #ifndef IP_SET_PROTO_UNDEF else - IPSET_TOKEN(HTYPE, 6_gc_init)(set, - IPSET_TOKEN(HTYPE, 6_gc)); + IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc); #endif } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index efef9b76be2e2d..7501303ebfa2aa 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -430,9 +430,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -549,6 +546,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -562,6 +568,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index baaad3937a3e35..8e7547925ef461 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4156,6 +4156,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; + if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) == + (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; } dtype = 0; @@ -4521,6 +4524,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + set->dead = 1; if (event) nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_KERNEL); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 0a96661218d11b..48b8c360e0a4ac 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -748,7 +748,6 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, if (err || !frag) return err; - skb_get(skb); mru = tc_skb_cb(skb)->mru; if (family == NFPROTO_IPV4) { @@ -1030,12 +1029,8 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, nh_ofs = skb_network_offset(skb); skb_pull_rcsum(skb, nh_ofs); err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); - if (err == -EINPROGRESS) { - retval = TC_ACT_STOLEN; - goto out_clear; - } if (err) - goto drop; + goto out_frag; err = tcf_ct_skb_network_trim(skb, family); if (err) @@ -1102,6 +1097,11 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; +out_frag: + if (err != -EINPROGRESS) + tcf_action_inc_drop_qstats(&c->common); + return TC_ACT_CONSUMED; + drop: tcf_action_inc_drop_qstats(&c->common); return TC_ACT_SHOT; diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index da43063f192ecf..ac5e6aca15c06a 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -163,7 +163,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, } if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd && (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) && - !list_empty(&smc->conn.lgr->list)) { + !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) { struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; diff --git a/scripts/kernel.spec b/scripts/kernel.spec index 49056ab7711498..5be23bae60036e 100644 --- a/scripts/kernel.spec +++ b/scripts/kernel.spec @@ -38,10 +38,10 @@ # define buildid .local %define specversion 4.18.0 -%define pkgrelease 553.el8 +%define pkgrelease 553.5.1.el8_10 # allow pkg_release to have configurable %%{?dist} tag -%define specrelease 553%{?dist} +%define specrelease 553.5.1%{?dist} %define pkg_release %{specrelease}%{?buildid} @@ -53,6 +53,7 @@ # architecture allows it. All should default to 1 (enabled) and be flipped to # 0 (disabled) by later arch-specific checks. +%define _with_kabidupchk 1 # The following build options are enabled by default. # Use either --without in your rpmbuild command or force values # to 0 in here to disable them. @@ -2681,7 +2682,7 @@ fi # # %changelog -* Wed May 22 2024 Release Engineering - 4.18.0-553 +* Wed Jun 05 2024 Release Engineering - 4.18.0-553.5.1 - Adding prod certs and changed cert date to 20210620 (Sherif Nagy) - Adding Rocky secure boot certs (Sherif Nagy) - Fixing vmlinuz removal (Sherif Nagy) @@ -2689,8 +2690,172 @@ fi - Porting to 8.10, debranding and Rocky branding (Louis Abel) - Fixing pesign_key_name values (Sherif Nagy) -* Fri May 10 2024 Denys Vlasenko [4.18.0-553.el8] -- cpuhotplug: Fix kABI breakage caused by CPUHP_AP_HYPERV_ONLINE (Vitaly Kuznetsov) [RHEL-35784] +* Mon May 20 2024 Denys Vlasenko [4.18.0-553.5.1.el8_10] +- tools/power/turbostat: Fix uncore frequency file string (David Arcari) [RHEL-29238] +- tools/power turbostat: Expand probe_intel_uncore_frequency() (David Arcari) [RHEL-29238] +- uio: Fix use-after-free in uio_open (Ricardo Robaina) [RHEL-26232] {CVE-2023-52439} +- net:emac/emac-mac: Fix a use after free in emac_mac_tx_buf_send (Ken Cox) [RHEL-27316] {CVE-2021-47013} +- keys: Fix linking a duplicate key to a keyring's assoc_array (David Howells) [RHEL-30772] +- keys: Hoist locking out of __key_link_begin() (David Howells) [RHEL-30772] +- keys: Break bits out of key_unlink() (David Howells) [RHEL-30772] +- keys: Change keyring_serialise_link_sem to a mutex (David Howells) [RHEL-30772] +- wifi: brcm80211: handle pmk_op allocation failure (Jose Ignacio Tornos Martinez) [RHEL-35150] {CVE-2024-27048} +- wifi: rtl8xxxu: add cancel_work_sync() for c2hcmd_work (Jose Ignacio Tornos Martinez) [RHEL-35140] {CVE-2024-27052} +- wifi: iwlwifi: mvm: ensure offloading TID queue exists (Jose Ignacio Tornos Martinez) [RHEL-35130] {CVE-2024-27056} +- wifi: mt76: mt7921e: fix use-after-free in free_irq() (Jose Ignacio Tornos Martinez) [RHEL-34866] {CVE-2024-26892} +- wifi: ath9k: delay all of ath9k_wmi_event_tasklet() until init is complete (Jose Ignacio Tornos Martinez) [RHEL-34189] {CVE-2024-26897} +- wifi: iwlwifi: mvm: fix a crash when we run out of stations (Jose Ignacio Tornos Martinez) [RHEL-31547] {CVE-2024-26693} +- wifi: iwlwifi: fix double-free bug (Jose Ignacio Tornos Martinez) [RHEL-31543] {CVE-2024-26694} +- wifi: ath9k: Fix potential array-index-out-of-bounds read in ath9k_htc_txstatus() (Jose Ignacio Tornos Martinez) [RHEL-29089] {CVE-2023-52594} +- wifi: rt2x00: restart beacon queue when hardware reset (Jose Ignacio Tornos Martinez) [RHEL-29093] {CVE-2023-52595} +- wifi: iwlwifi: fix a memory corruption (Jose Ignacio Tornos Martinez) [RHEL-28903] {CVE-2024-26610} + +* Wed May 15 2024 Denys Vlasenko [4.18.0-553.4.1.el8_10] +- cpuhotplug: Fix kABI breakage caused by CPUHP_AP_HYPERV_ONLINE (Vitaly Kuznetsov) [RHEL-36117] +- net/mlx5e: Prevent deadlock while disabling aRFS (Kamal Heib) [RHEL-35041] {CVE-2024-27014} +- x86/tsc: Defer marking TSC unstable to a worker (Wander Lairson Costa) [RHEL-32676] +- x86/smpboot: Make TSC synchronization function call based (Wander Lairson Costa) [RHEL-32676] +- net: usb: fix possible use-after-free in smsc75xx_bind (Jose Ignacio Tornos Martinez) [RHEL-30311] {CVE-2021-47171} +- net: usb: fix memory leak in smsc75xx_bind (Jose Ignacio Tornos Martinez) [RHEL-30311] {CVE-2021-47171} + +* Sat May 11 2024 Denys Vlasenko [4.18.0-552.3.1.el8_10] +- netfilter: nf_tables: mark set as dead when unbinding anonymous set with timeout (Phil Sutter) [RHEL-30076] {CVE-2024-26643} +- netfilter: nf_tables: disallow anonymous set with timeout flag (Phil Sutter) [RHEL-30080] {CVE-2024-26642} +- selftests/bpf: Fix pyperf180 compilation failure with clang18 (Artem Savkov) [RHEL-35576] +- md/raid5: fix atomicity violation in raid5_cache_count (Nigel Croxon) [RHEL-27930] {CVE-2024-23307} +- usb: ulpi: Fix debugfs directory leak (Desnes Nunes) [RHEL-33287] {CVE-2024-26919} +- powerpc/pseries: Fix potential memleak in papr_get_attr() (Mamatha Inamdar) [RHEL-35213] {CVE-2022-48669} +- USB: usb-storage: Prevent divide-by-0 error in isd200_ata_command (Desnes Nunes) [RHEL-35122] {CVE-2024-27059} +- NFSv4: fairly test all delegations on a SEQ4_ revocation (Benjamin Coddington) [RHEL-34912] +- USB: core: Fix deadlock in usb_deauthorize_interface() (Desnes Nunes) [RHEL-35002] {CVE-2024-26934} +- usb: xhci: Add error handling in xhci_map_urb_for_dma (Desnes Nunes) [RHEL-34958] {CVE-2024-26964} +- fs: sysfs: Fix reference leak in sysfs_break_active_protection() (Ewan D. Milne) [RHEL-35076] {CVE-2024-26993} +- xhci: handle isoc Babble and Buffer Overrun events properly (Desnes Nunes) [RHEL-31297] {CVE-2024-26659} +- xhci: process isoc TD properly when there was a transaction error mid TD. (Desnes Nunes) [RHEL-31297] {CVE-2024-26659} +- USB: core: Fix deadlock in port "disable" sysfs attribute (Desnes Nunes) [RHEL-35006] {CVE-2024-26933} +- USB: core: Add hub_get() and hub_put() routines (Desnes Nunes) [RHEL-35006] {CVE-2024-26933} +- netfilter: ipset: Missing gc cancellations fixed (Phil Sutter) [RHEL-30521] +- netfilter: ipset: fix performance regression in swap operation (Phil Sutter) [RHEL-30521] +- netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports (Phil Sutter) [RHEL-30521] +- netfilter: ipset: fix race condition between swap/destroy and kernel side add/del/test (Phil Sutter) [RHEL-30521] +- x86/apic/x2apic: Fix a NULL pointer deref when handling a dying cpu (David Arcari) [RHEL-32516] +- x86/coco: Disable 32-bit emulation by default on TDX and SEV (Vitaly Kuznetsov) [RHEL-25087] {CVE-2024-25744} +- x86: Make IA32_EMULATION boot time configurable (Vitaly Kuznetsov) [RHEL-25087] {CVE-2024-25744} +- x86/entry: Make IA32 syscalls' availability depend on ia32_enabled() (Vitaly Kuznetsov) [RHEL-25087] {CVE-2024-25744} +- x86/elf: Make loading of 32bit processes depend on ia32_enabled() (Vitaly Kuznetsov) [RHEL-25087] {CVE-2024-25744} +- x86/entry: Rename ignore_sysret() (Vitaly Kuznetsov) [RHEL-25087] {CVE-2024-25744} +- x86/cpu: Don't write CSTAR MSR on Intel CPUs (Vitaly Kuznetsov) [RHEL-25087] {CVE-2024-25744} +- x86: Introduce ia32_enabled() (Vitaly Kuznetsov) [RHEL-25087] {CVE-2024-25744} + +* Mon May 06 2024 Denys Vlasenko [4.18.0-552.2.1.el8_10] +- s390/ptrace: handle setting of fpc register correctly (Tobias Huschle) [RHEL-29106] {CVE-2023-52598} +- net/smc: fix illegal rmb_desc access in SMC-D connection dump (Tobias Huschle) [RHEL-27746] {CVE-2024-26615} +- wifi: mac80211: fix race condition on enabling fast-xmit (Jose Ignacio Tornos Martinez) [RHEL-31664] {CVE-2024-26779} +- powerpc/fadump: make is_kdump_kernel() return false when fadump is active (Mamatha Inamdar) [RHEL-24401] +- vmcore: remove dependency with is_kdump_kernel() for exporting vmcore (Mamatha Inamdar) [RHEL-24401] +- mtd: require write permissions for locking and badblock ioctls (Prarit Bhargava) [RHEL-27585] {CVE-2021-47055} +- mtd: properly check all write ioctls for permissions (Prarit Bhargava) [RHEL-27585] {CVE-2021-47055} +- pid: take a reference when initializing `cad_pid` (Waiman Long) [RHEL-29420] {CVE-2021-47118} +- i2c: i801: Don't generate an interrupt on bus reset (Prarit Bhargava) [RHEL-30325] {CVE-2021-47153} +- RDMA/srpt: Do not register event handler until srpt device is fully setup (Kamal Heib) [RHEL-33224] {CVE-2024-26872} +- ceph: switch to corrected encoding of max_xattr_size in mdsmap (Xiubo Li) [RHEL-26723] +- ceph: switch to use cap_delay_lock for the unlink delay list (Xiubo Li) [RHEL-32870] +- ceph: pass ino# instead of old_dentry if it's disconnected (Xiubo Li) [RHEL-32870] +- fat: fix uninitialized field in nostale filehandles (Andrey Albershteyn) [RHEL-33186 RHEL-35108] {CVE-2024-26973} +- do_sys_name_to_handle(): use kzalloc() to fix kernel-infoleak (Andrey Albershteyn) [RHEL-33186] {CVE-2024-26901} +- idpf: limit the support to GCP only (Michal Schmidt) [RHEL-15652] +- redhat/configs: enable CONFIG_IDPF (Michal Schmidt) [RHEL-15652] +- idpf: remove the use of ETHTOOL_RING_USE_TCP_DATA_SPLIT (Michal Schmidt) [RHEL-15652] +- idpf: workaround for unavailable skb page recycling (Michal Schmidt) [RHEL-15652] +- idpf: always allocate a full page (Michal Schmidt) [RHEL-15652] +- idpf: remove page pool stats code (Michal Schmidt) [RHEL-15652] +- idpf: add minimal macros for __free(kfree) to work (Michal Schmidt) [RHEL-15652] +- idpf: fixup include paths for RHEL 8 (Michal Schmidt) [RHEL-15652] +- idpf: fix kernel panic on unknown packet types (Michal Schmidt) [RHEL-15652] +- idpf: disable local BH when scheduling napi for marker packets (Michal Schmidt) [RHEL-15652] +- idpf: remove dealloc vector msg err in idpf_intr_rel (Michal Schmidt) [RHEL-15652] +- idpf: fix minor controlq issues (Michal Schmidt) [RHEL-15652] +- idpf: prevent deinit uninitialized virtchnl core (Michal Schmidt) [RHEL-15652] +- idpf: cleanup virtchnl cruft (Michal Schmidt) [RHEL-15652] +- idpf: refactor idpf_recv_mb_msg (Michal Schmidt) [RHEL-15652] +- idpf: add async_handler for MAC filter messages (Michal Schmidt) [RHEL-15652] +- idpf: refactor remaining virtchnl messages (Michal Schmidt) [RHEL-15652] +- idpf: refactor queue related virtchnl messages (Michal Schmidt) [RHEL-15652] +- idpf: refactor vport virtchnl messages (Michal Schmidt) [RHEL-15652] +- idpf: implement virtchnl transaction manager (Michal Schmidt) [RHEL-15652] +- idpf: add idpf_virtchnl.h (Michal Schmidt) [RHEL-15652] +- idpf: avoid compiler padding in virtchnl2_ptype struct (Michal Schmidt) [RHEL-15652] +- idpf: distinguish vports by the dev_port attribute (Michal Schmidt) [RHEL-15652] +- idpf: avoid compiler introduced padding in virtchnl2_rss_key struct (Michal Schmidt) [RHEL-15652] +- idpf: fix corrupted frames and skb leaks in singleq mode (Michal Schmidt) [RHEL-15652] +- idpf: refactor some missing field get/prep conversions (Michal Schmidt) [RHEL-15652] +- idpf: add get/set for Ethtool's header split ringparam (Michal Schmidt) [RHEL-15652] +- idpf: fix potential use-after-free in idpf_tso() (Michal Schmidt) [RHEL-15652] +- idpf: cancel mailbox work in error path (Michal Schmidt) [RHEL-15652] +- idpf: set scheduling mode for completion queue (Michal Schmidt) [RHEL-15652] +- idpf: add SRIOV support and other ndo_ops (Michal Schmidt) [RHEL-15652] +- idpf: add ethtool callbacks (Michal Schmidt) [RHEL-15652] +- idpf: add singleq start_xmit and napi poll (Michal Schmidt) [RHEL-15652] +- idpf: add RX splitq napi poll support (Michal Schmidt) [RHEL-15652] +- idpf: add TX splitq napi poll support (Michal Schmidt) [RHEL-15652] +- idpf: add splitq start_xmit (Michal Schmidt) [RHEL-15652] +- idpf: initialize interrupts and enable vport (Michal Schmidt) [RHEL-15652] +- idpf: configure resources for RX queues (Michal Schmidt) [RHEL-15652] +- idpf: configure resources for TX queues (Michal Schmidt) [RHEL-15652] +- idpf: add ptypes and MAC filter support (Michal Schmidt) [RHEL-15652] +- idpf: add create vport and netdev configuration (Michal Schmidt) [RHEL-15652] +- idpf: add core init and interrupt request (Michal Schmidt) [RHEL-15652] +- idpf: add controlq init and reset checks (Michal Schmidt) [RHEL-15652] +- idpf: add module register and probe functionality (Michal Schmidt) [RHEL-15652] +- virtchnl: add virtchnl version 2 ops (Michal Schmidt) [RHEL-15652] +- net: netdev_queue: netdev_txq_completed_mb(): fix wake condition (Michal Schmidt) [RHEL-15652] +- net: piggy back on the memory barrier in bql when waking queues (Michal Schmidt) [RHEL-15652] +- net: provide macros for commonly copied lockless queue stop/wake code (Michal Schmidt) [RHEL-15652] + +* Fri Apr 26 2024 Denys Vlasenko [4.18.0-552.1.1.el8_10] +- redhat: set DIST to el8_10 and ZSTREAM to yes for 8.10 (Denys Vlasenko) +- tty: tty_buffer: Fix the softlockup issue in flush_to_ldisc (Prarit Bhargava) [RHEL-32590] {CVE-2021-47185} +- net: mana: Fix Rx DMA datasize and skb_over_panic (Cathy Avery) [RHEL-32579] +- RDMA/srpt: Support specifying the srpt_service_guid parameter (Kamal Heib) [RHEL-31710] {CVE-2024-26744} +- RDMA/qedr: Fix qedr_create_user_qp error flow (Kamal Heib) [RHEL-31714] {CVE-2024-26743} +- hwmon: (coretemp) Fix out-of-bounds memory access (David Arcari) [RHEL-31305] {CVE-2024-26664} +- RDMA/irdma: Fix KASAN issue with tasklet (Kamal Heib) [RHEL-15776] +- net: bridge: use DEV_STATS_INC() (Ivan Vecera) [RHEL-27989] {CVE-2023-52578} +- net: Fix unwanted sign extension in netdev_stats_to_stats64() (Ivan Vecera) [RHEL-27989] {CVE-2023-52578} +- net: add atomic_long_t to net_device_stats fields (Ivan Vecera) [RHEL-27989] {CVE-2023-52578} +- net/sched: act_ct: fix skb leak and crash on ooo frags (Xin Long) [RHEL-29467] {CVE-2023-52610} +- net: usb: smsc75xx: Fix uninit-value access in __smsc75xx_read_reg (Jose Ignacio Tornos Martinez) [RHEL-28015] {CVE-2023-52528} +- RDMA/core: Fix uninit-value access in ib_get_eth_speed() (Kamal Heib) [RHEL-30130] +- RDMA/core: Get IB width and speed from netdev (Kamal Heib) [RHEL-30130] +- cpufreq: intel_pstate: Add Emerald Rapids support in no-HWP mode (Prarit Bhargava) [RHEL-29444] +- powerpc/mm: Fix null-pointer dereference in pgtable_cache_add (Mamatha Inamdar) [RHEL-29118] {CVE-2023-52607} +- powerpc/lib: Validate size for vector operations (Mamatha Inamdar) [RHEL-29114] {CVE-2023-52606} +- usb: hub: Guard against accesses to uninitialized BOS descriptors (Desnes Nunes) [RHEL-28986] {CVE-2023-52477} +- media: uvcvideo: Fix OOB read (Desnes Nunes) [RHEL-27940] {CVE-2023-52565} +- media: pvrusb2: fix use after free on context disconnection (Desnes Nunes) [RHEL-26498] {CVE-2023-52445} +- i2c: i801: Fix block process call transactions (Prarit Bhargava) [RHEL-26478] {CVE-2024-26593} +- overlay: disable EVM (Coiby Xu) [RHEL-19863] +- evm: add support to disable EVM on unsupported filesystems (Coiby Xu) [RHEL-19863] +- evm: don't copy up 'security.evm' xattr (Coiby Xu) [RHEL-19863] +- net: ena: Remove ena_select_queue (Kamal Heib) [RHEL-14286] +- media: dvbdev: Fix memory leak in dvb_media_device_free() (Prarit Bhargava) [RHEL-27254] {CVE-2020-36777} +- gfs2: Fix invalid metadata access in punch_hole (Andrew Price) [RHEL-28784] +- i2c: Fix a potential use after free (Prarit Bhargava) [RHEL-26849] {CVE-2019-25162} +- i2c: validate user data in compat ioctl (Prarit Bhargava) [RHEL-27022] {CVE-2021-46934} +- platform/x86: think-lmi: Fix reference leak (Prarit Bhargava) [RHEL-28030] {CVE-2023-52520} +- vhost: use kzalloc() instead of kmalloc() followed by memset() (Jon Maloy) [RHEL-21505] {CVE-2024-0340} +- RDMA/siw: Fix connection failure handling (Kamal Heib) [RHEL-28042] {CVE-2023-52513} +- vt: fix memory overlapping when deleting chars in the buffer (Waiman Long) [RHEL-27778 RHEL-27779] {CVE-2022-48627} +- x86/fpu: Stop relying on userspace for info to fault in xsave buffer (Steve Best) [RHEL-26669] {CVE-2024-26603} +- mptcp: fix double-free on socket dismantle (Davide Caratti) [RHEL-22773] {CVE-2024-26782} +- crypto: akcipher - Disable signing and decryption (Herbert Xu) [RHEL-17114] {CVE-2023-6240} +- crypto: akcipher - default implementations for request callbacks (Herbert Xu) [RHEL-17114] {CVE-2023-6240} +- crypto: testmgr - split akcipher tests by a key type (Herbert Xu) [RHEL-17114] {CVE-2023-6240} +- workqueue: Warn when a rescuer could not be created (Waiman Long) [RHEL-22136] +- RDMA/cma: Avoid GID lookups on iWARP devices (Benjamin Coddington) [RHEL-12456] +- RDMA/cma: Deduplicate error flow in cma_validate_port() (Benjamin Coddington) [RHEL-12456] +- RDMA/core: Set gid_attr.ndev for iWARP devices (Benjamin Coddington) [RHEL-12456] +- RDMA/siw: Fabricate a GID on tun and loopback devices (Benjamin Coddington) [RHEL-12456] * Sun Apr 07 2024 Denys Vlasenko [4.18.0-552.el8] - i40e: Enforce software interrupt during busy-poll exit (Ivan Vecera) [RHEL-26248] diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index fdbffac2b91c1b..187525aa965af2 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -118,6 +118,17 @@ static int evm_find_protected_xattrs(struct dentry *dentry) return count; } +static int is_unsupported_fs(struct dentry *dentry) +{ + struct inode *inode = d_backing_inode(dentry); + + if (inode->i_sb->s_iflags & SB_I_EVM_UNSUPPORTED) { + pr_info_once("%s not supported\n", inode->i_sb->s_type->name); + return 1; + } + return 0; +} + /* * evm_verify_hmac - calculate and compare the HMAC with the EVM xattr * @@ -148,6 +159,9 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, iint->evm_status == INTEGRITY_PASS_IMMUTABLE)) return iint->evm_status; + if (is_unsupported_fs(dentry)) + return INTEGRITY_UNKNOWN; + /* if status is not PASS, try to check again - against -ENOMEM */ /* first need to know the sig type */ @@ -283,6 +297,9 @@ enum integrity_status evm_verifyxattr(struct dentry *dentry, if (!evm_key_loaded() || !evm_protected_xattr(xattr_name)) return INTEGRITY_UNKNOWN; + if (is_unsupported_fs(dentry)) + return INTEGRITY_UNKNOWN; + if (!iint) { iint = integrity_iint_find(d_backing_inode(dentry)); if (!iint) @@ -329,15 +346,21 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name, if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (is_unsupported_fs(dentry)) + return -EPERM; } else if (!evm_protected_xattr(xattr_name)) { if (!posix_xattr_acl(xattr_name)) return 0; + if (is_unsupported_fs(dentry)) + return 0; + evm_status = evm_verify_current_integrity(dentry); if ((evm_status == INTEGRITY_PASS) || (evm_status == INTEGRITY_NOXATTRS)) return 0; goto out; - } + } else if (is_unsupported_fs(dentry)) + return 0; evm_status = evm_verify_current_integrity(dentry); if (evm_status == INTEGRITY_NOXATTRS) { @@ -479,6 +502,9 @@ void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name, if (!strcmp(xattr_name, XATTR_NAME_EVM)) return; + if (is_unsupported_fs(dentry)) + return; + evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len); } @@ -523,8 +549,12 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr) if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; + if (is_unsupported_fs(dentry)) + return 0; + if (!(ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))) return 0; + evm_status = evm_verify_current_integrity(dentry); if ((evm_status == INTEGRITY_PASS) || (evm_status == INTEGRITY_NOXATTRS)) @@ -553,10 +583,20 @@ void evm_inode_post_setattr(struct dentry *dentry, int ia_valid) evm_reset_status(dentry->d_inode); + if (is_unsupported_fs(dentry)) + return; + if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) evm_update_evmxattr(dentry, NULL, NULL, 0); } +int evm_inode_copy_up_xattr(const char *name) +{ + if (strcmp(name, XATTR_NAME_EVM) == 0) + return 1; /* Discard */ + return -EOPNOTSUPP; +} + /* * evm_inode_init_security - initializes security.evm HMAC value */ diff --git a/security/keys/internal.h b/security/keys/internal.h index 8c12f15e61e32f..22f40f287ee6be 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -95,6 +95,8 @@ extern wait_queue_head_t request_key_conswq; extern struct key_type *key_type_lookup(const char *type); extern void key_type_put(struct key_type *ktype); +extern int __key_link_lock(struct key *keyring, + const struct keyring_index_key *index_key); extern int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit **_edit); diff --git a/security/keys/key.c b/security/keys/key.c index 6110da17912df9..924d41d1cbecab 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -501,7 +501,7 @@ int key_instantiate_and_link(struct key *key, struct key *authkey) { struct key_preparsed_payload prep; - struct assoc_array_edit *edit; + struct assoc_array_edit *edit = NULL; int ret; memset(&prep, 0, sizeof(prep)); @@ -516,10 +516,14 @@ int key_instantiate_and_link(struct key *key, } if (keyring) { - ret = __key_link_begin(keyring, &key->index_key, &edit); + ret = __key_link_lock(keyring, &key->index_key); if (ret < 0) goto error; + ret = __key_link_begin(keyring, &key->index_key, &edit); + if (ret < 0) + goto error_link_end; + if (keyring->restrict_link && keyring->restrict_link->check) { struct key_restriction *keyres = keyring->restrict_link; @@ -571,7 +575,7 @@ int key_reject_and_link(struct key *key, struct key *keyring, struct key *authkey) { - struct assoc_array_edit *edit; + struct assoc_array_edit *edit = NULL; int ret, awaken, link_ret = 0; key_check(key); @@ -584,7 +588,12 @@ int key_reject_and_link(struct key *key, if (keyring->restrict_link) return -EPERM; - link_ret = __key_link_begin(keyring, &key->index_key, &edit); + link_ret = __key_link_lock(keyring, &key->index_key); + if (link_ret == 0) { + link_ret = __key_link_begin(keyring, &key->index_key, &edit); + if (link_ret < 0) + __key_link_end(keyring, &key->index_key, edit); + } } mutex_lock(&key_construction_mutex); @@ -811,7 +820,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, .description = description, }; struct key_preparsed_payload prep; - struct assoc_array_edit *edit; + struct assoc_array_edit *edit = NULL; const struct cred *cred = current_cred(); struct key *keyring, *key = NULL; key_ref_t key_ref; @@ -861,12 +870,18 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } index_key.desc_len = strlen(index_key.description); - ret = __key_link_begin(keyring, &index_key, &edit); + ret = __key_link_lock(keyring, &index_key); if (ret < 0) { key_ref = ERR_PTR(ret); goto error_free_prep; } + ret = __key_link_begin(keyring, &index_key, &edit); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_link_end; + } + if (restrict_link && restrict_link->check) { ret = restrict_link->check(keyring, index_key.type, &prep.payload, restrict_link->key); diff --git a/security/keys/keyring.c b/security/keys/keyring.c index c20085add53dc4..408bff68e4e97d 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -100,7 +100,7 @@ EXPORT_SYMBOL(key_type_keyring); * Semaphore to serialise link/link calls to prevent two link calls in parallel * introducing a cycle. */ -static DECLARE_RWSEM(keyring_serialise_link_sem); +static DEFINE_MUTEX(keyring_serialise_link_lock); /* * Publish the name of a keyring so that it can be found by name (if it has @@ -1192,14 +1192,34 @@ static int keyring_detect_cycle(struct key *A, struct key *B) return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result); } +/* + * Lock keyring for link. + */ +int __key_link_lock(struct key *keyring, + const struct keyring_index_key *index_key) + __acquires(&keyring->sem) + __acquires(&keyring_serialise_link_lock) +{ + if (keyring->type != &key_type_keyring) + return -ENOTDIR; + + down_write(&keyring->sem); + + /* Serialise link/link calls to prevent parallel calls causing a cycle + * when linking two keyring in opposite orders. + */ + if (index_key->type == &key_type_keyring) + mutex_lock(&keyring_serialise_link_lock); + + return 0; +} + /* * Preallocate memory so that a key can be linked into to a keyring. */ int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit **_edit) - __acquires(&keyring->sem) - __acquires(&keyring_serialise_link_sem) { struct assoc_array_edit *edit; int ret; @@ -1208,20 +1228,13 @@ int __key_link_begin(struct key *keyring, keyring->serial, index_key->type->name, index_key->description); BUG_ON(index_key->desc_len == 0); + BUG_ON(*_edit != NULL); - if (keyring->type != &key_type_keyring) - return -ENOTDIR; - - down_write(&keyring->sem); + *_edit = NULL; ret = -EKEYREVOKED; if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) - goto error_krsem; - - /* serialise link/link calls to prevent parallel calls causing a cycle - * when linking two keyring in opposite orders */ - if (index_key->type == &key_type_keyring) - down_write(&keyring_serialise_link_sem); + goto error; /* Create an edit script that will insert/replace the key in the * keyring tree. @@ -1232,7 +1245,7 @@ int __key_link_begin(struct key *keyring, NULL); if (IS_ERR(edit)) { ret = PTR_ERR(edit); - goto error_sem; + goto error; } /* If we're not replacing a link in-place then we're going to need some @@ -1251,11 +1264,7 @@ int __key_link_begin(struct key *keyring, error_cancel: assoc_array_cancel_edit(edit); -error_sem: - if (index_key->type == &key_type_keyring) - up_write(&keyring_serialise_link_sem); -error_krsem: - up_write(&keyring->sem); +error: kleave(" = %d", ret); return ret; } @@ -1300,14 +1309,11 @@ void __key_link_end(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit *edit) __releases(&keyring->sem) - __releases(&keyring_serialise_link_sem) + __releases(&keyring_serialise_link_lock) { BUG_ON(index_key->type == NULL); kenter("%d,%s,", keyring->serial, index_key->type->name); - if (index_key->type == &key_type_keyring) - up_write(&keyring_serialise_link_sem); - if (edit) { if (!edit->dead_leaf) { key_payload_reserve(keyring, @@ -1316,6 +1322,9 @@ void __key_link_end(struct key *keyring, assoc_array_cancel_edit(edit); } up_write(&keyring->sem); + + if (index_key->type == &key_type_keyring) + mutex_unlock(&keyring_serialise_link_lock); } /* @@ -1351,7 +1360,7 @@ static int __key_link_check_restriction(struct key *keyring, struct key *key) */ int key_link(struct key *keyring, struct key *key) { - struct assoc_array_edit *edit; + struct assoc_array_edit *edit = NULL; int ret; kenter("{%d,%d}", keyring->serial, refcount_read(&keyring->usage)); @@ -1359,22 +1368,88 @@ int key_link(struct key *keyring, struct key *key) key_check(keyring); key_check(key); - ret = __key_link_begin(keyring, &key->index_key, &edit); - if (ret == 0) { - kdebug("begun {%d,%d}", keyring->serial, refcount_read(&keyring->usage)); - ret = __key_link_check_restriction(keyring, key); - if (ret == 0) - ret = __key_link_check_live_key(keyring, key); - if (ret == 0) - __key_link(key, &edit); - __key_link_end(keyring, &key->index_key, edit); - } + ret = __key_link_lock(keyring, &key->index_key); + if (ret < 0) + goto error; + ret = __key_link_begin(keyring, &key->index_key, &edit); + if (ret < 0) + goto error_end; + + kdebug("begun {%d,%d}", keyring->serial, refcount_read(&keyring->usage)); + ret = __key_link_check_restriction(keyring, key); + if (ret == 0) + ret = __key_link_check_live_key(keyring, key); + if (ret == 0) + __key_link(key, &edit); + +error_end: + __key_link_end(keyring, &key->index_key, edit); +error: kleave(" = %d {%d,%d}", ret, keyring->serial, refcount_read(&keyring->usage)); return ret; } EXPORT_SYMBOL(key_link); +/* + * Lock a keyring for unlink. + */ +static int __key_unlink_lock(struct key *keyring) + __acquires(&keyring->sem) +{ + if (keyring->type != &key_type_keyring) + return -ENOTDIR; + + down_write(&keyring->sem); + return 0; +} + +/* + * Begin the process of unlinking a key from a keyring. + */ +static int __key_unlink_begin(struct key *keyring, struct key *key, + struct assoc_array_edit **_edit) +{ + struct assoc_array_edit *edit; + + BUG_ON(*_edit != NULL); + + edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, + &key->index_key); + if (IS_ERR(edit)) + return PTR_ERR(edit); + + if (!edit) + return -ENOENT; + + *_edit = edit; + return 0; +} + +/* + * Apply an unlink change. + */ +static void __key_unlink(struct key *keyring, struct key *key, + struct assoc_array_edit **_edit) +{ + assoc_array_apply_edit(*_edit); + *_edit = NULL; + key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES); +} + +/* + * Finish unlinking a key from to a keyring. + */ +static void __key_unlink_end(struct key *keyring, + struct key *key, + struct assoc_array_edit *edit) + __releases(&keyring->sem) +{ + if (edit) + assoc_array_cancel_edit(edit); + up_write(&keyring->sem); +} + /** * key_unlink - Unlink the first link to a key from a keyring. * @keyring: The keyring to remove the link from. @@ -1394,33 +1469,20 @@ EXPORT_SYMBOL(key_link); */ int key_unlink(struct key *keyring, struct key *key) { - struct assoc_array_edit *edit; + struct assoc_array_edit *edit = NULL; int ret; key_check(keyring); key_check(key); - if (keyring->type != &key_type_keyring) - return -ENOTDIR; - - down_write(&keyring->sem); + ret = __key_unlink_lock(keyring); + if (ret < 0) + return ret; - edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, - &key->index_key); - if (IS_ERR(edit)) { - ret = PTR_ERR(edit); - goto error; - } - ret = -ENOENT; - if (edit == NULL) - goto error; - - assoc_array_apply_edit(edit); - key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES); - ret = 0; - -error: - up_write(&keyring->sem); + ret = __key_unlink_begin(keyring, key, &edit); + if (ret == 0) + __key_unlink(keyring, key, &edit); + __key_unlink_end(keyring, key, edit); return ret; } EXPORT_SYMBOL(key_unlink); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 301f0e300dbd28..74fa7d92936409 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -357,7 +357,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx, struct key_user *user, struct key **_key) { - struct assoc_array_edit *edit; + struct assoc_array_edit *edit = NULL; struct key *key; key_perm_t perm; key_ref_t key_ref; @@ -386,26 +386,37 @@ static int construct_alloc_key(struct keyring_search_context *ctx, set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); if (dest_keyring) { - ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit); + ret = __key_link_lock(dest_keyring, &key->index_key); if (ret < 0) - goto link_prealloc_failed; + goto link_lock_failed; } - /* attach the key to the destination keyring under lock, but we do need + /* + * Attach the key to the destination keyring under lock, but we do need * to do another check just in case someone beat us to it whilst we - * waited for locks */ + * waited for locks. + * + * The caller might specify a comparison function which looks for keys + * that do not exactly match but are still equivalent from the caller's + * perspective. The __key_link_begin() operation must be done only after + * an actual key is determined. + */ mutex_lock(&key_construction_mutex); key_ref = search_process_keyrings(ctx); if (!IS_ERR(key_ref)) goto key_already_present; - if (dest_keyring) + if (dest_keyring) { + ret = __key_link_begin(dest_keyring, &key->index_key, &edit); + if (ret < 0) + goto link_alloc_failed; __key_link(key, &edit); + } mutex_unlock(&key_construction_mutex); if (dest_keyring) - __key_link_end(dest_keyring, &ctx->index_key, edit); + __key_link_end(dest_keyring, &key->index_key, edit); mutex_unlock(&user->cons_lock); *_key = key; kleave(" = 0 [%d]", key_serial(key)); @@ -418,10 +429,13 @@ static int construct_alloc_key(struct keyring_search_context *ctx, mutex_unlock(&key_construction_mutex); key = key_ref_to_ptr(key_ref); if (dest_keyring) { + ret = __key_link_begin(dest_keyring, &key->index_key, &edit); + if (ret < 0) + goto link_alloc_failed_unlocked; ret = __key_link_check_live_key(dest_keyring, key); if (ret == 0) __key_link(key, &edit); - __key_link_end(dest_keyring, &ctx->index_key, edit); + __key_link_end(dest_keyring, &key->index_key, edit); if (ret < 0) goto link_check_failed; } @@ -436,7 +450,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx, kleave(" = %d [linkcheck]", ret); return ret; -link_prealloc_failed: +link_alloc_failed: + mutex_unlock(&key_construction_mutex); +link_alloc_failed_unlocked: + __key_link_end(dest_keyring, &key->index_key, edit); +link_lock_failed: mutex_unlock(&user->cons_lock); key_put(key); kleave(" = %d [prelink]", ret); diff --git a/security/security.c b/security/security.c index b5c34b73fcec55..641f462522928e 100644 --- a/security/security.c +++ b/security/security.c @@ -1416,7 +1416,7 @@ int security_inode_copy_up_xattr(const char *name) return rc; } - return LSM_RET_DEFAULT(inode_copy_up_xattr); + return evm_inode_copy_up_xattr(name); } EXPORT_SYMBOL(security_inode_copy_up_xattr); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d97d767214c50c..341bde3b9878d1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1965,7 +1965,7 @@ unsigned long long get_uncore_mhz(int package, int die) { char path[128]; - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package, + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, die); return (snapshot_sysfs_counter(path) / 1000); @@ -4151,16 +4151,15 @@ static void dump_sysfs_file(char *path) static void intel_uncore_frequency_probe(void) { int i, j; - char path[128]; + char path[256]; if (!genuine_intel) return; - if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK)) - return; + if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) + goto probe_cluster; - if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) - BIC_PRESENT(BIC_UNCORE_MHZ); + BIC_PRESENT(BIC_UNCORE_MHZ); if (quiet) return; @@ -4168,26 +4167,73 @@ static void intel_uncore_frequency_probe(void) for (i = 0; i < topo.num_packages; ++i) { for (j = 0; j < topo.num_die; ++j) { int k, l; + char path_base[128]; + + sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, + j); - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz", - i, j); + sprintf(path, "%s/min_freq_khz", path_base); k = read_sysfs_int(path); - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz", - i, j); + sprintf(path, "%s/max_freq_khz", path_base); l = read_sysfs_int(path); - fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); + fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); - sprintf(path, - "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz", - i, j); + sprintf(path, "%s/initial_min_freq_khz", path_base); k = read_sysfs_int(path); - sprintf(path, - "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz", - i, j); + sprintf(path, "%s/initial_max_freq_khz", path_base); l = read_sysfs_int(path); - fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000); + fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); + + sprintf(path, "%s/current_freq_khz", path_base); + k = read_sysfs_int(path); + fprintf(outf, " %d MHz\n", k / 1000); } } + return; + +probe_cluster: + if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) + return; + + if (quiet) + return; + + for (i = 0;; ++i) { + int k, l; + char path_base[128]; + int package_id, domain_id, cluster_id; + + sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i); + + if (access(path_base, R_OK)) + break; + + sprintf(path, "%s/package_id", path_base); + package_id = read_sysfs_int(path); + + sprintf(path, "%s/domain_id", path_base); + domain_id = read_sysfs_int(path); + + sprintf(path, "%s/fabric_cluster_id", path_base); + cluster_id = read_sysfs_int(path); + + sprintf(path, "%s/min_freq_khz", path_base); + k = read_sysfs_int(path); + sprintf(path, "%s/max_freq_khz", path_base); + l = read_sysfs_int(path); + fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, + cluster_id, k / 1000, l / 1000); + + sprintf(path, "%s/initial_min_freq_khz", path_base); + k = read_sysfs_int(path); + sprintf(path, "%s/initial_max_freq_khz", path_base); + l = read_sysfs_int(path); + fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000); + + sprintf(path, "%s/current_freq_khz", path_base); + k = read_sysfs_int(path); + fprintf(outf, " %d MHz\n", k / 1000); + } } static void dump_sysfs_cstate_config(void) diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c index c39f559d3100e8..42c4a8b62e3602 100644 --- a/tools/testing/selftests/bpf/progs/pyperf180.c +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -1,4 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 180 + +/* llvm upstream commit at clang18 + * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e + * changed inlining behavior and caused compilation failure as some branch + * target distance exceeded 16bit representation which is the maximum for + * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 + * to specify which cpu version is used for compilation. So a smaller + * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which + * reduced some branch target distances and resolved the compilation failure. + * + * To capture the case where a developer/ci uses clang18 but the corresponding + * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count + * will be set as well to prevent potential compilation failures. + */ +#ifdef __BPF_CPU_VERSION__ +#if __BPF_CPU_VERSION__ < 4 +#define UNROLL_COUNT 90 +#endif +#elif __clang_major__ == 18 +#define UNROLL_COUNT 90 +#endif + #include "pyperf.h"