ALSA: add virtio sound driver

This series implements a driver part of the virtio sound device
 specification v8 [1].
 
 The driver supports PCM playback and capture substreams, jack and
 channel map controls. A message-based transport is used to write/read
 PCM frames to/from a device.
 
 As a device part was used OpenSynergy proprietary implementation.
 
 v7 changes:
  - Moved the snd_pcm_period_elapsed() call from the interrupt handler to the
    kernel worker for being consistent with the non-atomic mode of the PCM
    device.
  - Removed SNDRV_PCM_INFO_RESUME flag. Now ops->prepare() sets the parameters
    for the substream if it was previously suspended.
  - Some additional code readability improvements/comments.
 
 [1] https://lists.oasis-open.org/archives/virtio-dev/202003/msg00185.html
 
 Link: https://lore.kernel.org/r/20210302164709.3142702-1-anton.yakovlev@opensynergy.com
 -----BEGIN PGP SIGNATURE-----
 
 iQJCBAABCAAsFiEEIXTw5fNLNI7mMiVaLtJE4w1nLE8FAmBEi8UOHHRpd2FpQHN1
 c2UuZGUACgkQLtJE4w1nLE9TPQ/+P2GPl+lTQKx1MWGcVWAAdECd8jVkwiNdzTB3
 jbO39a/4B1QlXo83uMwfN7r5Apk6b8iuEKww2YUEBmMG9BI/gOkWZWe+Dc/LWxaC
 MjeBxq0OsA9jvw8akJfDYZGfKeLcBDgfJYMiqbRE+k/bQc+ld68AUr748hsJ26ER
 aje+iF/Aja7WSYwhoMVtqsWkK4/nSGtSQyuWeXqBM6sFm89dpH6BrfTG4fHgIYz3
 n6qOtGYBHSxYL7tQTKvQIJJwCZM2Mk0oAOE/xrjO4dgXq2+TWNwhFHNzTLfzAibq
 izJ3EpWmpSs0uRjUhkblrkP6PxUCMOaSIFPM9VIa5ULqaddLwQWyo0l3oj75CVJK
 L/LgLFMWK+VacxxPiLIfTxicJhXAbLyuJ06gmqbpeHitG/iLMcDBZIHrvmj84EED
 v5Zecud3t1w7ze6ArO7y2FAZqW0mjIkzs/C3qvQlu7rHruytq17CiGso+RdwhMYb
 JISqmVvBmvEVQk7niQohehI6tINpVz/XpCUeDGDPWFYpw1VAWXsBTO7lVuX+aXbj
 s/xjGanAn4lnRCA0rLaQQzKElMAhraWmS73lOiCOrxTAAU7Nob/tuKerei6NgGkN
 9G9hnwh24G9d1WexY9Bs4Nhjv4qBYnfv64S39NzHlEcaML7d5lJk2gfZ9pFCpIKt
 lIDFOVs=
 =O9k3
 -----END PGP SIGNATURE-----

Merge tag 'tags/virtio_snd-5.12-rc2' into for-next

ALSA: add virtio sound driver

This series implements a driver part of the virtio sound device
specification v8 [1].

The driver supports PCM playback and capture substreams, jack and
channel map controls. A message-based transport is used to write/read
PCM frames to/from a device.

As a device part was used OpenSynergy proprietary implementation.

v7 changes:
 - Moved the snd_pcm_period_elapsed() call from the interrupt handler to the
   kernel worker for being consistent with the non-atomic mode of the PCM
   device.
 - Removed SNDRV_PCM_INFO_RESUME flag. Now ops->prepare() sets the parameters
   for the substream if it was previously suspended.
 - Some additional code readability improvements/comments.

[1] https://lists.oasis-open.org/archives/virtio-dev/202003/msg00185.html

Link: https://lore.kernel.org/r/20210302164709.3142702-1-anton.yakovlev@opensynergy.com
This commit is contained in:
Takashi Iwai 2021-03-07 09:16:24 +01:00
commit 59117306e0
176 changed files with 6354 additions and 1888 deletions

View file

@ -430,13 +430,13 @@ fifo_expire_async
-----------------
This parameter is used to set the timeout of asynchronous requests. Default
value of this is 248ms.
value of this is 250ms.
fifo_expire_sync
----------------
This parameter is used to set the timeout of synchronous requests. Default
value of this is 124ms. In case to favor synchronous requests over asynchronous
value of this is 125ms. In case to favor synchronous requests over asynchronous
one, this value should be decreased relative to fifo_expire_async.
low_latency

View file

@ -109,7 +109,7 @@ required:
- resets
- ddc
unevaluatedProperties: false
additionalProperties: false
examples:
- |

View file

@ -49,10 +49,14 @@ properties:
# See ../video-interfaces.txt for more details
port:
type: object
$ref: /schemas/graph.yaml#/properties/port
additionalProperties: false
properties:
endpoint:
type: object
$ref: /schemas/media/video-interfaces.yaml#
unevaluatedProperties: false
properties:
data-lanes:
oneOf:
@ -65,11 +69,7 @@ properties:
- const: 1
- const: 2
link-frequencies:
allOf:
- $ref: /schemas/types.yaml#/definitions/uint64-array
description:
Allowed data bus frequencies.
link-frequencies: true
required:
- data-lanes

View file

@ -31,7 +31,8 @@ properties:
maxItems: 1
port:
$ref: /schemas/graph.yaml#/$defs/port-base
$ref: /schemas/graph.yaml#/properties/port
additionalProperties: false
properties:
endpoint:
@ -41,8 +42,6 @@ properties:
properties:
clock-noncontinuous: true
additionalProperties: false
required:
- compatible
- reg

View file

@ -44,19 +44,17 @@ properties:
description: Reset Pin GPIO Control (active low)
port:
type: object
description: MIPI CSI-2 transmitter port
$ref: /schemas/graph.yaml#/properties/port
additionalProperties: false
properties:
endpoint:
type: object
$ref: /schemas/media/video-interfaces.yaml#
unevaluatedProperties: false
properties:
remote-endpoint: true
link-frequencies:
$ref: /schemas/types.yaml#/definitions/uint64-array
description: Allowed MIPI CSI-2 link frequencies
link-frequencies: true
data-lanes:
minItems: 1
@ -65,10 +63,6 @@ properties:
required:
- data-lanes
- link-frequencies
- remote-endpoint
required:
- endpoint
required:
- compatible

View file

@ -44,19 +44,17 @@ properties:
description: Reset Pin GPIO Control (active low)
port:
type: object
description: MIPI CSI-2 transmitter port
$ref: /schemas/graph.yaml#/properties/port
additionalProperties: false
properties:
endpoint:
type: object
$ref: /schemas/media/video-interfaces.yaml#
unevaluatedProperties: false
properties:
remote-endpoint: true
link-frequencies:
$ref: /schemas/types.yaml#/definitions/uint64-array
description: Allowed MIPI CSI-2 link frequencies
link-frequencies: true
data-lanes:
minItems: 1
@ -65,10 +63,6 @@ properties:
required:
- data-lanes
- link-frequencies
- remote-endpoint
required:
- endpoint
required:
- compatible

View file

@ -36,18 +36,17 @@ properties:
description: Reference to the GPIO connected to the XCLR pin, if any.
port:
type: object
additionalProperties: false
$ref: /schemas/graph.yaml#/properties/port
properties:
endpoint:
type: object
$ref: /schemas/media/video-interfaces.yaml#
unevaluatedProperties: false
properties:
data-lanes:
$ref: ../video-interfaces.yaml#/properties/data-lanes
link-frequencies:
$ref: ../video-interfaces.yaml#/properties/link-frequencies
data-lanes: true
link-frequencies: true
required:
- data-lanes

View file

@ -3856,49 +3856,20 @@ base 2 of the page size in the bottom 6 bits.
-EFAULT if struct kvm_reinject_control cannot be read,
-EINVAL if the supplied shift or flags are invalid,
-ENOMEM if unable to allocate the new HPT,
-ENOSPC if there was a hash collision
::
struct kvm_ppc_rmmu_info {
struct kvm_ppc_radix_geom {
__u8 page_shift;
__u8 level_bits[4];
__u8 pad[3];
} geometries[8];
__u32 ap_encodings[8];
};
The geometries[] field gives up to 8 supported geometries for the
radix page table, in terms of the log base 2 of the smallest page
size, and the number of bits indexed at each level of the tree, from
the PTE level up to the PGD level in that order. Any unused entries
will have 0 in the page_shift field.
The ap_encodings gives the supported page sizes and their AP field
encodings, encoded with the AP value in the top 3 bits and the log
base 2 of the page size in the bottom 6 bits.
4.102 KVM_PPC_RESIZE_HPT_PREPARE
--------------------------------
:Capability: KVM_CAP_SPAPR_RESIZE_HPT
:Architectures: powerpc
:Type: vm ioctl
:Parameters: struct kvm_ppc_resize_hpt (in)
:Returns: 0 on successful completion,
>0 if a new HPT is being prepared, the value is an estimated
number of milliseconds until preparation is complete,
-EFAULT if struct kvm_reinject_control cannot be read,
-EINVAL if the supplied shift or flags are invalid,when moving existing
HPT entries to the new HPT,
-EIO on other error conditions
Used to implement the PAPR extension for runtime resizing of a guest's
Hashed Page Table (HPT). Specifically this starts, stops or monitors
the preparation of a new potential HPT for the guest, essentially
implementing the H_RESIZE_HPT_PREPARE hypercall.
::
struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
};
If called with shift > 0 when there is no pending HPT for the guest,
this begins preparation of a new pending HPT of size 2^(shift) bytes.
It then returns a positive integer with the estimated number of
@ -3926,14 +3897,6 @@ Normally this will be called repeatedly with the same parameters until
it returns <= 0. The first call will initiate preparation, subsequent
ones will monitor preparation until it completes or fails.
::
struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
};
4.103 KVM_PPC_RESIZE_HPT_COMMIT
-------------------------------
@ -3956,6 +3919,14 @@ Hashed Page Table (HPT). Specifically this requests that the guest be
transferred to working with the new HPT, essentially implementing the
H_RESIZE_HPT_COMMIT hypercall.
::
struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
};
This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
returned 0 with the same parameters. In other cases
KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
@ -3971,14 +3942,6 @@ HPT and the previous HPT will be discarded.
On failure, the guest will still be operating on its previous HPT.
::
struct kvm_ppc_resize_hpt {
__u64 flags;
__u32 shift;
__u32 pad;
};
4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
-----------------------------------
@ -4915,6 +4878,14 @@ see KVM_XEN_HVM_SET_ATTR above.
union {
__u64 gpa;
__u64 pad[4];
struct {
__u64 state;
__u64 state_entry_time;
__u64 time_running;
__u64 time_runnable;
__u64 time_blocked;
__u64 time_offline;
} runstate;
} u;
};
@ -4927,6 +4898,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
Sets the guest physical address of an additional pvclock structure
for a given vCPU. This is typically used for guest vsyscall support.
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
Sets the guest physical address of the vcpu_runstate_info for a given
vCPU. This is how a Xen guest tracks CPU state such as steal time.
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
the given vCPU from the .u.runstate.state member of the structure.
KVM automatically accounts running and runnable time but blocked
and offline states are only entered explicitly.
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA
Sets all fields of the vCPU runstate data from the .u.runstate member
of the structure, including the current runstate. The state_entry_time
must equal the sum of the other four times.
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
This *adds* the contents of the .u.runstate members of the structure
to the corresponding members of the given vCPU's runstate data, thus
permitting atomic adjustments to the runstate times. The adjustment
to the state_entry_time must equal the sum of the adjustments to the
other four times. The state field must be set to -1, or to a valid
runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked
or RUNSTATE_offline) to set the current accounted state as of the
adjusted state_entry_time.
4.130 KVM_XEN_VCPU_GET_ATTR
---------------------------
@ -4939,6 +4935,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
Allows Xen vCPU attributes to be read. For the structure and types,
see KVM_XEN_VCPU_SET_ATTR above.
The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
with the KVM_XEN_VCPU_GET_ATTR ioctl.
5. The kvm_run structure
========================
@ -5000,7 +4999,8 @@ local APIC is not used.
__u16 flags;
More architecture-specific flags detailing state of the VCPU that may
affect the device's behavior. Current defined flags:
affect the device's behavior. Current defined flags::
/* x86, set if the VCPU is in system management mode */
#define KVM_RUN_X86_SMM (1 << 0)
/* x86, set if bus lock detected in VM */
@ -6217,7 +6217,7 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit
notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
KVM_RUN_BUS_LOCK flag is used to distinguish between them.
7.22 KVM_CAP_PPC_DAWR1
7.23 KVM_CAP_PPC_DAWR1
----------------------
:Architectures: ppc
@ -6702,6 +6702,7 @@ PVHVM guests. Valid flags are::
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 2)
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@ -6716,3 +6717,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
vcpu_info is set.
The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.

View file

@ -19086,6 +19086,15 @@ W: https://virtio-mem.gitlab.io/
F: drivers/virtio/virtio_mem.c
F: include/uapi/linux/virtio_mem.h
VIRTIO SOUND DRIVER
M: Anton Yakovlev <anton.yakovlev@opensynergy.com>
M: "Michael S. Tsirkin" <mst@redhat.com>
L: virtualization@lists.linux-foundation.org
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Maintained
F: include/uapi/linux/virtio_snd.h
F: sound/virtio/*
VIRTUAL BOX GUEST DEVICE DRIVER
M: Hans de Goede <hdegoede@redhat.com>
M: Arnd Bergmann <arnd@arndb.de>

View file

@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 12
SUBLEVEL = 0
EXTRAVERSION = -rc1
EXTRAVERSION = -rc2
NAME = Frozen Wasteland
# *DOCUMENTATION*

View file

@ -93,12 +93,39 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
int i;
for (i = 0; i < count; i++) {
struct gnttab_unmap_grant_ref unmap;
int rc;
if (map_ops[i].status)
continue;
if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) {
return -ENOMEM;
}
if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT)))
continue;
/*
* Signal an error for this slot. This in turn requires
* immediate unmapping.
*/
map_ops[i].status = GNTST_general_error;
unmap.host_addr = map_ops[i].host_addr,
unmap.handle = map_ops[i].handle;
map_ops[i].handle = ~0;
if (map_ops[i].flags & GNTMAP_device_map)
unmap.dev_bus_addr = map_ops[i].dev_bus_addr;
else
unmap.dev_bus_addr = 0;
/*
* Pre-populate the status field, to be recognizable in
* the log message below.
*/
unmap.status = 1;
rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
&unmap, 1);
if (rc || unmap.status != GNTST_okay)
pr_err_once("gnttab unmap failed: rc=%d st=%d\n",
rc, unmap.status);
}
return 0;

View file

@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
* need to push through a forced SIGSEGV.
*/
while (1) {
get_signal(&ksig);
if (!get_signal(&ksig))
break;
/*
* get_signal() may have run a debugger (via notify_parent())

View file

@ -535,10 +535,16 @@ struct kvm_vcpu_hv {
/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
u64 hypercall_rip;
u32 current_runstate;
bool vcpu_info_set;
bool vcpu_time_info_set;
bool runstate_set;
struct gfn_to_hva_cache vcpu_info_cache;
struct gfn_to_hva_cache vcpu_time_info_cache;
struct gfn_to_hva_cache runstate_cache;
u64 last_steal;
u64 runstate_entry_time;
u64 runstate_times[4];
};
struct kvm_vcpu_arch {
@ -939,9 +945,6 @@ struct kvm_arch {
unsigned int indirect_shadow_pages;
u8 mmu_valid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
*/
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
struct list_head lpage_disallowed_mmu_pages;

View file

@ -86,6 +86,18 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
}
#endif
/*
* The maximum amount of extra memory compared to the base size. The
* main scaling factor is the size of struct page. At extreme ratios
* of base:extra, all the base memory can be filled with page
* structures for the extra memory, leaving no space for anything
* else.
*
* 10x seems like a reasonable balance between scaling flexibility and
* leaving a practically usable system.
*/
#define XEN_EXTRA_MEM_RATIO (10)
/*
* Helper functions to write or read unsigned long values to/from
* memory, when the access may fault.

View file

@ -103,6 +103,15 @@ config KVM_AMD_SEV
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
config KVM_XEN
bool "Support for Xen hypercall interface"
depends on KVM
help
Provides KVM support for the hosting Xen HVM guests and
passing Xen hypercalls to userspace.
If in doubt, say "N".
config KVM_MMU_AUDIT
bool "Audit KVM MMU"
depends on KVM && TRACEPOINTS

View file

@ -14,11 +14,12 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/dirty_ring.o
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o xen.o \
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
mmu/spte.o
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
kvm-$(CONFIG_KVM_XEN) += xen.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o

View file

@ -159,7 +159,7 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
struct kvm_vcpu_hv_synic *synic;
vcpu = get_vcpu_by_vpidx(kvm, vpidx);
if (!vcpu)
if (!vcpu || !to_hv_vcpu(vcpu))
return NULL;
synic = to_hv_synic(vcpu);
return (synic->active) ? synic : NULL;

View file

@ -81,15 +81,15 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
{
/*
* When using the EPT page-modification log, the GPAs in the log
* would come from L2 rather than L1. Therefore, we need to rely
* on write protection to record dirty pages. This also bypasses
* PML, since writes now result in a vmexit. Note, this helper will
* tag SPTEs as needing write-protection even if PML is disabled or
* unsupported, but that's ok because the tag is consumed if and only
* if PML is enabled. Omit the PML check to save a few uops.
* When using the EPT page-modification log, the GPAs in the CPU dirty
* log would come from L2 rather than L1. Therefore, we need to rely
* on write protection to record dirty pages, which bypasses PML, since
* writes now result in a vmexit. Note, the check on CPU dirty logging
* being enabled is mandatory as the bits used to denote WP-only SPTEs
* are reserved for NPT w/ PAE (32-bit KVM).
*/
return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
kvm_x86_ops.cpu_dirty_log_size;
}
bool is_nx_huge_page_enabled(void);

View file

@ -1200,6 +1200,7 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
svm_set_cr4(&svm->vcpu, 0);
svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);

View file

@ -2957,6 +2957,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
struct kvm_steal_time *st;
if (kvm_xen_msr_enabled(vcpu->kvm)) {
kvm_xen_runstate_set_running(vcpu);
return;
}
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@ -3756,11 +3761,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
r = 1;
break;
#ifdef CONFIG_KVM_XEN
case KVM_CAP_XEN_HVM:
r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
KVM_XEN_HVM_CONFIG_SHARED_INFO;
if (sched_info_on())
r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
break;
#endif
case KVM_CAP_SYNC_REGS:
r = KVM_SYNC_X86_VALID_FIELDS;
break;
@ -4038,7 +4047,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
if (vcpu->preempted && !vcpu->arch.guest_state_protected)
vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
kvm_steal_time_set_preempted(vcpu);
if (kvm_xen_msr_enabled(vcpu->kvm))
kvm_xen_runstate_set_preempted(vcpu);
else
kvm_steal_time_set_preempted(vcpu);
static_call(kvm_x86_vcpu_put)(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
/*
@ -5013,6 +5026,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
break;
#ifdef CONFIG_KVM_XEN
case KVM_XEN_VCPU_GET_ATTR: {
struct kvm_xen_vcpu_attr xva;
@ -5033,6 +5047,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_xen_vcpu_set_attr(vcpu, &xva);
break;
}
#endif
default:
r = -EINVAL;
}
@ -5654,6 +5669,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
kvm->arch.bsp_vcpu_id = arg;
mutex_unlock(&kvm->lock);
break;
#ifdef CONFIG_KVM_XEN
case KVM_XEN_HVM_CONFIG: {
struct kvm_xen_hvm_config xhc;
r = -EFAULT;
@ -5682,6 +5698,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_xen_hvm_set_attr(kvm, &xha);
break;
}
#endif
case KVM_SET_CLOCK: {
struct kvm_clock_data user_ns;
u64 now_ns;
@ -8040,7 +8057,10 @@ void kvm_arch_exit(void)
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
#endif
}
static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)

View file

@ -11,9 +11,11 @@
#include "hyperv.h"
#include <linux/kvm_host.h>
#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include "trace.h"
@ -61,6 +63,132 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
return ret;
}
static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
{
struct kvm_vcpu_xen *vx = &v->arch.xen;
u64 now = get_kvmclock_ns(v->kvm);
u64 delta_ns = now - vx->runstate_entry_time;
u64 run_delay = current->sched_info.run_delay;
if (unlikely(!vx->runstate_entry_time))
vx->current_runstate = RUNSTATE_offline;
/*
* Time waiting for the scheduler isn't "stolen" if the
* vCPU wasn't running anyway.
*/
if (vx->current_runstate == RUNSTATE_running) {
u64 steal_ns = run_delay - vx->last_steal;
delta_ns -= steal_ns;
vx->runstate_times[RUNSTATE_runnable] += steal_ns;
}
vx->last_steal = run_delay;
vx->runstate_times[vx->current_runstate] += delta_ns;
vx->current_runstate = state;
vx->runstate_entry_time = now;
}
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
{
struct kvm_vcpu_xen *vx = &v->arch.xen;
uint64_t state_entry_time;
unsigned int offset;
kvm_xen_update_runstate(v, state);
if (!vx->runstate_set)
return;
BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
#ifdef CONFIG_X86_64
/*
* The only difference is alignment of uint64_t in 32-bit.
* So the first field 'state' is accessed directly using
* offsetof() (where its offset happens to be zero), while the
* remaining fields which are all uint64_t, start at 'offset'
* which we tweak here by adding 4.
*/
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
offsetof(struct compat_vcpu_runstate_info, time) + 4);
if (v->kvm->arch.xen.long_mode)
offset = offsetof(struct vcpu_runstate_info, state_entry_time);
#endif
/*
* First write the updated state_entry_time at the appropriate
* location determined by 'offset'.
*/
state_entry_time = vx->runstate_entry_time;
state_entry_time |= XEN_RUNSTATE_UPDATE;
BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
sizeof(state_entry_time));
BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
sizeof(state_entry_time));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
&state_entry_time, offset,
sizeof(state_entry_time)))
return;
smp_wmb();
/*
* Next, write the new runstate. This is in the *same* place
* for 32-bit and 64-bit guests, asserted here for paranoia.
*/
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
offsetof(struct compat_vcpu_runstate_info, state));
BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
sizeof(vx->current_runstate));
BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
sizeof(vx->current_runstate));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
&vx->current_runstate,
offsetof(struct vcpu_runstate_info, state),
sizeof(vx->current_runstate)))
return;
/*
* Write the actual runstate times immediately after the
* runstate_entry_time.
*/
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
sizeof(((struct compat_vcpu_runstate_info *)0)->time));
BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
sizeof(vx->runstate_times));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
&vx->runstate_times[0],
offset + sizeof(u64),
sizeof(vx->runstate_times)))
return;
smp_wmb();
/*
* Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
* runstate_entry_time field.
*/
state_entry_time &= ~XEN_RUNSTATE_UPDATE;
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
&state_entry_time, offset,
sizeof(state_entry_time)))
return;
}
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
u8 rc = 0;
@ -187,9 +315,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
/* No compat necessary here. */
BUILD_BUG_ON(sizeof(struct vcpu_info) !=
sizeof(struct compat_vcpu_info));
BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
vcpu->arch.xen.vcpu_info_set = false;
r = 0;
break;
}
@ -206,6 +337,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
if (data->u.gpa == GPA_INVALID) {
vcpu->arch.xen.vcpu_time_info_set = false;
r = 0;
break;
}
@ -219,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
}
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
if (data->u.gpa == GPA_INVALID) {
vcpu->arch.xen.runstate_set = false;
r = 0;
break;
}
r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.xen.runstate_cache,
data->u.gpa,
sizeof(struct vcpu_runstate_info));
if (!r) {
vcpu->arch.xen.runstate_set = true;
}
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
if (data->u.runstate.state > RUNSTATE_offline) {
r = -EINVAL;
break;
}
kvm_xen_update_runstate(vcpu, data->u.runstate.state);
r = 0;
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
if (data->u.runstate.state > RUNSTATE_offline) {
r = -EINVAL;
break;
}
if (data->u.runstate.state_entry_time !=
(data->u.runstate.time_running +
data->u.runstate.time_runnable +
data->u.runstate.time_blocked +
data->u.runstate.time_offline)) {
r = -EINVAL;
break;
}
if (get_kvmclock_ns(vcpu->kvm) <
data->u.runstate.state_entry_time) {
r = -EINVAL;
break;
}
vcpu->arch.xen.current_runstate = data->u.runstate.state;
vcpu->arch.xen.runstate_entry_time =
data->u.runstate.state_entry_time;
vcpu->arch.xen.runstate_times[RUNSTATE_running] =
data->u.runstate.time_running;
vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
data->u.runstate.time_runnable;
vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
data->u.runstate.time_blocked;
vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
data->u.runstate.time_offline;
vcpu->arch.xen.last_steal = current->sched_info.run_delay;
r = 0;
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
if (data->u.runstate.state > RUNSTATE_offline &&
data->u.runstate.state != (u64)-1) {
r = -EINVAL;
break;
}
/* The adjustment must add up */
if (data->u.runstate.state_entry_time !=
(data->u.runstate.time_running +
data->u.runstate.time_runnable +
data->u.runstate.time_blocked +
data->u.runstate.time_offline)) {
r = -EINVAL;
break;
}
if (get_kvmclock_ns(vcpu->kvm) <
(vcpu->arch.xen.runstate_entry_time +
data->u.runstate.state_entry_time)) {
r = -EINVAL;
break;
}
vcpu->arch.xen.runstate_entry_time +=
data->u.runstate.state_entry_time;
vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
data->u.runstate.time_running;
vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
data->u.runstate.time_runnable;
vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
data->u.runstate.time_blocked;
vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
data->u.runstate.time_offline;
if (data->u.runstate.state <= RUNSTATE_offline)
kvm_xen_update_runstate(vcpu, data->u.runstate.state);
r = 0;
break;
default:
break;
}
@ -251,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
r = 0;
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
if (vcpu->arch.xen.runstate_set) {
data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
r = 0;
}
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
data->u.runstate.state = vcpu->arch.xen.current_runstate;
r = 0;
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
if (!sched_info_on()) {
r = -EOPNOTSUPP;
break;
}
data->u.runstate.state = vcpu->arch.xen.current_runstate;
data->u.runstate.state_entry_time =
vcpu->arch.xen.runstate_entry_time;
data->u.runstate.time_running =
vcpu->arch.xen.runstate_times[RUNSTATE_running];
data->u.runstate.time_runnable =
vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
data->u.runstate.time_blocked =
vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
data->u.runstate.time_offline =
vcpu->arch.xen.runstate_times[RUNSTATE_offline];
r = 0;
break;
case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
r = -EINVAL;
break;
default:
break;
}

View file

@ -9,6 +9,7 @@
#ifndef __ARCH_X86_KVM_XEN_H__
#define __ARCH_X86_KVM_XEN_H__
#ifdef CONFIG_KVM_XEN
#include <linux/jump_label_ratelimit.h>
extern struct static_key_false_deferred kvm_xen_enabled;
@ -18,11 +19,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
void kvm_xen_destroy_vm(struct kvm *kvm);
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
kvm->arch.xen_hvm_config.msr;
}
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
@ -38,11 +44,59 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
return 0;
}
#else
static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
{
return 1;
}
static inline void kvm_xen_destroy_vm(struct kvm *kvm)
{
}
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
return false;
}
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
{
return false;
}
static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
{
return 0;
}
#endif
int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
/* 32-bit compatibility definitions, also used natively in 32-bit build */
#include <asm/pvclock-abi.h>
#include <asm/xen/interface.h>
#include <xen/interface/vcpu.h>
void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
{
kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
}
static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
{
/*
* If the vCPU wasn't preempted but took a normal exit for
* some reason (hypercalls, I/O, etc.), that is accounted as
* still RUNSTATE_running, as the VMM is still operating on
* behalf of the vCPU. Only if the VMM does actually block
* does it need to enter RUNSTATE_blocked.
*/
if (vcpu->preempted)
kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
}
/* 32-bit compatibility definitions, also used natively in 32-bit build */
struct compat_arch_vcpu_info {
unsigned int cr2;
unsigned int pad[5];
@ -75,4 +129,10 @@ struct compat_shared_info {
struct compat_arch_shared_info arch;
};
struct compat_vcpu_runstate_info {
int state;
uint64_t state_entry_time;
uint64_t time[4];
} __attribute__((packed));
#endif /* __ARCH_X86_KVM_XEN_H__ */

View file

@ -416,6 +416,9 @@ void __init xen_vmalloc_p2m_tree(void)
xen_p2m_last_pfn = xen_max_p2m_pfn;
p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
if (!p2m_limit && IS_ENABLED(CONFIG_XEN_UNPOPULATED_ALLOC))
p2m_limit = xen_start_info->nr_pages * XEN_EXTRA_MEM_RATIO;
vm.flags = VM_ALLOC;
vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
PMD_SIZE * PMDS_PER_MID_PAGE);
@ -652,10 +655,9 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
pte_t *ptep;
unsigned int level;
if (unlikely(pfn >= xen_p2m_size)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
}
/* Only invalid entries allowed above the highest p2m covered frame. */
if (unlikely(pfn >= xen_p2m_size))
return mfn == INVALID_P2M_ENTRY;
/*
* The interface requires atomic updates on p2m elements.
@ -710,6 +712,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
for (i = 0; i < count; i++) {
unsigned long mfn, pfn;
struct gnttab_unmap_grant_ref unmap[2];
int rc;
/* Do not add to override if the map failed. */
if (map_ops[i].status != GNTST_okay ||
@ -727,10 +731,46 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");
if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
ret = -ENOMEM;
goto out;
if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
continue;
/*
* Signal an error for this slot. This in turn requires
* immediate unmapping.
*/
map_ops[i].status = GNTST_general_error;
unmap[0].host_addr = map_ops[i].host_addr,
unmap[0].handle = map_ops[i].handle;
map_ops[i].handle = ~0;
if (map_ops[i].flags & GNTMAP_device_map)
unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr;
else
unmap[0].dev_bus_addr = 0;
if (kmap_ops) {
kmap_ops[i].status = GNTST_general_error;
unmap[1].host_addr = kmap_ops[i].host_addr,
unmap[1].handle = kmap_ops[i].handle;
kmap_ops[i].handle = ~0;
if (kmap_ops[i].flags & GNTMAP_device_map)
unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr;
else
unmap[1].dev_bus_addr = 0;
}
/*
* Pre-populate both status fields, to be recognizable in
* the log message below.
*/
unmap[0].status = 1;
unmap[1].status = 1;
rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
unmap, 1 + !!kmap_ops);
if (rc || unmap[0].status != GNTST_okay ||
unmap[1].status != GNTST_okay)
pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n",
rc, unmap[0].status, unmap[1].status);
}
out:

View file

@ -59,18 +59,6 @@ static struct {
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
/*
* The maximum amount of extra memory compared to the base size. The
* main scaling factor is the size of struct page. At extreme ratios
* of base:extra, all the base memory can be filled with page
* structures for the extra memory, leaving no space for anything
* else.
*
* 10x seems like a reasonable balance between scaling flexibility and
* leaving a practically usable system.
*/
#define EXTRA_MEM_RATIO (10)
static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
static void __init xen_parse_512gb(void)
@ -790,20 +778,13 @@ char * __init xen_memory_setup(void)
extra_pages += max_pages - max_pfn;
/*
* Clamp the amount of extra memory to a EXTRA_MEM_RATIO
* factor the base size. On non-highmem systems, the base
* size is the full initial memory allocation; on highmem it
* is limited to the max size of lowmem, so that it doesn't
* get completely filled.
* Clamp the amount of extra memory to a XEN_EXTRA_MEM_RATIO
* factor the base size.
*
* Make sure we have no memory above max_pages, as this area
* isn't handled by the p2m management.
*
* In principle there could be a problem in lowmem systems if
* the initial memory is also very large with respect to
* lowmem, but we won't try to deal with that here.
*/
extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
extra_pages = min3(XEN_EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
extra_pages, max_pages - max_pfn);
i = 0;
addr = xen_e820_table.entries[0].addr;

View file

@ -162,7 +162,7 @@ BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(softrt_update);
#undef BFQ_BFQQ_FNS \
/* Expiration time of sync (0) and async (1) requests, in ns. */
/* Expiration time of async (0) and sync (1) requests, in ns. */
static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
/* Maximum backwards seek (magic number lifted from CFQ), in KiB. */

View file

@ -292,7 +292,6 @@ static const char *const cmd_flag_name[] = {
#define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name
static const char *const rqf_name[] = {
RQF_NAME(SORTED),
RQF_NAME(STARTED),
RQF_NAME(SOFTBARRIER),
RQF_NAME(FLUSH_SEQ),

View file

@ -385,7 +385,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
bool has_sched,
struct request *rq)
{
/*
@ -402,9 +401,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
return true;
if (has_sched)
rq->rq_flags |= RQF_SORTED;
return false;
}
@ -418,7 +414,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
if (blk_mq_sched_bypass_insert(hctx, rq)) {
/*
* Firstly normal IO request is inserted to scheduler queue or
* sw queue, meantime we add flush request to dispatch queue(

View file

@ -45,11 +45,10 @@ static void disk_release_events(struct gendisk *disk);
void set_capacity(struct gendisk *disk, sector_t sectors)
{
struct block_device *bdev = disk->part0;
unsigned long flags;
spin_lock_irqsave(&bdev->bd_size_lock, flags);
spin_lock(&bdev->bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
spin_unlock(&bdev->bd_size_lock);
}
EXPORT_SYMBOL(set_capacity);

View file

@ -88,11 +88,9 @@ static int (*check_part[])(struct parsed_partitions *) = {
static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
unsigned long flags;
spin_lock_irqsave(&bdev->bd_size_lock, flags);
spin_lock(&bdev->bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
spin_unlock(&bdev->bd_size_lock);
}
static struct parsed_partitions *allocate_partitions(struct gendisk *hd)

View file

@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev)
static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
int retval, idx;
bool use_links = dev->power.links_count > 0;
bool get = false;
int retval, idx;
bool put;
if (dev->power.irq_safe) {
spin_unlock(&dev->power.lock);
} else if (!use_links) {
spin_unlock_irq(&dev->power.lock);
} else {
get = dev->power.runtime_status == RPM_RESUMING;
spin_unlock_irq(&dev->power.lock);
/*
* Resume suppliers if necessary.
*
* The device's runtime PM status cannot change until this
* routine returns, so it is safe to read the status outside of
* the lock.
*/
if (use_links && dev->power.runtime_status == RPM_RESUMING) {
/* Resume suppliers if necessary. */
if (get) {
idx = device_links_read_lock();
retval = rpm_get_suppliers(dev);
@ -355,24 +355,36 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
if (dev->power.irq_safe) {
spin_lock(&dev->power.lock);
} else {
/*
* If the device is suspending and the callback has returned
* success, drop the usage counters of the suppliers that have
* been reference counted on its resume.
*
* Do that if resume fails too.
*/
if (use_links
&& ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
|| (dev->power.runtime_status == RPM_RESUMING && retval))) {
idx = device_links_read_lock();
return retval;
}
fail:
rpm_put_suppliers(dev);
spin_lock_irq(&dev->power.lock);
device_links_read_unlock(idx);
}
if (!use_links)
return retval;
/*
* If the device is suspending and the callback has returned success,
* drop the usage counters of the suppliers that have been reference
* counted on its resume.
*
* Do that if the resume fails too.
*/
put = dev->power.runtime_status == RPM_SUSPENDING && !retval;
if (put)
__update_runtime_status(dev, RPM_SUSPENDED);
else
put = get && retval;
if (put) {
spin_unlock_irq(&dev->power.lock);
idx = device_links_read_lock();
fail:
rpm_put_suppliers(dev);
device_links_read_unlock(idx);
spin_lock_irq(&dev->power.lock);
}

View file

@ -165,15 +165,17 @@ static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
{
struct rsxx_cardinfo *card = file_inode(fp)->i_private;
char *buf;
ssize_t st;
int st;
buf = kzalloc(cnt, GFP_KERNEL);
if (!buf)
return -ENOMEM;
st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1);
if (!st)
st = copy_to_user(ubuf, buf, cnt);
if (!st) {
if (copy_to_user(ubuf, buf, cnt))
st = -EFAULT;
}
kfree(buf);
if (st)
return st;

View file

@ -11,7 +11,6 @@
#ifndef __RSXX_PRIV_H__
#define __RSXX_PRIV_H__
#include <linux/version.h>
#include <linux/semaphore.h>
#include <linux/fs.h>

View file

@ -278,8 +278,6 @@ static void tpm_devs_release(struct device *dev)
{
struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
dump_stack();
/* release the master device reference */
put_device(&chip->dev);
}

View file

@ -707,12 +707,22 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
const char *desc = "attempting to generate an interrupt";
u32 cap2;
cap_t cap;
int ret;
/* TPM 2.0 */
if (chip->flags & TPM_CHIP_FLAG_TPM2)
return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
else
return tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc,
0);
/* TPM 1.2 */
ret = request_locality(chip, 0);
if (ret < 0)
return ret;
ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
release_locality(chip, 0);
return ret;
}
/* Register the IRQ and issue a command that will cause an interrupt. If an
@ -1019,11 +1029,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
init_waitqueue_head(&priv->read_queue);
init_waitqueue_head(&priv->int_queue);
if (irq != -1) {
/* Before doing irq testing issue a command to the TPM in polling mode
/*
* Before doing irq testing issue a command to the TPM in polling mode
* to make sure it works. May as well use that command to set the
* proper timeouts for the driver.
*/
if (tpm_get_timeouts(chip)) {
rc = request_locality(chip, 0);
if (rc < 0)
goto out_err;
rc = tpm_get_timeouts(chip);
release_locality(chip, 0);
if (rc) {
dev_err(dev, "Could not get TPM timeouts and durations\n");
rc = -ENODEV;
goto out_err;

View file

@ -903,10 +903,11 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
*/
bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
{
#if defined(CONFIG_AMD_PMC)
if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
if (adev->flags & AMD_IS_APU)
return true;
}
#endif
return false;
}

View file

@ -357,7 +357,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;
value = RREG32_PCIE(*pos >> 2);
value = RREG32_PCIE(*pos);
r = put_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@ -424,7 +424,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
return r;
}
WREG32_PCIE(*pos >> 2, value);
WREG32_PCIE(*pos, value);
result += 4;
buf += 4;

View file

@ -173,8 +173,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
switch (adev->asic_type) {
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_SIENNA_CICHLID:
case CHIP_NAVY_FLOUNDER:
/* enable runpm if runpm=1 */
if (amdgpu_runtime_pm > 0)
adev->runpm = true;

View file

@ -558,7 +558,8 @@ static bool nv_is_headless_sku(struct pci_dev *pdev)
{
if ((pdev->device == 0x731E &&
(pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
(pdev->device == 0x7340 && pdev->revision == 0xC9))
(pdev->device == 0x7340 && pdev->revision == 0xC9) ||
(pdev->device == 0x7360 && pdev->revision == 0xC7))
return true;
return false;
}
@ -634,7 +635,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
if (!nv_is_headless_sku(adev->pdev))
amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
break;

View file

@ -530,7 +530,7 @@ bool dm_helpers_dp_write_dsc_enable(
{
uint8_t enable_dsc = enable ? 1 : 0;
struct amdgpu_dm_connector *aconnector;
uint8_t ret;
uint8_t ret = 0;
if (!stream)
return false;

View file

@ -1322,7 +1322,7 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu,
CMN2ASIC_MAPPING_WORKLOAD,
profile_mode);
if (workload_type < 0) {
dev_err(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
return -EINVAL;
}

View file

@ -78,6 +78,9 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_smc.bin");
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xC000
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0xE
#define mmTHM_BACO_CNTL_ARCT 0xA7
#define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0
static int link_width[] = {0, 1, 2, 4, 8, 12, 16};
static int link_speed[] = {25, 50, 80, 160};
@ -1532,9 +1535,15 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
break;
default:
if (!ras || !ras->supported) {
data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
data |= 0x80000000;
WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
if (adev->asic_type == CHIP_ARCTURUS) {
data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT);
data |= 0x80000000;
WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT, data);
} else {
data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
data |= 0x80000000;
WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
}
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 0, NULL);
} else {

View file

@ -810,7 +810,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input,
CMN2ASIC_MAPPING_WORKLOAD,
profile_mode);
if (workload_type < 0) {
dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
profile_mode);
return -EINVAL;
}
@ -1685,9 +1685,9 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en)
uint32_t feature_mask[2];
int ret = 0;
if (adev->pm.fw_version >= 0x43f1700)
if (adev->pm.fw_version >= 0x43f1700 && !en)
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify,
en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL);
RLC_STATUS_OFF, NULL);
bitmap_zero(feature->enabled, feature->feature_num);
bitmap_zero(feature->supported, feature->feature_num);

View file

@ -844,7 +844,7 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u
* TODO: If some case need switch to powersave/default power mode
* then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving.
*/
dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
return -EINVAL;
}

View file

@ -261,6 +261,9 @@ gk104_fifo_pbdma = {
struct nvkm_engine *
gk104_fifo_id_engine(struct nvkm_fifo *base, int engi)
{
if (engi == GK104_FIFO_ENGN_SW)
return nvkm_device_engine(base->engine.subdev.device, NVKM_ENGINE_SW, 0);
return gk104_fifo(base)->engine[engi].engine;
}

View file

@ -3651,6 +3651,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
struct ib_mad_send_buf *msg;
unsigned long flags;
int ret;
lockdep_assert_held(&cm_id_priv->lock);
@ -3676,12 +3677,12 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
return ret;
}
cm_id_priv->id.state = IB_CM_IDLE;
spin_lock_irq(&cm.lock);
spin_lock_irqsave(&cm.lock, flags);
if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
}
spin_unlock_irq(&cm.lock);
spin_unlock_irqrestore(&cm.lock, flags);
return 0;
}

View file

@ -91,7 +91,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
}
/**
* uverbs_alloc() - Quickly allocate memory for use with a bundle
* _uverbs_alloc() - Quickly allocate memory for use with a bundle
* @bundle: The bundle
* @size: Number of bytes to allocate
* @flags: Allocator flags

View file

@ -2073,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
num_alloc_xa_entries++;
event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
if (!event_sub)
if (!event_sub) {
err = -ENOMEM;
goto err;
}
list_add_tail(&event_sub->event_list, &sub_list);
uverbs_uobject_get(&ev_file->uobj);

View file

@ -1082,7 +1082,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
return ret ? ret : npages;
}
/**
/*
* Parse a series of data segments for page fault handling.
*
* @dev: Pointer to mlx5 IB device

View file

@ -4,6 +4,7 @@ config RDMA_RXE
depends on INET && PCI && INFINIBAND
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
select CRYPTO
select CRYPTO_CRC32
help
This driver implements the InfiniBand RDMA transport over

View file

@ -547,6 +547,7 @@ int rxe_completer(void *arg)
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
int ret = 0;
rxe_add_ref(qp);
@ -554,7 +555,8 @@ int rxe_completer(void *arg)
qp->req.state == QP_STATE_RESET) {
rxe_drain_resp_pkts(qp, qp->valid &&
qp->req.state == QP_STATE_ERROR);
goto exit;
ret = -EAGAIN;
goto done;
}
if (qp->comp.timeout) {
@ -564,8 +566,10 @@ int rxe_completer(void *arg)
qp->comp.timeout_retry = 0;
}
if (qp->req.need_retry)
goto exit;
if (qp->req.need_retry) {
ret = -EAGAIN;
goto done;
}
state = COMPST_GET_ACK;
@ -636,8 +640,6 @@ int rxe_completer(void *arg)
break;
case COMPST_DONE:
if (pkt)
free_pkt(pkt);
goto done;
case COMPST_EXIT:
@ -660,7 +662,8 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
goto exit;
ret = -EAGAIN;
goto done;
case COMPST_ERROR_RETRY:
/* we come here if the retry timer fired and we did
@ -672,18 +675,18 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
if (!wqe || (wqe->state == wqe_state_posted))
goto exit;
if (!wqe || (wqe->state == wqe_state_posted)) {
pr_warn("Retry attempted without a valid wqe\n");
ret = -EAGAIN;
goto done;
}
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
*/
if (qp->comp.started_retry &&
!qp->comp.timeout_retry) {
if (pkt)
free_pkt(pkt);
!qp->comp.timeout_retry)
goto done;
}
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
@ -704,8 +707,6 @@ int rxe_completer(void *arg)
qp->comp.started_retry = 1;
rxe_run_task(&qp->req.task, 0);
}
if (pkt)
free_pkt(pkt);
goto done;
} else {
@ -726,8 +727,8 @@ int rxe_completer(void *arg)
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
free_pkt(pkt);
goto exit;
ret = -EAGAIN;
goto done;
} else {
rxe_counter_inc(rxe,
RXE_CNT_RNR_RETRY_EXCEEDED);
@ -740,25 +741,15 @@ int rxe_completer(void *arg)
WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
if (pkt)
free_pkt(pkt);
goto exit;
ret = -EAGAIN;
goto done;
}
}
exit:
/* we come here if we are done with processing and want the task to
* exit from the loop calling us
*/
WARN_ON_ONCE(skb);
rxe_drop_ref(qp);
return -EAGAIN;
done:
/* we come here if we have processed a packet we want the task to call
* us again to see if there is anything else to do
*/
WARN_ON_ONCE(skb);
if (pkt)
free_pkt(pkt);
rxe_drop_ref(qp);
return 0;
return ret;
}

View file

@ -407,14 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
/* fix up a send packet to match the packets
* received from UDP before looping them back
*/
void rxe_loopback(struct sk_buff *skb)
{
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
if (skb->protocol == htons(ETH_P_IP))
skb_pull(skb, sizeof(struct iphdr));
else
skb_pull(skb, sizeof(struct ipv6hdr));
rxe_rcv(skb);
if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev)))
kfree_skb(skb);
else
rxe_rcv(skb);
}
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,

View file

@ -237,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
struct rxe_mc_elem *mce;
struct rxe_qp *qp;
union ib_gid dgid;
struct sk_buff *per_qp_skb;
struct rxe_pkt_info *per_qp_pkt;
int err;
if (skb->protocol == htons(ETH_P_IP))
@ -250,10 +248,15 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
/* lookup mcast group corresponding to mgid, takes a ref */
mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
if (!mcg)
goto err1; /* mcast group not registered */
goto drop; /* mcast group not registered */
spin_lock_bh(&mcg->mcg_lock);
/* this is unreliable datagram service so we let
* failures to deliver a multicast packet to a
* single QP happen and just move on and try
* the rest of them on the list
*/
list_for_each_entry(mce, &mcg->qp_list, qp_list) {
qp = mce->qp;
@ -266,39 +269,47 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
if (err)
continue;
/* for all but the last qp create a new clone of the
* skb and pass to the qp. If an error occurs in the
* checks for the last qp in the list we need to
* free the skb since it hasn't been passed on to
* rxe_rcv_pkt() which would free it later.
/* for all but the last QP create a new clone of the
* skb and pass to the QP. Pass the original skb to
* the last QP in the list.
*/
if (mce->qp_list.next != &mcg->qp_list) {
per_qp_skb = skb_clone(skb, GFP_ATOMIC);
if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
kfree_skb(per_qp_skb);
struct sk_buff *cskb;
struct rxe_pkt_info *cpkt;
cskb = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!cskb))
continue;
if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
kfree_skb(cskb);
break;
}
cpkt = SKB_TO_PKT(cskb);
cpkt->qp = qp;
rxe_add_ref(qp);
rxe_rcv_pkt(cpkt, cskb);
} else {
per_qp_skb = skb;
/* show we have consumed the skb */
skb = NULL;
pkt->qp = qp;
rxe_add_ref(qp);
rxe_rcv_pkt(pkt, skb);
skb = NULL; /* mark consumed */
}
if (unlikely(!per_qp_skb))
continue;
per_qp_pkt = SKB_TO_PKT(per_qp_skb);
per_qp_pkt->qp = qp;
rxe_add_ref(qp);
rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
}
spin_unlock_bh(&mcg->mcg_lock);
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
err1:
/* free skb if not consumed */
if (likely(!skb))
return;
/* This only occurs if one of the checks fails on the last
* QP in the list above
*/
drop:
kfree_skb(skb);
ib_device_put(&rxe->ib_dev);
}

View file

@ -182,6 +182,10 @@ static bool increase_address_space(struct protection_domain *domain,
bool ret = true;
u64 *pte;
pte = (void *)get_zeroed_page(gfp);
if (!pte)
return false;
spin_lock_irqsave(&domain->lock, flags);
if (address <= PM_LEVEL_SIZE(domain->iop.mode))
@ -191,10 +195,6 @@ static bool increase_address_space(struct protection_domain *domain,
if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
goto out;
pte = (void *)get_zeroed_page(gfp);
if (!pte)
goto out;
*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
domain->iop.root = pte;
@ -208,10 +208,12 @@ static bool increase_address_space(struct protection_domain *domain,
*/
amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
pte = NULL;
ret = true;
out:
spin_unlock_irqrestore(&domain->lock, flags);
free_page((unsigned long)pte);
return ret;
}

View file

@ -311,6 +311,11 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
domain->ops->flush_iotlb_all(domain);
}
static bool dev_is_untrusted(struct device *dev)
{
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
}
/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@ -365,8 +370,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
init_iova_domain(iovad, 1UL << order, base_pfn);
if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
!iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) &&
attr) {
if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
iommu_dma_entry_dtor))
pr_warn("iova flush queue initialization failed\n");
@ -508,11 +514,6 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
iova_align(iovad, size), dir, attrs);
}
static bool dev_is_untrusted(struct device *dev)
{
return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
{

View file

@ -30,8 +30,8 @@
#define VCMD_VRSP_IP 0x1
#define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3)
#define VCMD_VRSP_SC_SUCCESS 0
#define VCMD_VRSP_SC_NO_PASID_AVAIL 1
#define VCMD_VRSP_SC_INVALID_PASID 1
#define VCMD_VRSP_SC_NO_PASID_AVAIL 2
#define VCMD_VRSP_SC_INVALID_PASID 2
#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff)
#define VCMD_CMD_OPERAND(e) ((e) << 8)
/*

View file

@ -798,10 +798,70 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain,
return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova);
}
static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
{
struct platform_device *pdev;
struct tegra_mc *mc;
pdev = of_find_device_by_node(np);
if (!pdev)
return NULL;
mc = platform_get_drvdata(pdev);
if (!mc)
return NULL;
return mc->smmu;
}
static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
struct of_phandle_args *args)
{
const struct iommu_ops *ops = smmu->iommu.ops;
int err;
err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops);
if (err < 0) {
dev_err(dev, "failed to initialize fwspec: %d\n", err);
return err;
}
err = ops->of_xlate(dev, args);
if (err < 0) {
dev_err(dev, "failed to parse SW group ID: %d\n", err);
iommu_fwspec_free(dev);
return err;
}
return 0;
}
static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
{
struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
struct device_node *np = dev->of_node;
struct tegra_smmu *smmu = NULL;
struct of_phandle_args args;
unsigned int index = 0;
int err;
while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
&args) == 0) {
smmu = tegra_smmu_find(args.np);
if (smmu) {
err = tegra_smmu_configure(smmu, dev, &args);
of_node_put(args.np);
if (err < 0)
return ERR_PTR(err);
break;
}
of_node_put(args.np);
index++;
}
smmu = dev_iommu_priv_get(dev);
if (!smmu)
return ERR_PTR(-ENODEV);
@ -1028,6 +1088,16 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
if (!smmu)
return ERR_PTR(-ENOMEM);
/*
* This is a bit of a hack. Ideally we'd want to simply return this
* value. However the IOMMU registration process will attempt to add
* all devices to the IOMMU when bus_set_iommu() is called. In order
* not to rely on global variables to track the IOMMU instance, we
* set it here so that it can be looked up from the .probe_device()
* callback via the IOMMU device's .drvdata field.
*/
mc->smmu = smmu;
size = BITS_TO_LONGS(soc->num_asids) * sizeof(long);
smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL);

View file

@ -1526,6 +1526,10 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
{
sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
if (s >= c->start)
s -= c->start;
else
s = 0;
if (likely(c->sectors_per_block_bits >= 0))
s >>= c->sectors_per_block_bits;
else

View file

@ -61,19 +61,18 @@ static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
unsigned *offset, struct dm_buffer **buf)
{
u64 position, block;
u64 position, block, rem;
u8 *res;
position = (index + rsb) * v->fec->roots;
block = position >> v->data_dev_block_bits;
*offset = (unsigned)(position - (block << v->data_dev_block_bits));
block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem);
*offset = (unsigned)rem;
res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
res = dm_bufio_read(v->fec->bufio, block, buf);
if (IS_ERR(res)) {
DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
v->data_dev->name, (unsigned long long)rsb,
(unsigned long long)(v->fec->start + block),
PTR_ERR(res));
(unsigned long long)block, PTR_ERR(res));
*buf = NULL;
}
@ -155,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
/* read the next block when we run out of parity bytes */
offset += v->fec->roots;
if (offset >= 1 << v->data_dev_block_bits) {
if (offset >= v->fec->roots << SECTOR_SHIFT) {
dm_bufio_release(buf);
par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
@ -674,7 +673,7 @@ int verity_fec_ctr(struct dm_verity *v)
{
struct dm_verity_fec *f = v->fec;
struct dm_target *ti = v->ti;
u64 hash_blocks;
u64 hash_blocks, fec_blocks;
int ret;
if (!verity_fec_is_enabled(v)) {
@ -744,15 +743,17 @@ int verity_fec_ctr(struct dm_verity *v)
}
f->bufio = dm_bufio_client_create(f->dev->bdev,
1 << v->data_dev_block_bits,
f->roots << SECTOR_SHIFT,
1, 0, NULL, NULL);
if (IS_ERR(f->bufio)) {
ti->error = "Cannot initialize FEC bufio client";
return PTR_ERR(f->bufio);
}
if (dm_bufio_get_device_size(f->bufio) <
((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) {
dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT));
fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT);
if (dm_bufio_get_device_size(f->bufio) < fec_blocks) {
ti->error = "FEC device is too small";
return -E2BIG;
}

View file

@ -1343,11 +1343,21 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
return 0;
gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
if (nr_mops != 0)
if (nr_mops != 0) {
ret = gnttab_map_refs(queue->tx_map_ops,
NULL,
queue->pages_to_map,
nr_mops);
if (ret) {
unsigned int i;
netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n",
nr_mops, ret);
for (i = 0; i < nr_mops; ++i)
WARN_ON_ONCE(queue->tx_map_ops[i].status ==
GNTST_okay);
}
}
work_done = xenvif_tx_submit(queue);

View file

@ -630,7 +630,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
opts->nr_io_queues = num_online_cpus();
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
opts->kato = NVME_DEFAULT_KATO;
opts->kato = 0;
opts->duplicate_connect = false;
opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
opts->hdr_digest = false;
@ -893,6 +893,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->nr_write_queues = 0;
opts->nr_poll_queues = 0;
opts->duplicate_connect = true;
} else {
if (!opts->kato)
opts->kato = NVME_DEFAULT_KATO;
}
if (ctrl_loss_tmo < 0) {
opts->max_reconnects = -1;

View file

@ -248,6 +248,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
if (IS_ERR(hwmon)) {
dev_warn(dev, "Failed to instantiate hwmon device\n");
kfree(data);
return PTR_ERR(hwmon);
}
ctrl->hwmon_device = hwmon;
return 0;

View file

@ -3234,7 +3234,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
{ PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_NO_NS_DESC_LIST, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */
@ -3248,6 +3249,9 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
@ -3265,6 +3269,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),

View file

@ -313,27 +313,40 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
}
static void nvmet_id_set_model_number(struct nvme_id_ctrl *id,
struct nvmet_subsys *subsys)
static u16 nvmet_set_model_number(struct nvmet_subsys *subsys)
{
const char *model = NVMET_DEFAULT_CTRL_MODEL;
struct nvmet_subsys_model *subsys_model;
u16 status = 0;
rcu_read_lock();
subsys_model = rcu_dereference(subsys->model);
if (subsys_model)
model = subsys_model->number;
memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' ');
rcu_read_unlock();
mutex_lock(&subsys->lock);
if (!subsys->model_number) {
subsys->model_number =
kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
if (!subsys->model_number)
status = NVME_SC_INTERNAL;
}
mutex_unlock(&subsys->lock);
return status;
}
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_subsys *subsys = ctrl->subsys;
struct nvme_id_ctrl *id;
u32 cmd_capsule_size;
u16 status = 0;
/*
* If there is no model number yet, set it now. It will then remain
* stable for the life time of the subsystem.
*/
if (!subsys->model_number) {
status = nvmet_set_model_number(subsys);
if (status)
goto out;
}
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
status = NVME_SC_INTERNAL;
@ -347,7 +360,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
memset(id->sn, ' ', sizeof(id->sn));
bin2hex(id->sn, &ctrl->subsys->serial,
min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
nvmet_id_set_model_number(id, ctrl->subsys);
memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number,
strlen(subsys->model_number), ' ');
memcpy_and_pad(id->fr, sizeof(id->fr),
UTS_RELEASE, strlen(UTS_RELEASE), ' ');

View file

@ -1118,16 +1118,12 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
char *page)
{
struct nvmet_subsys *subsys = to_subsys(item);
struct nvmet_subsys_model *subsys_model;
char *model = NVMET_DEFAULT_CTRL_MODEL;
int ret;
rcu_read_lock();
subsys_model = rcu_dereference(subsys->model);
if (subsys_model)
model = subsys_model->number;
ret = snprintf(page, PAGE_SIZE, "%s\n", model);
rcu_read_unlock();
mutex_lock(&subsys->lock);
ret = snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number ?
subsys->model_number : NVMET_DEFAULT_CTRL_MODEL);
mutex_unlock(&subsys->lock);
return ret;
}
@ -1138,14 +1134,17 @@ static bool nvmet_is_ascii(const char c)
return c >= 0x20 && c <= 0x7e;
}
static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
const char *page, size_t count)
static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
const char *page, size_t count)
{
struct nvmet_subsys *subsys = to_subsys(item);
struct nvmet_subsys_model *new_model;
char *new_model_number;
int pos = 0, len;
if (subsys->model_number) {
pr_err("Can't set model number. %s is already assigned\n",
subsys->model_number);
return -EINVAL;
}
len = strcspn(page, "\n");
if (!len)
return -EINVAL;
@ -1155,28 +1154,25 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
return -EINVAL;
}
new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
if (!new_model_number)
subsys->model_number = kmemdup_nul(page, len, GFP_KERNEL);
if (!subsys->model_number)
return -ENOMEM;
return count;
}
new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL);
if (!new_model) {
kfree(new_model_number);
return -ENOMEM;
}
memcpy(new_model->number, new_model_number, len);
static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_subsys *subsys = to_subsys(item);
ssize_t ret;
down_write(&nvmet_config_sem);
mutex_lock(&subsys->lock);
new_model = rcu_replace_pointer(subsys->model, new_model,
mutex_is_locked(&subsys->lock));
ret = nvmet_subsys_attr_model_store_locked(subsys, page, count);
mutex_unlock(&subsys->lock);
up_write(&nvmet_config_sem);
kfree_rcu(new_model, rcuhead);
kfree(new_model_number);
return count;
return ret;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);

View file

@ -1532,7 +1532,7 @@ static void nvmet_subsys_free(struct kref *ref)
nvmet_passthru_subsys_free(subsys);
kfree(subsys->subsysnqn);
kfree_rcu(subsys->model, rcuhead);
kfree(subsys->model_number);
kfree(subsys);
}

View file

@ -208,11 +208,6 @@ struct nvmet_ctrl {
bool pi_support;
};
struct nvmet_subsys_model {
struct rcu_head rcuhead;
char number[];
};
struct nvmet_subsys {
enum nvme_subsys_type type;
@ -242,7 +237,7 @@ struct nvmet_subsys {
struct config_group namespaces_group;
struct config_group allowed_hosts_group;
struct nvmet_subsys_model __rcu *model;
char *model_number;
#ifdef CONFIG_NVME_TARGET_PASSTHRU
struct nvme_ctrl *passthru_ctrl;

View file

@ -45,7 +45,7 @@ config IDLE_INJECT
on a per CPU basis.
config DTPM
bool "Power capping for Dynamic Thermal Power Management"
bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
help
This enables support for the power capping for the dynamic
thermal power management userspace engine.

View file

@ -207,6 +207,9 @@ int dtpm_release_zone(struct powercap_zone *pcz)
if (dtpm->ops)
dtpm->ops->release(dtpm);
if (root == dtpm)
root = NULL;
kfree(dtpm);
return 0;

View file

@ -3430,125 +3430,125 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
switch(param) {
case ISCSI_PARAM_FAST_ABORT:
len = sprintf(buf, "%d\n", session->fast_abort);
len = sysfs_emit(buf, "%d\n", session->fast_abort);
break;
case ISCSI_PARAM_ABORT_TMO:
len = sprintf(buf, "%d\n", session->abort_timeout);
len = sysfs_emit(buf, "%d\n", session->abort_timeout);
break;
case ISCSI_PARAM_LU_RESET_TMO:
len = sprintf(buf, "%d\n", session->lu_reset_timeout);
len = sysfs_emit(buf, "%d\n", session->lu_reset_timeout);
break;
case ISCSI_PARAM_TGT_RESET_TMO:
len = sprintf(buf, "%d\n", session->tgt_reset_timeout);
len = sysfs_emit(buf, "%d\n", session->tgt_reset_timeout);
break;
case ISCSI_PARAM_INITIAL_R2T_EN:
len = sprintf(buf, "%d\n", session->initial_r2t_en);
len = sysfs_emit(buf, "%d\n", session->initial_r2t_en);
break;
case ISCSI_PARAM_MAX_R2T:
len = sprintf(buf, "%hu\n", session->max_r2t);
len = sysfs_emit(buf, "%hu\n", session->max_r2t);
break;
case ISCSI_PARAM_IMM_DATA_EN:
len = sprintf(buf, "%d\n", session->imm_data_en);
len = sysfs_emit(buf, "%d\n", session->imm_data_en);
break;
case ISCSI_PARAM_FIRST_BURST:
len = sprintf(buf, "%u\n", session->first_burst);
len = sysfs_emit(buf, "%u\n", session->first_burst);
break;
case ISCSI_PARAM_MAX_BURST:
len = sprintf(buf, "%u\n", session->max_burst);
len = sysfs_emit(buf, "%u\n", session->max_burst);
break;
case ISCSI_PARAM_PDU_INORDER_EN:
len = sprintf(buf, "%d\n", session->pdu_inorder_en);
len = sysfs_emit(buf, "%d\n", session->pdu_inorder_en);
break;
case ISCSI_PARAM_DATASEQ_INORDER_EN:
len = sprintf(buf, "%d\n", session->dataseq_inorder_en);
len = sysfs_emit(buf, "%d\n", session->dataseq_inorder_en);
break;
case ISCSI_PARAM_DEF_TASKMGMT_TMO:
len = sprintf(buf, "%d\n", session->def_taskmgmt_tmo);
len = sysfs_emit(buf, "%d\n", session->def_taskmgmt_tmo);
break;
case ISCSI_PARAM_ERL:
len = sprintf(buf, "%d\n", session->erl);
len = sysfs_emit(buf, "%d\n", session->erl);
break;
case ISCSI_PARAM_TARGET_NAME:
len = sprintf(buf, "%s\n", session->targetname);
len = sysfs_emit(buf, "%s\n", session->targetname);
break;
case ISCSI_PARAM_TARGET_ALIAS:
len = sprintf(buf, "%s\n", session->targetalias);
len = sysfs_emit(buf, "%s\n", session->targetalias);
break;
case ISCSI_PARAM_TPGT:
len = sprintf(buf, "%d\n", session->tpgt);
len = sysfs_emit(buf, "%d\n", session->tpgt);
break;
case ISCSI_PARAM_USERNAME:
len = sprintf(buf, "%s\n", session->username);
len = sysfs_emit(buf, "%s\n", session->username);
break;
case ISCSI_PARAM_USERNAME_IN:
len = sprintf(buf, "%s\n", session->username_in);
len = sysfs_emit(buf, "%s\n", session->username_in);
break;
case ISCSI_PARAM_PASSWORD:
len = sprintf(buf, "%s\n", session->password);
len = sysfs_emit(buf, "%s\n", session->password);
break;
case ISCSI_PARAM_PASSWORD_IN:
len = sprintf(buf, "%s\n", session->password_in);
len = sysfs_emit(buf, "%s\n", session->password_in);
break;
case ISCSI_PARAM_IFACE_NAME:
len = sprintf(buf, "%s\n", session->ifacename);
len = sysfs_emit(buf, "%s\n", session->ifacename);
break;
case ISCSI_PARAM_INITIATOR_NAME:
len = sprintf(buf, "%s\n", session->initiatorname);
len = sysfs_emit(buf, "%s\n", session->initiatorname);
break;
case ISCSI_PARAM_BOOT_ROOT:
len = sprintf(buf, "%s\n", session->boot_root);
len = sysfs_emit(buf, "%s\n", session->boot_root);
break;
case ISCSI_PARAM_BOOT_NIC:
len = sprintf(buf, "%s\n", session->boot_nic);
len = sysfs_emit(buf, "%s\n", session->boot_nic);
break;
case ISCSI_PARAM_BOOT_TARGET:
len = sprintf(buf, "%s\n", session->boot_target);
len = sysfs_emit(buf, "%s\n", session->boot_target);
break;
case ISCSI_PARAM_AUTO_SND_TGT_DISABLE:
len = sprintf(buf, "%u\n", session->auto_snd_tgt_disable);
len = sysfs_emit(buf, "%u\n", session->auto_snd_tgt_disable);
break;
case ISCSI_PARAM_DISCOVERY_SESS:
len = sprintf(buf, "%u\n", session->discovery_sess);
len = sysfs_emit(buf, "%u\n", session->discovery_sess);
break;
case ISCSI_PARAM_PORTAL_TYPE:
len = sprintf(buf, "%s\n", session->portal_type);
len = sysfs_emit(buf, "%s\n", session->portal_type);
break;
case ISCSI_PARAM_CHAP_AUTH_EN:
len = sprintf(buf, "%u\n", session->chap_auth_en);
len = sysfs_emit(buf, "%u\n", session->chap_auth_en);
break;
case ISCSI_PARAM_DISCOVERY_LOGOUT_EN:
len = sprintf(buf, "%u\n", session->discovery_logout_en);
len = sysfs_emit(buf, "%u\n", session->discovery_logout_en);
break;
case ISCSI_PARAM_BIDI_CHAP_EN:
len = sprintf(buf, "%u\n", session->bidi_chap_en);
len = sysfs_emit(buf, "%u\n", session->bidi_chap_en);
break;
case ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL:
len = sprintf(buf, "%u\n", session->discovery_auth_optional);
len = sysfs_emit(buf, "%u\n", session->discovery_auth_optional);
break;
case ISCSI_PARAM_DEF_TIME2WAIT:
len = sprintf(buf, "%d\n", session->time2wait);
len = sysfs_emit(buf, "%d\n", session->time2wait);
break;
case ISCSI_PARAM_DEF_TIME2RETAIN:
len = sprintf(buf, "%d\n", session->time2retain);
len = sysfs_emit(buf, "%d\n", session->time2retain);
break;
case ISCSI_PARAM_TSID:
len = sprintf(buf, "%u\n", session->tsid);
len = sysfs_emit(buf, "%u\n", session->tsid);
break;
case ISCSI_PARAM_ISID:
len = sprintf(buf, "%02x%02x%02x%02x%02x%02x\n",
len = sysfs_emit(buf, "%02x%02x%02x%02x%02x%02x\n",
session->isid[0], session->isid[1],
session->isid[2], session->isid[3],
session->isid[4], session->isid[5]);
break;
case ISCSI_PARAM_DISCOVERY_PARENT_IDX:
len = sprintf(buf, "%u\n", session->discovery_parent_idx);
len = sysfs_emit(buf, "%u\n", session->discovery_parent_idx);
break;
case ISCSI_PARAM_DISCOVERY_PARENT_TYPE:
if (session->discovery_parent_type)
len = sprintf(buf, "%s\n",
len = sysfs_emit(buf, "%s\n",
session->discovery_parent_type);
else
len = sprintf(buf, "\n");
len = sysfs_emit(buf, "\n");
break;
default:
return -ENOSYS;
@ -3580,16 +3580,16 @@ int iscsi_conn_get_addr_param(struct sockaddr_storage *addr,
case ISCSI_PARAM_CONN_ADDRESS:
case ISCSI_HOST_PARAM_IPADDRESS:
if (sin)
len = sprintf(buf, "%pI4\n", &sin->sin_addr.s_addr);
len = sysfs_emit(buf, "%pI4\n", &sin->sin_addr.s_addr);
else
len = sprintf(buf, "%pI6\n", &sin6->sin6_addr);
len = sysfs_emit(buf, "%pI6\n", &sin6->sin6_addr);
break;
case ISCSI_PARAM_CONN_PORT:
case ISCSI_PARAM_LOCAL_PORT:
if (sin)
len = sprintf(buf, "%hu\n", be16_to_cpu(sin->sin_port));
len = sysfs_emit(buf, "%hu\n", be16_to_cpu(sin->sin_port));
else
len = sprintf(buf, "%hu\n",
len = sysfs_emit(buf, "%hu\n",
be16_to_cpu(sin6->sin6_port));
break;
default:
@ -3608,88 +3608,88 @@ int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
switch(param) {
case ISCSI_PARAM_PING_TMO:
len = sprintf(buf, "%u\n", conn->ping_timeout);
len = sysfs_emit(buf, "%u\n", conn->ping_timeout);
break;
case ISCSI_PARAM_RECV_TMO:
len = sprintf(buf, "%u\n", conn->recv_timeout);
len = sysfs_emit(buf, "%u\n", conn->recv_timeout);
break;
case ISCSI_PARAM_MAX_RECV_DLENGTH:
len = sprintf(buf, "%u\n", conn->max_recv_dlength);
len = sysfs_emit(buf, "%u\n", conn->max_recv_dlength);
break;
case ISCSI_PARAM_MAX_XMIT_DLENGTH:
len = sprintf(buf, "%u\n", conn->max_xmit_dlength);
len = sysfs_emit(buf, "%u\n", conn->max_xmit_dlength);
break;
case ISCSI_PARAM_HDRDGST_EN:
len = sprintf(buf, "%d\n", conn->hdrdgst_en);
len = sysfs_emit(buf, "%d\n", conn->hdrdgst_en);
break;
case ISCSI_PARAM_DATADGST_EN:
len = sprintf(buf, "%d\n", conn->datadgst_en);
len = sysfs_emit(buf, "%d\n", conn->datadgst_en);
break;
case ISCSI_PARAM_IFMARKER_EN:
len = sprintf(buf, "%d\n", conn->ifmarker_en);
len = sysfs_emit(buf, "%d\n", conn->ifmarker_en);
break;
case ISCSI_PARAM_OFMARKER_EN:
len = sprintf(buf, "%d\n", conn->ofmarker_en);
len = sysfs_emit(buf, "%d\n", conn->ofmarker_en);
break;
case ISCSI_PARAM_EXP_STATSN:
len = sprintf(buf, "%u\n", conn->exp_statsn);
len = sysfs_emit(buf, "%u\n", conn->exp_statsn);
break;
case ISCSI_PARAM_PERSISTENT_PORT:
len = sprintf(buf, "%d\n", conn->persistent_port);
len = sysfs_emit(buf, "%d\n", conn->persistent_port);
break;
case ISCSI_PARAM_PERSISTENT_ADDRESS:
len = sprintf(buf, "%s\n", conn->persistent_address);
len = sysfs_emit(buf, "%s\n", conn->persistent_address);
break;
case ISCSI_PARAM_STATSN:
len = sprintf(buf, "%u\n", conn->statsn);
len = sysfs_emit(buf, "%u\n", conn->statsn);
break;
case ISCSI_PARAM_MAX_SEGMENT_SIZE:
len = sprintf(buf, "%u\n", conn->max_segment_size);
len = sysfs_emit(buf, "%u\n", conn->max_segment_size);
break;
case ISCSI_PARAM_KEEPALIVE_TMO:
len = sprintf(buf, "%u\n", conn->keepalive_tmo);
len = sysfs_emit(buf, "%u\n", conn->keepalive_tmo);
break;
case ISCSI_PARAM_LOCAL_PORT:
len = sprintf(buf, "%u\n", conn->local_port);
len = sysfs_emit(buf, "%u\n", conn->local_port);
break;
case ISCSI_PARAM_TCP_TIMESTAMP_STAT:
len = sprintf(buf, "%u\n", conn->tcp_timestamp_stat);
len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_stat);
break;
case ISCSI_PARAM_TCP_NAGLE_DISABLE:
len = sprintf(buf, "%u\n", conn->tcp_nagle_disable);
len = sysfs_emit(buf, "%u\n", conn->tcp_nagle_disable);
break;
case ISCSI_PARAM_TCP_WSF_DISABLE:
len = sprintf(buf, "%u\n", conn->tcp_wsf_disable);
len = sysfs_emit(buf, "%u\n", conn->tcp_wsf_disable);
break;
case ISCSI_PARAM_TCP_TIMER_SCALE:
len = sprintf(buf, "%u\n", conn->tcp_timer_scale);
len = sysfs_emit(buf, "%u\n", conn->tcp_timer_scale);
break;
case ISCSI_PARAM_TCP_TIMESTAMP_EN:
len = sprintf(buf, "%u\n", conn->tcp_timestamp_en);
len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_en);
break;
case ISCSI_PARAM_IP_FRAGMENT_DISABLE:
len = sprintf(buf, "%u\n", conn->fragment_disable);
len = sysfs_emit(buf, "%u\n", conn->fragment_disable);
break;
case ISCSI_PARAM_IPV4_TOS:
len = sprintf(buf, "%u\n", conn->ipv4_tos);
len = sysfs_emit(buf, "%u\n", conn->ipv4_tos);
break;
case ISCSI_PARAM_IPV6_TC:
len = sprintf(buf, "%u\n", conn->ipv6_traffic_class);
len = sysfs_emit(buf, "%u\n", conn->ipv6_traffic_class);
break;
case ISCSI_PARAM_IPV6_FLOW_LABEL:
len = sprintf(buf, "%u\n", conn->ipv6_flow_label);
len = sysfs_emit(buf, "%u\n", conn->ipv6_flow_label);
break;
case ISCSI_PARAM_IS_FW_ASSIGNED_IPV6:
len = sprintf(buf, "%u\n", conn->is_fw_assigned_ipv6);
len = sysfs_emit(buf, "%u\n", conn->is_fw_assigned_ipv6);
break;
case ISCSI_PARAM_TCP_XMIT_WSF:
len = sprintf(buf, "%u\n", conn->tcp_xmit_wsf);
len = sysfs_emit(buf, "%u\n", conn->tcp_xmit_wsf);
break;
case ISCSI_PARAM_TCP_RECV_WSF:
len = sprintf(buf, "%u\n", conn->tcp_recv_wsf);
len = sysfs_emit(buf, "%u\n", conn->tcp_recv_wsf);
break;
case ISCSI_PARAM_LOCAL_IPADDR:
len = sprintf(buf, "%s\n", conn->local_ipaddr);
len = sysfs_emit(buf, "%s\n", conn->local_ipaddr);
break;
default:
return -ENOSYS;
@ -3707,13 +3707,13 @@ int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
switch (param) {
case ISCSI_HOST_PARAM_NETDEV_NAME:
len = sprintf(buf, "%s\n", ihost->netdev);
len = sysfs_emit(buf, "%s\n", ihost->netdev);
break;
case ISCSI_HOST_PARAM_HWADDRESS:
len = sprintf(buf, "%s\n", ihost->hwaddress);
len = sysfs_emit(buf, "%s\n", ihost->hwaddress);
break;
case ISCSI_HOST_PARAM_INITIATOR_NAME:
len = sprintf(buf, "%s\n", ihost->initiatorname);
len = sysfs_emit(buf, "%s\n", ihost->initiatorname);
break;
default:
return -ENOSYS;

View file

@ -132,7 +132,11 @@ show_transport_handle(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct iscsi_internal *priv = dev_to_iscsi_internal(dev);
return sprintf(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport));
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
return sysfs_emit(buf, "%llu\n",
(unsigned long long)iscsi_handle(priv->iscsi_transport));
}
static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL);
@ -142,7 +146,7 @@ show_transport_##name(struct device *dev, \
struct device_attribute *attr,char *buf) \
{ \
struct iscsi_internal *priv = dev_to_iscsi_internal(dev); \
return sprintf(buf, format"\n", priv->iscsi_transport->name); \
return sysfs_emit(buf, format"\n", priv->iscsi_transport->name);\
} \
static DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL);
@ -183,7 +187,7 @@ static ssize_t
show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf)
{
struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
return sprintf(buf, "%llu\n", (unsigned long long) ep->id);
return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id);
}
static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL);
@ -2880,6 +2884,9 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
struct iscsi_cls_session *session;
int err = 0, value = 0;
if (ev->u.set_param.len > PAGE_SIZE)
return -EINVAL;
session = iscsi_session_lookup(ev->u.set_param.sid);
conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid);
if (!conn || !session)
@ -3027,6 +3034,9 @@ iscsi_set_host_param(struct iscsi_transport *transport,
if (!transport->set_host_param)
return -ENOSYS;
if (ev->u.set_host_param.len > PAGE_SIZE)
return -EINVAL;
shost = scsi_host_lookup(ev->u.set_host_param.host_no);
if (!shost) {
printk(KERN_ERR "set_host_param could not find host no %u\n",
@ -3614,6 +3624,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
{
int err = 0;
u32 portid;
u32 pdu_len;
struct iscsi_uevent *ev = nlmsg_data(nlh);
struct iscsi_transport *transport = NULL;
struct iscsi_internal *priv;
@ -3621,6 +3632,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
struct iscsi_cls_conn *conn;
struct iscsi_endpoint *ep = NULL;
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE)
*group = ISCSI_NL_GRP_UIP;
else
@ -3753,6 +3767,14 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
err = -EINVAL;
break;
case ISCSI_UEVENT_SEND_PDU:
pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
if ((ev->u.send_pdu.hdr_size > pdu_len) ||
(ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) {
err = -EINVAL;
break;
}
conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid);
if (conn) {
mutex_lock(&conn_mutex);
@ -3957,7 +3979,7 @@ static ssize_t show_conn_state(struct device *dev,
conn->state < ARRAY_SIZE(connection_state_names))
state = connection_state_names[conn->state];
return sprintf(buf, "%s\n", state);
return sysfs_emit(buf, "%s\n", state);
}
static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state,
NULL);
@ -4185,7 +4207,7 @@ show_priv_session_state(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
return sprintf(buf, "%s\n", iscsi_session_state_name(session->state));
return sysfs_emit(buf, "%s\n", iscsi_session_state_name(session->state));
}
static ISCSI_CLASS_ATTR(priv_sess, state, S_IRUGO, show_priv_session_state,
NULL);
@ -4194,7 +4216,7 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
return sprintf(buf, "%d\n", session->creator);
return sysfs_emit(buf, "%d\n", session->creator);
}
static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator,
NULL);
@ -4203,7 +4225,7 @@ show_priv_session_target_id(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
return sprintf(buf, "%d\n", session->target_id);
return sysfs_emit(buf, "%d\n", session->target_id);
}
static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO,
show_priv_session_target_id, NULL);
@ -4216,8 +4238,8 @@ show_priv_session_##field(struct device *dev, \
struct iscsi_cls_session *session = \
iscsi_dev_to_session(dev->parent); \
if (session->field == -1) \
return sprintf(buf, "off\n"); \
return sprintf(buf, format"\n", session->field); \
return sysfs_emit(buf, "off\n"); \
return sysfs_emit(buf, format"\n", session->field); \
}
#define iscsi_priv_session_attr_store(field) \

View file

@ -48,8 +48,6 @@ struct sdw_intel {
#endif
};
#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1)
int intel_master_startup(struct platform_device *pdev);
int intel_master_process_wakeen_event(struct platform_device *pdev);

View file

@ -18,42 +18,12 @@
#include "cadence_master.h"
#include "intel.h"
#define SDW_LINK_TYPE 4 /* from Intel ACPI documentation */
#define SDW_MAX_LINKS 4
#define SDW_SHIM_LCAP 0x0
#define SDW_SHIM_BASE 0x2C000
#define SDW_ALH_BASE 0x2C800
#define SDW_LINK_BASE 0x30000
#define SDW_LINK_SIZE 0x10000
static int ctrl_link_mask;
module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444);
MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)");
static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
{
struct fwnode_handle *link;
char name[32];
u32 quirk_mask = 0;
/* Find master handle */
snprintf(name, sizeof(name),
"mipi-sdw-link-%d-subproperties", i);
link = fwnode_get_named_child_node(fw_node, name);
if (!link)
return false;
fwnode_property_read_u32(link,
"intel-quirk-mask",
&quirk_mask);
if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
return false;
return true;
}
static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx)
{
struct sdw_intel_link_res *link = ctx->links;
@ -81,74 +51,6 @@ static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx)
return 0;
}
static int
sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
{
struct acpi_device *adev;
int ret, i;
u8 count;
if (acpi_bus_get_device(info->handle, &adev))
return -EINVAL;
/* Found controller, find links supported */
count = 0;
ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
"mipi-sdw-master-count", &count, 1);
/*
* In theory we could check the number of links supported in
* hardware, but in that step we cannot assume SoundWire IP is
* powered.
*
* In addition, if the BIOS doesn't even provide this
* 'master-count' property then all the inits based on link
* masks will fail as well.
*
* We will check the hardware capabilities in the startup() step
*/
if (ret) {
dev_err(&adev->dev,
"Failed to read mipi-sdw-master-count: %d\n", ret);
return -EINVAL;
}
/* Check count is within bounds */
if (count > SDW_MAX_LINKS) {
dev_err(&adev->dev, "Link count %d exceeds max %d\n",
count, SDW_MAX_LINKS);
return -EINVAL;
}
if (!count) {
dev_warn(&adev->dev, "No SoundWire links detected\n");
return -EINVAL;
}
dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count);
info->count = count;
info->link_mask = 0;
for (i = 0; i < count; i++) {
if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) {
dev_dbg(&adev->dev,
"Link %d masked, will not be enabled\n", i);
continue;
}
if (!is_link_enabled(acpi_fwnode_handle(adev), i)) {
dev_dbg(&adev->dev,
"Link %d not selected in firmware\n", i);
continue;
}
info->link_mask |= BIT(i);
}
return 0;
}
#define HDA_DSP_REG_ADSPIC2 (0x10)
#define HDA_DSP_REG_ADSPIS2 (0x14)
#define HDA_DSP_REG_ADSPIC2_SNDW BIT(5)
@ -357,66 +259,6 @@ sdw_intel_startup_controller(struct sdw_intel_ctx *ctx)
return 0;
}
static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
void *cdata, void **return_value)
{
struct sdw_intel_acpi_info *info = cdata;
struct acpi_device *adev;
acpi_status status;
u64 adr;
status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr);
if (ACPI_FAILURE(status))
return AE_OK; /* keep going */
if (acpi_bus_get_device(handle, &adev)) {
pr_err("%s: Couldn't find ACPI handle\n", __func__);
return AE_NOT_FOUND;
}
info->handle = handle;
/*
* On some Intel platforms, multiple children of the HDAS
* device can be found, but only one of them is the SoundWire
* controller. The SNDW device is always exposed with
* Name(_ADR, 0x40000000), with bits 31..28 representing the
* SoundWire link so filter accordingly
*/
if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
return AE_OK; /* keep going */
/* device found, stop namespace walk */
return AE_CTRL_TERMINATE;
}
/**
* sdw_intel_acpi_scan() - SoundWire Intel init routine
* @parent_handle: ACPI parent handle
* @info: description of what firmware/DSDT tables expose
*
* This scans the namespace and queries firmware to figure out which
* links to enable. A follow-up use of sdw_intel_probe() and
* sdw_intel_startup() is required for creation of devices and bus
* startup
*/
int sdw_intel_acpi_scan(acpi_handle *parent_handle,
struct sdw_intel_acpi_info *info)
{
acpi_status status;
info->handle = NULL;
status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
parent_handle, 1,
sdw_intel_acpi_cb,
NULL, info, NULL);
if (ACPI_FAILURE(status) || info->handle == NULL)
return -ENODEV;
return sdw_intel_scan_controller(info);
}
EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SOUNDWIRE_INTEL_INIT);
/**
* sdw_intel_probe() - SoundWire Intel probe routine
* @res: resource data

View file

@ -1162,6 +1162,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
if (cache->swap_extents) {
ret = -ETXTBSY;
goto out;
}
if (cache->ro) {
cache->ro++;
ret = 0;
@ -2307,7 +2312,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
}
ret = inc_block_group_ro(cache, 0);
if (!do_chunk_alloc)
if (!do_chunk_alloc || ret == -ETXTBSY)
goto unlock_out;
if (!ret)
goto out;
@ -2316,6 +2321,8 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
if (ret < 0)
goto out;
ret = inc_block_group_ro(cache, 0);
if (ret == -ETXTBSY)
goto unlock_out;
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
@ -3406,6 +3413,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
ASSERT(list_empty(&block_group->io_list));
ASSERT(list_empty(&block_group->bg_list));
ASSERT(refcount_read(&block_group->refs) == 1);
ASSERT(block_group->swap_extents == 0);
btrfs_put_block_group(block_group);
spin_lock(&info->block_group_cache_lock);
@ -3472,3 +3480,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
}
}
bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg)
{
bool ret = true;
spin_lock(&bg->lock);
if (bg->ro)
ret = false;
else
bg->swap_extents++;
spin_unlock(&bg->lock);
return ret;
}
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount)
{
spin_lock(&bg->lock);
ASSERT(!bg->ro);
ASSERT(bg->swap_extents >= amount);
bg->swap_extents -= amount;
spin_unlock(&bg->lock);
}

View file

@ -186,6 +186,12 @@ struct btrfs_block_group {
/* Flag indicating this block group is placed on a sequential zone */
bool seq_zone;
/*
* Number of extents in this block group used for swap files.
* All accesses protected by the spinlock 'lock'.
*/
int swap_extents;
/* Record locked full stripes for RAID5/6 block group */
struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
@ -312,4 +318,7 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
void btrfs_freeze_block_group(struct btrfs_block_group *cache);
void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
#endif /* BTRFS_BLOCK_GROUP_H */

View file

@ -141,6 +141,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
const u32 csum_size = fs_info->csum_size;
const u32 sectorsize = fs_info->sectorsize;
struct page *page;
unsigned long i;
char *kaddr;
@ -154,22 +155,34 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
shash->tfm = fs_info->csum_shash;
for (i = 0; i < cb->nr_pages; i++) {
u32 pg_offset;
u32 bytes_left = PAGE_SIZE;
page = cb->compressed_pages[i];
kaddr = kmap_atomic(page);
crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
kunmap_atomic(kaddr);
/* Determine the remaining bytes inside the page first */
if (i == cb->nr_pages - 1)
bytes_left = cb->compressed_len - i * PAGE_SIZE;
if (memcmp(&csum, cb_sum, csum_size)) {
btrfs_print_data_csum_error(inode, disk_start,
csum, cb_sum, cb->mirror_num);
if (btrfs_io_bio(bio)->device)
btrfs_dev_stat_inc_and_print(
btrfs_io_bio(bio)->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
return -EIO;
/* Hash through the page sector by sector */
for (pg_offset = 0; pg_offset < bytes_left;
pg_offset += sectorsize) {
kaddr = kmap_atomic(page);
crypto_shash_digest(shash, kaddr + pg_offset,
sectorsize, csum);
kunmap_atomic(kaddr);
if (memcmp(&csum, cb_sum, csum_size) != 0) {
btrfs_print_data_csum_error(inode, disk_start,
csum, cb_sum, cb->mirror_num);
if (btrfs_io_bio(bio)->device)
btrfs_dev_stat_inc_and_print(
btrfs_io_bio(bio)->device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
return -EIO;
}
cb_sum += csum_size;
disk_start += sectorsize;
}
cb_sum += csum_size;
}
return 0;
}
@ -640,7 +653,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree,
page_offset(bio_first_page_all(bio)),
PAGE_SIZE);
fs_info->sectorsize);
read_unlock(&em_tree->lock);
if (!em)
return BLK_STS_IOERR;
@ -698,19 +711,30 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
refcount_set(&cb->pending_bios, 1);
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
u32 pg_len = PAGE_SIZE;
int submit = 0;
/*
* To handle subpage case, we need to make sure the bio only
* covers the range we need.
*
* If we're at the last page, truncate the length to only cover
* the remaining part.
*/
if (pg_index == nr_pages - 1)
pg_len = min_t(u32, PAGE_SIZE,
compressed_len - pg_index * PAGE_SIZE);
page = cb->compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE,
submit = btrfs_bio_fits_in_stripe(page, pg_len,
comp_bio, 0);
page->mapping = NULL;
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
PAGE_SIZE) {
if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) {
unsigned int nr_sectors;
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
@ -743,9 +767,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
bio_add_page(comp_bio, page, PAGE_SIZE, 0);
bio_add_page(comp_bio, page, pg_len, 0);
}
cur_disk_byte += PAGE_SIZE;
cur_disk_byte += pg_len;
}
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
@ -1237,7 +1261,6 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long prev_start_byte;
unsigned long working_bytes = total_out - buf_start;
unsigned long bytes;
char *kaddr;
struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter);
/*
@ -1268,9 +1291,8 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
PAGE_SIZE - (buf_offset % PAGE_SIZE));
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(bvec.bv_page);
memcpy(kaddr + bvec.bv_offset, buf + buf_offset, bytes);
kunmap_atomic(kaddr);
memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + buf_offset,
bytes);
flush_dcache_page(bvec.bv_page);
buf_offset += bytes;

View file

@ -524,6 +524,11 @@ struct btrfs_swapfile_pin {
* points to a struct btrfs_device.
*/
bool is_block_group;
/*
* Only used when 'is_block_group' is true and it is the number of
* extents used by a swapfile for this block group ('ptr' field).
*/
int bg_extent_count;
};
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);

View file

@ -627,7 +627,8 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
ret = btrfs_qgroup_reserve_meta(root, num_bytes,
BTRFS_QGROUP_RSV_META_PREALLOC, true);
if (ret < 0)
return ret;
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
@ -649,7 +650,7 @@ static int btrfs_delayed_inode_reserve_metadata(
btrfs_ino(inode),
num_bytes, 1);
} else {
btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
btrfs_qgroup_free_meta_prealloc(root, num_bytes);
}
return ret;
}

View file

@ -3008,12 +3008,23 @@ static void end_bio_extent_readpage(struct bio *bio)
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned off;
/* Zero out the end if this page straddles i_size */
off = offset_in_page(i_size);
if (page->index == end_index && off)
zero_user_segment(page, off, PAGE_SIZE);
/*
* Zero out the remaining part if this range straddles
* i_size.
*
* Here we should only zero the range inside the bvec,
* not touch anything else.
*
* NOTE: i_size is exclusive while end is inclusive.
*/
if (page->index == end_index && i_size <= end) {
u32 zero_start = max(offset_in_page(i_size),
offset_in_page(end));
zero_user_segment(page, zero_start,
offset_in_page(end) + 1);
}
}
ASSERT(bio_offset + len > bio_offset);
bio_offset += len;

View file

@ -3260,8 +3260,11 @@ static int btrfs_zero_range(struct inode *inode,
goto out;
ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
alloc_start, bytes_to_reserve);
if (ret)
if (ret) {
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
lockend, &cached_state);
goto out;
}
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
i_blocksize(inode),

View file

@ -2555,7 +2555,12 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
to_unusable = size - to_free;
ctl->free_space += to_free;
block_group->zone_unusable += to_unusable;
/*
* If the block group is read-only, we should account freed space into
* bytes_readonly.
*/
if (!block_group->ro)
block_group->zone_unusable += to_unusable;
spin_unlock(&ctl->tree_lock);
if (!used) {
spin_lock(&block_group->lock);
@ -2801,8 +2806,10 @@ static void __btrfs_return_cluster_to_free_space(
struct rb_node *node;
spin_lock(&cluster->lock);
if (cluster->block_group != block_group)
goto out;
if (cluster->block_group != block_group) {
spin_unlock(&cluster->lock);
return;
}
cluster->block_group = NULL;
cluster->window_start = 0;
@ -2840,8 +2847,6 @@ static void __btrfs_return_cluster_to_free_space(
entry->offset, &entry->offset_index, bitmap);
}
cluster->root = RB_ROOT;
out:
spin_unlock(&cluster->lock);
btrfs_put_block_group(block_group);
}
@ -3125,8 +3130,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
entry->bytes -= bytes;
}
if (entry->bytes == 0)
rb_erase(&entry->offset_index, &cluster->root);
break;
}
out:
@ -3143,7 +3146,10 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
ctl->free_space -= bytes;
if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
spin_lock(&cluster->lock);
if (entry->bytes == 0) {
rb_erase(&entry->offset_index, &cluster->root);
ctl->free_extents--;
if (entry->bitmap) {
kmem_cache_free(btrfs_free_space_bitmap_cachep,
@ -3156,6 +3162,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
kmem_cache_free(btrfs_free_space_cachep, entry);
}
spin_unlock(&cluster->lock);
spin_unlock(&ctl->tree_lock);
return ret;

View file

@ -1674,9 +1674,6 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
*/
btrfs_release_path(path);
/* If extent is RO, we must COW it */
if (btrfs_extent_readonly(fs_info, disk_bytenr))
goto out_check;
ret = btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr, false);
@ -1723,6 +1720,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
WARN_ON_ONCE(freespace_inode);
goto out_check;
}
/* If the extent's block group is RO, we must COW */
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
goto out_check;
nocow = true;
@ -6085,7 +6083,7 @@ static int btrfs_dirty_inode(struct inode *inode)
return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret && ret == -ENOSPC) {
if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
/* whoops, lets try again with the full transaction */
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(root, 1);
@ -10200,6 +10198,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
sp->ptr = ptr;
sp->inode = inode;
sp->is_block_group = is_block_group;
sp->bg_extent_count = 1;
spin_lock(&fs_info->swapfile_pins_lock);
p = &fs_info->swapfile_pins.rb_node;
@ -10213,6 +10212,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
(sp->ptr == entry->ptr && sp->inode > entry->inode)) {
p = &(*p)->rb_right;
} else {
if (is_block_group)
entry->bg_extent_count++;
spin_unlock(&fs_info->swapfile_pins_lock);
kfree(sp);
return 1;
@ -10238,8 +10239,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode)
sp = rb_entry(node, struct btrfs_swapfile_pin, node);
if (sp->inode == inode) {
rb_erase(&sp->node, &fs_info->swapfile_pins);
if (sp->is_block_group)
if (sp->is_block_group) {
btrfs_dec_block_group_swap_extents(sp->ptr,
sp->bg_extent_count);
btrfs_put_block_group(sp->ptr);
}
kfree(sp);
}
node = next;
@ -10300,7 +10304,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
struct extent_map *em = NULL;
@ -10351,13 +10356,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
"cannot activate swapfile while exclusive operation is running");
return -EBUSY;
}
/*
* Prevent snapshot creation while we are activating the swap file.
* We do not want to race with snapshot creation. If snapshot creation
* already started before we bumped nr_swapfiles from 0 to 1 and
* completes before the first write into the swap file after it is
* activated, than that write would fallback to COW.
*/
if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
btrfs_exclop_finish(fs_info);
btrfs_warn(fs_info,
"cannot activate swapfile because snapshot creation is in progress");
return -EINVAL;
}
/*
* Snapshots can create extents which require COW even if NODATACOW is
* set. We use this counter to prevent snapshots. We must increment it
* before walking the extents because we don't want a concurrent
* snapshot to run after we've already checked the extents.
*/
atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
atomic_inc(&root->nr_swapfiles);
isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
@ -10454,6 +10473,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
goto out;
}
if (!btrfs_inc_block_group_swap_extents(bg)) {
btrfs_warn(fs_info,
"block group for swapfile at %llu is read-only%s",
bg->start,
atomic_read(&fs_info->scrubs_running) ?
" (scrub running)" : "");
btrfs_put_block_group(bg);
ret = -EINVAL;
goto out;
}
ret = btrfs_add_swapfile_pin(inode, bg, true);
if (ret) {
btrfs_put_block_group(bg);
@ -10492,6 +10522,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (ret)
btrfs_swap_deactivate(file);
btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_exclop_finish(fs_info);
if (ret)

View file

@ -1936,7 +1936,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
if (vol_args->size > PAGE_SIZE) {
u64 nums;
if (vol_args->size < sizeof(*inherit) ||
vol_args->size > PAGE_SIZE) {
ret = -EINVAL;
goto free_args;
}
@ -1945,6 +1948,20 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ret = PTR_ERR(inherit);
goto free_args;
}
if (inherit->num_qgroups > PAGE_SIZE ||
inherit->num_ref_copies > PAGE_SIZE ||
inherit->num_excl_copies > PAGE_SIZE) {
ret = -EINVAL;
goto free_inherit;
}
nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2 * inherit->num_excl_copies;
if (vol_args->size != struct_size(inherit, qgroups, nums)) {
ret = -EINVAL;
goto free_inherit;
}
}
ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,

View file

@ -467,7 +467,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes = min_t(unsigned long, destlen, out_len - start_byte);
kaddr = kmap_atomic(dest_page);
kaddr = kmap_local_page(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
/*
@ -477,7 +477,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
*/
if (bytes < destlen)
memset(kaddr+bytes, 0, destlen-bytes);
kunmap_atomic(kaddr);
kunmap_local(kaddr);
out:
return ret;
}

View file

@ -3841,8 +3841,8 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
return num_bytes;
}
static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce)
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@ -3873,14 +3873,14 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
{
int ret;
ret = qgroup_reserve_meta(root, num_bytes, type, enforce);
ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
if (ret <= 0 && ret != -EDQUOT)
return ret;
ret = try_flush_qgroup(root);
if (ret < 0)
return ret;
return qgroup_reserve_meta(root, num_bytes, type, enforce);
return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
}
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)

View file

@ -361,6 +361,8 @@ int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start,
u64 len);
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce);
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce);
/* Reserve metadata space for pertrans and prealloc type */

View file

@ -249,8 +249,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
int i;
char *s;
char *d;
int ret;
ret = alloc_rbio_pages(rbio);
@ -261,13 +259,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
if (!rbio->bio_pages[i])
continue;
s = kmap(rbio->bio_pages[i]);
d = kmap(rbio->stripe_pages[i]);
copy_page(d, s);
kunmap(rbio->bio_pages[i]);
kunmap(rbio->stripe_pages[i]);
copy_highpage(rbio->stripe_pages[i], rbio->bio_pages[i]);
SetPageUptodate(rbio->stripe_pages[i]);
}
set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@ -2359,16 +2351,21 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
SetPageUptodate(p_page);
if (has_qstripe) {
/* RAID6, allocate and map temp space for the Q stripe */
q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (!q_page) {
__free_page(p_page);
goto cleanup;
}
SetPageUptodate(q_page);
pointers[rbio->real_stripes - 1] = kmap(q_page);
}
atomic_set(&rbio->error, 0);
/* Map the parity stripe just once */
pointers[nr_data] = kmap(p_page);
for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
struct page *p;
void *parity;
@ -2378,16 +2375,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
pointers[stripe] = kmap(p);
}
/* then add the parity stripe */
pointers[stripe++] = kmap(p_page);
if (has_qstripe) {
/*
* raid6, add the qstripe and call the
* library function to fill in our p/q
*/
pointers[stripe++] = kmap(q_page);
/* RAID6, call the library function to fill in our P/Q */
raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
pointers);
} else {
@ -2408,12 +2397,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
for (stripe = 0; stripe < nr_data; stripe++)
kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
kunmap(p_page);
}
kunmap(p_page);
__free_page(p_page);
if (q_page)
if (q_page) {
kunmap(q_page);
__free_page(q_page);
}
writeback:
/*

View file

@ -218,11 +218,11 @@ static void __print_stack_trace(struct btrfs_fs_info *fs_info,
stack_trace_print(ra->trace, ra->trace_len, 2);
}
#else
static void inline __save_stack_trace(struct ref_action *ra)
static inline void __save_stack_trace(struct ref_action *ra)
{
}
static void inline __print_stack_trace(struct btrfs_fs_info *fs_info,
static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
struct ref_action *ra)
{
btrfs_err(fs_info, " ref-verify: no stacktrace support");

View file

@ -106,12 +106,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
if (comp_type == BTRFS_COMPRESS_NONE) {
char *map;
map = kmap(page);
memcpy(map, data_start, datal);
memcpy_to_page(page, 0, data_start, datal);
flush_dcache_page(page);
kunmap(page);
} else {
ret = btrfs_decompress(comp_type, data_start, page, 0,
inline_size, datal);
@ -553,6 +549,24 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
*/
btrfs_release_path(path);
/*
* When using NO_HOLES and we are cloning a range that covers
* only a hole (no extents) into a range beyond the current
* i_size, punching a hole in the target range will not create
* an extent map defining a hole, because the range starts at or
* beyond current i_size. If the file previously had an i_size
* greater than the new i_size set by this clone operation, we
* need to make sure the next fsync is a full fsync, so that it
* detects and logs a hole covering a range from the current
* i_size to the new i_size. If the clone range covers extents,
* besides a hole, then we know the full sync flag was already
* set by previous calls to btrfs_replace_file_extents() that
* replaced file extent items.
*/
if (last_dest_end >= i_size_read(inode))
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
ret = btrfs_replace_file_extents(inode, path, last_dest_end,
destoff + len - 1, NULL, &trans);
if (ret)

View file

@ -3767,6 +3767,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* commit_transactions.
*/
ro_set = 0;
} else if (ret == -ETXTBSY) {
btrfs_warn(fs_info,
"skipping scrub of block group %llu due to active swapfile",
cache->start);
scrub_pause_off(fs_info);
ret = 0;
goto skip_unfreeze;
} else {
btrfs_warn(fs_info,
"failed setting block group ro: %d", ret);
@ -3862,7 +3869,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
} else {
spin_unlock(&cache->lock);
}
skip_unfreeze:
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
if (ret)

View file

@ -4932,7 +4932,6 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
struct btrfs_fs_info *fs_info = root->fs_info;
struct inode *inode;
struct page *page;
char *addr;
pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t last_index;
unsigned pg_offset = offset_in_page(offset);
@ -4985,10 +4984,8 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
}
}
addr = kmap(page);
memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset,
cur_len);
kunmap(page);
memcpy_from_page(sctx->send_buf + sctx->send_size, page,
pg_offset, cur_len);
unlock_page(page);
put_page(page);
index++;

View file

@ -1918,8 +1918,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
(bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
(!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
btrfs_warn(fs_info,
"remount supports changing free space tree only from ro to rw");

View file

@ -1453,22 +1453,14 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
return -EUCLEAN;
}
for (; ptr < end; ptr += sizeof(*dref)) {
u64 root_objectid;
u64 owner;
u64 offset;
u64 hash;
/*
* We cannot check the extent_data_ref hash due to possible
* overflow from the leaf due to hash collisions.
*/
dref = (struct btrfs_extent_data_ref *)ptr;
root_objectid = btrfs_extent_data_ref_root(leaf, dref);
owner = btrfs_extent_data_ref_objectid(leaf, dref);
offset = btrfs_extent_data_ref_offset(leaf, dref);
hash = hash_extent_data_ref(root_objectid, owner, offset);
if (unlikely(hash != key->offset)) {
extent_err(leaf, slot,
"invalid extent data ref hash, item has 0x%016llx key has 0x%016llx",
hash, key->offset);
return -EUCLEAN;
}
if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
extent_err(leaf, slot,
"invalid extent data backref offset, have %llu expect aligned to %u",

View file

@ -3174,16 +3174,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
root_log_ctx.log_transid = log_root_tree->log_transid;
if (btrfs_is_zoned(fs_info)) {
mutex_lock(&fs_info->tree_root->log_mutex);
if (!log_root_tree->node) {
ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
if (ret) {
mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&log_root_tree->log_mutex);
goto out;
}
}
mutex_unlock(&fs_info->tree_root->log_mutex);
}
/*

View file

@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
const bool start_trans = (current->journal_info == NULL);
int ret;
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
if (start_trans) {
/*
* 1 unit for inserting/updating/deleting the xattr
* 1 unit for the inode item update
*/
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
} else {
/*
* This can happen when smack is enabled and a directory is being
* created. It happens through d_instantiate_new(), which calls
* smack_d_instantiate(), which in turn calls __vfs_setxattr() to
* set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the
* inode. We have already reserved space for the xattr and inode
* update at btrfs_mkdir(), so just use the transaction handle.
* We don't join or start a transaction, as that will reset the
* block_rsv of the handle and trigger a warning for the start
* case.
*/
ASSERT(strncmp(name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN) == 0);
trans = current->journal_info;
}
ret = btrfs_setxattr(trans, inode, name, value, size, flags);
if (ret)
@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
BUG_ON(ret);
out:
btrfs_end_transaction(trans);
if (start_trans)
btrfs_end_transaction(trans);
return ret;
}

View file

@ -432,9 +432,8 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in,
PAGE_SIZE - (buf_offset % PAGE_SIZE));
bytes = min(bytes, bytes_left);
kaddr = kmap_atomic(dest_page);
memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
kunmap_atomic(kaddr);
memcpy_to_page(dest_page, pg_offset,
workspace->buf + buf_offset, bytes);
pg_offset += bytes;
bytes_left -= bytes;

View file

@ -269,7 +269,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
sector_t sector = 0;
struct blk_zone *zones = NULL;
unsigned int i, nreported = 0, nr_zones;
unsigned int zone_sectors;
sector_t zone_sectors;
char *model, *emulated;
int ret;
@ -658,7 +658,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
u64 *bytenr_ret)
{
struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES];
unsigned int zone_sectors;
sector_t zone_sectors;
u32 sb_zone;
int ret;
u8 zone_sectors_shift;

View file

@ -688,10 +688,8 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
bytes = min_t(unsigned long, destlen - pg_offset,
workspace->out_buf.size - buf_offset);
kaddr = kmap_atomic(dest_page);
memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
bytes);
kunmap_atomic(kaddr);
memcpy_to_page(dest_page, pg_offset,
workspace->out_buf.dst + buf_offset, bytes);
pg_offset += bytes;
}

View file

@ -16,6 +16,7 @@
#include <linux/rculist_nulls.h>
#include <linux/cpu.h>
#include <linux/tracehook.h>
#include <linux/freezer.h>
#include "../kernel/sched/sched.h"
#include "io-wq.h"
@ -52,9 +53,6 @@ struct io_worker {
struct io_wq_work *cur_work;
spinlock_t lock;
const struct cred *cur_creds;
const struct cred *saved_creds;
struct completion ref_done;
struct rcu_head rcu;
@ -117,7 +115,10 @@ struct io_wq {
struct io_wq_hash *hash;
refcount_t refs;
struct completion done;
struct completion exited;
atomic_t worker_refs;
struct completion worker_done;
struct hlist_node cpuhp_node;
@ -126,6 +127,17 @@ struct io_wq {
static enum cpuhp_state io_wq_online;
struct io_cb_cancel_data {
work_cancel_fn *fn;
void *data;
int nr_running;
int nr_pending;
bool cancel_all;
};
static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
struct io_cb_cancel_data *match);
static bool io_worker_get(struct io_worker *worker)
{
return refcount_inc_not_zero(&worker->ref);
@ -175,11 +187,6 @@ static void io_worker_exit(struct io_worker *worker)
worker->flags = 0;
preempt_enable();
if (worker->saved_creds) {
revert_creds(worker->saved_creds);
worker->cur_creds = worker->saved_creds = NULL;
}
raw_spin_lock_irq(&wqe->lock);
if (flags & IO_WORKER_F_FREE)
hlist_nulls_del_rcu(&worker->nulls_node);
@ -188,7 +195,9 @@ static void io_worker_exit(struct io_worker *worker)
raw_spin_unlock_irq(&wqe->lock);
kfree_rcu(worker, rcu);
io_wq_put(wqe->wq);
if (atomic_dec_and_test(&wqe->wq->worker_refs))
complete(&wqe->wq->worker_done);
do_exit(0);
}
static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@ -263,12 +272,6 @@ static void io_wqe_dec_running(struct io_worker *worker)
io_wqe_wake_worker(wqe, acct);
}
static void io_worker_start(struct io_worker *worker)
{
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
io_wqe_inc_running(worker);
}
/*
* Worker will start processing some work. Move it to the busy list, if
* it's currently on the freelist
@ -319,10 +322,6 @@ static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
worker->flags |= IO_WORKER_F_FREE;
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
}
if (worker->saved_creds) {
revert_creds(worker->saved_creds);
worker->cur_creds = worker->saved_creds = NULL;
}
}
static inline unsigned int io_get_work_hash(struct io_wq_work *work)
@ -397,18 +396,6 @@ static void io_flush_signals(void)
}
}
static void io_wq_switch_creds(struct io_worker *worker,
struct io_wq_work *work)
{
const struct cred *old_creds = override_creds(work->creds);
worker->cur_creds = work->creds;
if (worker->saved_creds)
put_cred(old_creds); /* creds set by previous switch */
else
worker->saved_creds = old_creds;
}
static void io_assign_current_work(struct io_worker *worker,
struct io_wq_work *work)
{
@ -458,8 +445,6 @@ static void io_worker_handle_work(struct io_worker *worker)
unsigned int hash = io_get_work_hash(work);
next_hashed = wq_next_work(work);
if (work->creds && worker->cur_creds != work->creds)
io_wq_switch_creds(worker, work);
wq->do_work(work);
io_assign_current_work(worker, NULL);
@ -495,8 +480,13 @@ static int io_wqe_worker(void *data)
struct io_worker *worker = data;
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
char buf[TASK_COMM_LEN];
io_worker_start(worker);
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
io_wqe_inc_running(worker);
sprintf(buf, "iou-wrk-%d", wq->task_pid);
set_task_comm(current, buf);
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
set_current_state(TASK_INTERRUPTIBLE);
@ -571,67 +561,11 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
raw_spin_unlock_irq(&worker->wqe->lock);
}
static int task_thread(void *data, int index)
{
struct io_worker *worker = data;
struct io_wqe *wqe = worker->wqe;
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_wq *wq = wqe->wq;
char buf[TASK_COMM_LEN];
sprintf(buf, "iou-wrk-%d", wq->task_pid);
set_task_comm(current, buf);
current->pf_io_worker = worker;
worker->task = current;
set_cpus_allowed_ptr(current, cpumask_of_node(wqe->node));
current->flags |= PF_NO_SETAFFINITY;
raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
worker->flags |= IO_WORKER_F_FREE;
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
acct->nr_workers++;
raw_spin_unlock_irq(&wqe->lock);
io_wqe_worker(data);
do_exit(0);
}
static int task_thread_bound(void *data)
{
return task_thread(data, IO_WQ_ACCT_BOUND);
}
static int task_thread_unbound(void *data)
{
return task_thread(data, IO_WQ_ACCT_UNBOUND);
}
pid_t io_wq_fork_thread(int (*fn)(void *), void *arg)
{
unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
CLONE_IO|SIGCHLD;
struct kernel_clone_args args = {
.flags = ((lower_32_bits(flags) | CLONE_VM |
CLONE_UNTRACED) & ~CSIGNAL),
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
.stack = (unsigned long)fn,
.stack_size = (unsigned long)arg,
};
return kernel_clone(&args);
}
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;
pid_t pid;
struct task_struct *tsk;
__set_current_state(TASK_RUNNING);
@ -645,17 +579,32 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
refcount_inc(&wq->refs);
atomic_inc(&wq->worker_refs);
if (index == IO_WQ_ACCT_BOUND)
pid = io_wq_fork_thread(task_thread_bound, worker);
else
pid = io_wq_fork_thread(task_thread_unbound, worker);
if (pid < 0) {
io_wq_put(wq);
tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
if (IS_ERR(tsk)) {
if (atomic_dec_and_test(&wq->worker_refs))
complete(&wq->worker_done);
kfree(worker);
return false;
}
tsk->pf_io_worker = worker;
worker->task = tsk;
set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY;
raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
worker->flags |= IO_WORKER_F_FREE;
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
acct->nr_workers++;
raw_spin_unlock_irq(&wqe->lock);
wake_up_new_task(tsk);
return true;
}
@ -664,6 +613,8 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
{
struct io_wqe_acct *acct = &wqe->acct[index];
if (acct->nr_workers && test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state))
return false;
/* if we have available workers or no work, no need */
if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
return false;
@ -697,6 +648,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
static bool io_wq_worker_wake(struct io_worker *worker, void *data)
{
set_notify_signal(worker->task);
wake_up_process(worker->task);
return false;
}
@ -725,6 +677,23 @@ static void io_wq_check_workers(struct io_wq *wq)
}
}
static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
{
return true;
}
static void io_wq_cancel_pending(struct io_wq *wq)
{
struct io_cb_cancel_data match = {
.fn = io_wq_work_match_all,
.cancel_all = true,
};
int node;
for_each_node(node)
io_wqe_cancel_pending_work(wq->wqes[node], &match);
}
/*
* Manager thread. Tasked with creating new workers, if we need them.
*/
@ -732,25 +701,38 @@ static int io_wq_manager(void *data)
{
struct io_wq *wq = data;
char buf[TASK_COMM_LEN];
int node;
sprintf(buf, "iou-mgr-%d", wq->task_pid);
set_task_comm(current, buf);
current->flags |= PF_IO_WORKER;
wq->manager = current;
complete(&wq->done);
do {
set_current_state(TASK_INTERRUPTIBLE);
io_wq_check_workers(wq);
schedule_timeout(HZ);
try_to_freeze();
if (fatal_signal_pending(current))
set_bit(IO_WQ_BIT_EXIT, &wq->state);
} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
io_wq_check_workers(wq);
wq->manager = NULL;
io_wq_put(wq);
rcu_read_lock();
for_each_node(node)
io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
rcu_read_unlock();
/* we might not ever have created any workers */
if (atomic_read(&wq->worker_refs))
wait_for_completion(&wq->worker_done);
spin_lock_irq(&wq->hash->wait.lock);
for_each_node(node)
list_del_init(&wq->wqes[node]->wait.entry);
spin_unlock_irq(&wq->hash->wait.lock);
io_wq_cancel_pending(wq);
complete(&wq->exited);
do_exit(0);
}
@ -787,23 +769,20 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
static int io_wq_fork_manager(struct io_wq *wq)
{
int ret;
struct task_struct *tsk;
if (wq->manager)
return 0;
clear_bit(IO_WQ_BIT_EXIT, &wq->state);
refcount_inc(&wq->refs);
current->flags |= PF_IO_WORKER;
ret = io_wq_fork_thread(io_wq_manager, wq);
current->flags &= ~PF_IO_WORKER;
if (ret >= 0) {
wait_for_completion(&wq->done);
reinit_completion(&wq->worker_done);
tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE);
if (!IS_ERR(tsk)) {
wq->manager = get_task_struct(tsk);
wake_up_new_task(tsk);
return 0;
}
io_wq_put(wq);
return ret;
return PTR_ERR(tsk);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
@ -813,7 +792,8 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
unsigned long flags;
/* Can only happen if manager creation fails after exec */
if (unlikely(io_wq_fork_manager(wqe->wq))) {
if (io_wq_fork_manager(wqe->wq) ||
test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
work->flags |= IO_WQ_WORK_CANCEL;
wqe->wq->do_work(work);
return;
@ -849,14 +829,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}
struct io_cb_cancel_data {
work_cancel_fn *fn;
void *data;
int nr_running;
int nr_pending;
bool cancel_all;
};
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
@ -1043,16 +1015,18 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
}
wq->task_pid = current->pid;
init_completion(&wq->done);
init_completion(&wq->exited);
refcount_set(&wq->refs, 1);
init_completion(&wq->worker_done);
atomic_set(&wq->worker_refs, 0);
ret = io_wq_fork_manager(wq);
if (!ret)
return wq;
io_wq_put(wq);
io_wq_put_hash(data->hash);
err:
io_wq_put_hash(data->hash);
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node)
kfree(wq->wqes[node]);
@ -1063,6 +1037,16 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(ret);
}
static void io_wq_destroy_manager(struct io_wq *wq)
{
if (wq->manager) {
wake_up_process(wq->manager);
wait_for_completion(&wq->exited);
put_task_struct(wq->manager);
wq->manager = NULL;
}
}
static void io_wq_destroy(struct io_wq *wq)
{
int node;
@ -1070,26 +1054,16 @@ static void io_wq_destroy(struct io_wq *wq)
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (wq->manager)
wake_up_process(wq->manager);
io_wq_destroy_manager(wq);
rcu_read_lock();
for_each_node(node)
io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
rcu_read_unlock();
spin_lock_irq(&wq->hash->wait.lock);
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
list_del_init(&wqe->wait.entry);
WARN_ON_ONCE(!wq_list_empty(&wqe->work_list));
kfree(wqe);
}
spin_unlock_irq(&wq->hash->wait.lock);
io_wq_put_hash(wq->hash);
kfree(wq->wqes);
kfree(wq);
}
void io_wq_put(struct io_wq *wq)
@ -1098,6 +1072,13 @@ void io_wq_put(struct io_wq *wq)
io_wq_destroy(wq);
}
void io_wq_put_and_exit(struct io_wq *wq)
{
set_bit(IO_WQ_BIT_EXIT, &wq->state);
io_wq_destroy_manager(wq);
io_wq_put(wq);
}
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
{
struct task_struct *task = worker->task;

View file

@ -79,8 +79,8 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work {
struct io_wq_work_node list;
const struct cred *creds;
unsigned flags;
unsigned short personality;
};
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
@ -114,12 +114,11 @@ struct io_wq_data {
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
void io_wq_put(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);
pid_t io_wq_fork_thread(int (*fn)(void *), void *arg);
static inline bool io_wq_is_hashed(struct io_wq_work *work)
{
return work->flags & IO_WQ_WORK_HASHED;

File diff suppressed because it is too large Load diff

Some files were not shown because too many files have changed in this diff Show more