linux-stable/drivers/gpu/drm/i915/i915_perf_types.h
Lionel Landwerlin 9cd20ef780 drm/i915/perf: allow holding preemption on filtered ctx
We would like to make use of perf in Vulkan. The Vulkan API is much
lower level than OpenGL, with applications directly exposed to the
concept of command buffers (pretty much equivalent to our batch
buffers). In Vulkan, queries are always limited in scope to a command
buffer. In OpenGL, the lack of command buffer concept meant that
queries' duration could span multiple command buffers.

With that restriction gone in Vulkan, we would like to simplify
measuring performance just by measuring the deltas between the counter
snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the
more complex scheme we currently have in the GL driver, using 2
MI_RECORD_PERF_COUNT commands and doing some post processing on the
stream of OA reports, coming from the global OA buffer, to remove any
unrelated deltas in between the 2 MI_RECORD_PERF_COUNT.

Disabling preemption only apply to a single context with which want to
query performance counters for and is considered a privileged
operation, by default protected by CAP_SYS_ADMIN. It is possible to
enable it for a normal user by disabling the paranoid stream setting.

v2: Store preemption setting in intel_context (Chris)

v3: Use priorities to avoid preemption rather than the HW mechanism

v4: Just modify the port priority reporting function

v5: Add nopreempt flag on gem context and always flag requests
    appropriately, regarless of OA reconfiguration.

Link: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/932
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20191014201404.22468-4-chris@chris-wilson.co.uk
2019-10-14 21:30:28 +01:00

406 lines
10 KiB
C

/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2019 Intel Corporation
*/
#ifndef _I915_PERF_TYPES_H_
#define _I915_PERF_TYPES_H_
#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/hrtimer.h>
#include <linux/llist.h>
#include <linux/poll.h>
#include <linux/sysfs.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/wait.h>
#include "i915_reg.h"
#include "intel_wakeref.h"
struct drm_i915_private;
struct file;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
struct intel_context;
struct intel_engine_cs;
struct i915_oa_format {
u32 format;
int size;
};
struct i915_oa_reg {
i915_reg_t addr;
u32 value;
};
struct i915_oa_config {
struct i915_perf *perf;
char uuid[UUID_STRING_LEN + 1];
int id;
const struct i915_oa_reg *mux_regs;
u32 mux_regs_len;
const struct i915_oa_reg *b_counter_regs;
u32 b_counter_regs_len;
const struct i915_oa_reg *flex_regs;
u32 flex_regs_len;
struct attribute_group sysfs_metric;
struct attribute *attrs[2];
struct device_attribute sysfs_metric_id;
struct kref ref;
struct rcu_head rcu;
};
struct i915_perf_stream;
/**
* struct i915_perf_stream_ops - the OPs to support a specific stream type
*/
struct i915_perf_stream_ops {
/**
* @enable: Enables the collection of HW samples, either in response to
* `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
* without `I915_PERF_FLAG_DISABLED`.
*/
void (*enable)(struct i915_perf_stream *stream);
/**
* @disable: Disables the collection of HW samples, either in response
* to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
* the stream.
*/
void (*disable)(struct i915_perf_stream *stream);
/**
* @poll_wait: Call poll_wait, passing a wait queue that will be woken
* once there is something ready to read() for the stream
*/
void (*poll_wait)(struct i915_perf_stream *stream,
struct file *file,
poll_table *wait);
/**
* @wait_unlocked: For handling a blocking read, wait until there is
* something to ready to read() for the stream. E.g. wait on the same
* wait queue that would be passed to poll_wait().
*/
int (*wait_unlocked)(struct i915_perf_stream *stream);
/**
* @read: Copy buffered metrics as records to userspace
* **buf**: the userspace, destination buffer
* **count**: the number of bytes to copy, requested by userspace
* **offset**: zero at the start of the read, updated as the read
* proceeds, it represents how many bytes have been copied so far and
* the buffer offset for copying the next record.
*
* Copy as many buffered i915 perf samples and records for this stream
* to userspace as will fit in the given buffer.
*
* Only write complete records; returning -%ENOSPC if there isn't room
* for a complete record.
*
* Return any error condition that results in a short read such as
* -%ENOSPC or -%EFAULT, even though these may be squashed before
* returning to userspace.
*/
int (*read)(struct i915_perf_stream *stream,
char __user *buf,
size_t count,
size_t *offset);
/**
* @destroy: Cleanup any stream specific resources.
*
* The stream will always be disabled before this is called.
*/
void (*destroy)(struct i915_perf_stream *stream);
};
/**
* struct i915_perf_stream - state for a single open stream FD
*/
struct i915_perf_stream {
/**
* @perf: i915_perf backpointer
*/
struct i915_perf *perf;
/**
* @uncore: mmio access path
*/
struct intel_uncore *uncore;
/**
* @engine: Engine associated with this performance stream.
*/
struct intel_engine_cs *engine;
/**
* @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
* properties given when opening a stream, representing the contents
* of a single sample as read() by userspace.
*/
u32 sample_flags;
/**
* @sample_size: Considering the configured contents of a sample
* combined with the required header size, this is the total size
* of a single sample record.
*/
int sample_size;
/**
* @ctx: %NULL if measuring system-wide across all contexts or a
* specific context that is being monitored.
*/
struct i915_gem_context *ctx;
/**
* @enabled: Whether the stream is currently enabled, considering
* whether the stream was opened in a disabled state and based
* on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
*/
bool enabled;
/**
* @hold_preemption: Whether preemption is put on hold for command
* submissions done on the @ctx. This is useful for some drivers that
* cannot easily post process the OA buffer context to subtract delta
* of performance counters not associated with @ctx.
*/
bool hold_preemption;
/**
* @ops: The callbacks providing the implementation of this specific
* type of configured stream.
*/
const struct i915_perf_stream_ops *ops;
/**
* @oa_config: The OA configuration used by the stream.
*/
struct i915_oa_config *oa_config;
/**
* @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
* each time @oa_config changes.
*/
struct llist_head oa_config_bos;
/**
* @pinned_ctx: The OA context specific information.
*/
struct intel_context *pinned_ctx;
u32 specific_ctx_id;
u32 specific_ctx_id_mask;
struct hrtimer poll_check_timer;
wait_queue_head_t poll_wq;
bool pollin;
bool periodic;
int period_exponent;
/**
* @oa_buffer: State of the OA buffer.
*/
struct {
struct i915_vma *vma;
u8 *vaddr;
u32 last_ctx_id;
int format;
int format_size;
int size_exponent;
/**
* @ptr_lock: Locks reads and writes to all head/tail state
*
* Consider: the head and tail pointer state needs to be read
* consistently from a hrtimer callback (atomic context) and
* read() fop (user context) with tail pointer updates happening
* in atomic context and head updates in user context and the
* (unlikely) possibility of read() errors needing to reset all
* head/tail state.
*
* Note: Contention/performance aren't currently a significant
* concern here considering the relatively low frequency of
* hrtimer callbacks (5ms period) and that reads typically only
* happen in response to a hrtimer event and likely complete
* before the next callback.
*
* Note: This lock is not held *while* reading and copying data
* to userspace so the value of head observed in htrimer
* callbacks won't represent any partial consumption of data.
*/
spinlock_t ptr_lock;
/**
* @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
* used for reading.
*
* Initial values of 0xffffffff are invalid and imply that an
* update is required (and should be ignored by an attempted
* read)
*/
struct {
u32 offset;
} tails[2];
/**
* @aged_tail_idx: Index for the aged tail ready to read() data up to.
*/
unsigned int aged_tail_idx;
/**
* @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
* was read; used to determine when it is old enough to trust.
*/
u64 aging_timestamp;
/**
* @head: Although we can always read back the head pointer register,
* we prefer to avoid trusting the HW state, just to avoid any
* risk that some hardware condition could * somehow bump the
* head pointer unpredictably and cause us to forward the wrong
* OA buffer data to userspace.
*/
u32 head;
} oa_buffer;
/**
* A batch buffer doing a wait on the GPU for the NOA logic to be
* reprogrammed.
*/
struct i915_vma *noa_wait;
};
/**
* struct i915_oa_ops - Gen specific implementation of an OA unit stream
*/
struct i915_oa_ops {
/**
* @is_valid_b_counter_reg: Validates register's address for
* programming boolean counters for a particular platform.
*/
bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr);
/**
* @is_valid_mux_reg: Validates register's address for programming mux
* for a particular platform.
*/
bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr);
/**
* @is_valid_flex_reg: Validates register's address for programming
* flex EU filtering for a particular platform.
*/
bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr);
/**
* @enable_metric_set: Selects and applies any MUX configuration to set
* up the Boolean and Custom (B/C) counters that are part of the
* counter reports being sampled. May apply system constraints such as
* disabling EU clock gating as required.
*/
int (*enable_metric_set)(struct i915_perf_stream *stream);
/**
* @disable_metric_set: Remove system constraints associated with using
* the OA unit.
*/
void (*disable_metric_set)(struct i915_perf_stream *stream);
/**
* @oa_enable: Enable periodic sampling
*/
void (*oa_enable)(struct i915_perf_stream *stream);
/**
* @oa_disable: Disable periodic sampling
*/
void (*oa_disable)(struct i915_perf_stream *stream);
/**
* @read: Copy data from the circular OA buffer into a given userspace
* buffer.
*/
int (*read)(struct i915_perf_stream *stream,
char __user *buf,
size_t count,
size_t *offset);
/**
* @oa_hw_tail_read: read the OA tail pointer register
*
* In particular this enables us to share all the fiddly code for
* handling the OA unit tail pointer race that affects multiple
* generations.
*/
u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};
struct i915_perf {
struct drm_i915_private *i915;
struct kobject *metrics_kobj;
struct ctl_table_header *sysctl_header;
/*
* Lock associated with adding/modifying/removing OA configs
* in perf->metrics_idr.
*/
struct mutex metrics_lock;
/*
* List of dynamic configurations (struct i915_oa_config), you
* need to hold perf->metrics_lock to access it.
*/
struct idr metrics_idr;
/*
* Lock associated with anything below within this structure
* except exclusive_stream.
*/
struct mutex lock;
/*
* The stream currently using the OA unit. If accessed
* outside a syscall associated to its file
* descriptor.
*/
struct i915_perf_stream *exclusive_stream;
/**
* For rate limiting any notifications of spurious
* invalid OA reports
*/
struct ratelimit_state spurious_report_rs;
struct i915_oa_config test_config;
u32 gen7_latched_oastatus1;
u32 ctx_oactxctrl_offset;
u32 ctx_flexeu0_offset;
/**
* The RPT_ID/reason field for Gen8+ includes a bit
* to determine if the CTX ID in the report is valid
* but the specific bit differs between Gen 8 and 9
*/
u32 gen8_valid_ctx_bit;
struct i915_oa_ops ops;
const struct i915_oa_format *oa_formats;
atomic64_t noa_programming_delay;
};
#endif /* _I915_PERF_TYPES_H_ */