diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index cd59a5918038..b92a9091a1e2 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -11,7 +11,8 @@ struct msm_fence_context * -msm_fence_context_alloc(struct drm_device *dev, const char *name) +msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr, + const char *name) { struct msm_fence_context *fctx; @@ -22,6 +23,7 @@ msm_fence_context_alloc(struct drm_device *dev, const char *name) fctx->dev = dev; strncpy(fctx->name, name, sizeof(fctx->name)); fctx->context = dma_fence_context_alloc(1); + fctx->fenceptr = fenceptr; init_waitqueue_head(&fctx->event); spin_lock_init(&fctx->spinlock); @@ -35,7 +37,12 @@ void msm_fence_context_free(struct msm_fence_context *fctx) static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fence) { - return (int32_t)(fctx->completed_fence - fence) >= 0; + /* + * Note: Check completed_fence first, as fenceptr is in a write-combine + * mapping, so it will be more expensive to read. + */ + return (int32_t)(fctx->completed_fence - fence) >= 0 || + (int32_t)(*fctx->fenceptr - fence) >= 0; } /* legacy path for WAIT_FENCE ioctl: */ diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h index 2d9af66dcca5..6ab97062ff1a 100644 --- a/drivers/gpu/drm/msm/msm_fence.h +++ b/drivers/gpu/drm/msm/msm_fence.h @@ -9,19 +9,52 @@ #include "msm_drv.h" +/** + * struct msm_fence_context - fence context for gpu + * + * Each ringbuffer has a single fence context, with the GPU writing an + * incrementing fence seqno at the end of each submit + */ struct msm_fence_context { struct drm_device *dev; + /** name: human readable name for fence timeline */ char name[32]; + /** context: see dma_fence_context_alloc() */ unsigned context; - /* last_fence == completed_fence --> no pending work */ - uint32_t last_fence; /* last assigned fence */ - uint32_t completed_fence; /* last completed fence */ + + /** + * last_fence: + * + * Last assigned fence, incremented each time a fence is created + * on this fence context. If last_fence == completed_fence, + * there is no remaining pending work + */ + uint32_t last_fence; + + /** + * completed_fence: + * + * The last completed fence, updated from the CPU after interrupt + * from GPU + */ + uint32_t completed_fence; + + /** + * fenceptr: + * + * The address that the GPU directly writes with completed fence + * seqno. This can be ahead of completed_fence. We can peek at + * this to see if a fence has already signaled but the CPU hasn't + * gotten around to handling the irq and updating completed_fence + */ + volatile uint32_t *fenceptr; + wait_queue_head_t event; spinlock_t spinlock; }; struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev, - const char *name); + volatile uint32_t *fenceptr, const char *name); void msm_fence_context_free(struct msm_fence_context *fctx); int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence, diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 4d2a2a4abef8..7e92d9532454 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -51,7 +51,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); - ring->fctx = msm_fence_context_alloc(gpu->dev, name); + ring->fctx = msm_fence_context_alloc(gpu->dev, &ring->memptrs->fence, name); return ring;