Skip to content

Commit

Permalink
drm/vc4: Rework UPM allocation to avoid double buffering
Browse files Browse the repository at this point in the history
The previous UPM allocation was done per plane state, even though
there is no overlap of usage between frames.

Allocate per plane, and only change the allocation if the required
size changes. We have to reference count as plane states can
be duplicated.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
  • Loading branch information
6by9 committed Sep 27, 2024
1 parent 0fb3c83 commit 3c45267
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 33 deletions.
17 changes: 14 additions & 3 deletions drivers/gpu/drm/vc4/vc4_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,19 @@ struct vc4_v3d {
struct debugfs_regset32 regset;
};

/*
 * Number of UPM handles. Handle IDs are allocated from the range
 * 1..VC4_NUM_UPM_HANDLES; a value of 0 in vc4_plane_state.upm_handle[]
 * means "no handle".
 */
#define VC4_NUM_UPM_HANDLES 32
/*
 * Bookkeeping for one UPM handle, indexed by handle ID in
 * vc4_hvs.upm_refcounts[]. The entry is shared by every plane state that
 * carries the handle, hence the reference count.
 */
struct vc4_upm_refcounts {
/*
 * References held on this handle: set to 1 when the handle is allocated,
 * incremented when a plane state carrying the handle is duplicated, and
 * dropped when such a state is destroyed or gives the handle up.
 */
refcount_t refcount;

/* Allocation size requested for this handle; compared against the newly
 * required size to decide whether the allocation can be kept.
 */
size_t size;
/* Our allocation in UPM for prefetching. */
struct drm_mm_node upm;

/* Pointer back to the HVS structure */
struct vc4_hvs *hvs;
};

#define HVS_NUM_CHANNELS 3

struct vc4_hvs {
Expand Down Expand Up @@ -351,6 +364,7 @@ struct vc4_hvs {
/* Memory manager for the UPM memory used for prefetching. */
struct drm_mm upm_mm;
struct ida upm_handles;
struct vc4_upm_refcounts upm_refcounts[VC4_NUM_UPM_HANDLES + 1];

spinlock_t mm_lock;

Expand Down Expand Up @@ -446,9 +460,6 @@ struct vc4_plane_state {
bool is_unity;
bool is_yuv;

/* Our allocation in UPM for prefetching. */
struct drm_mm_node upm[DRM_FORMAT_MAX_PLANES];

/* The Unified Pre-Fetcher Handle */
unsigned int upm_handle[DRM_FORMAT_MAX_PLANES];

Expand Down
33 changes: 31 additions & 2 deletions drivers/gpu/drm/vc4/vc4_hvs.c
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,27 @@ static int vc4_hvs_debugfs_dlist_allocs(struct seq_file *m, void *data)
return 0;
}

/*
 * debugfs show callback (registered as "hvs_upm"): print the reference
 * count, requested size and UPM range of every UPM handle.
 *
 * Handle IDs are allocated from 1..VC4_NUM_UPM_HANDLES and are used
 * directly as the index into hvs->upm_refcounts[], which is sized
 * VC4_NUM_UPM_HANDLES + 1. Slot 0 is therefore never used and slot
 * VC4_NUM_UPM_HANDLES is a valid handle, so walk exactly that range
 * rather than 0..VC4_NUM_UPM_HANDLES - 1.
 *
 * @m: seq_file whose ->private is the drm_debugfs_entry for this file.
 * @data: unused.
 *
 * Always returns 0.
 */
static int vc6_hvs_debugfs_upm_allocs(struct seq_file *m, void *data)
{
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_hvs *hvs = vc4->hvs;
	struct drm_printer p = drm_seq_file_printer(m);
	struct vc4_upm_refcounts *refcount;
	unsigned int i;

	drm_printf(&p, "UPM Handles:\n");
	for (i = 1; i <= VC4_NUM_UPM_HANDLES; i++) {
		refcount = &hvs->upm_refcounts[i];
		drm_printf(&p, "handle %u: refcount %u, size %zu [%08llx + %08llx]\n",
			   i, refcount_read(&refcount->refcount), refcount->size,
			   refcount->upm.start, refcount->upm.size);
	}

	return 0;
}

/* The filter kernel is composed of dwords each containing 3 9-bit
* signed integers packed next to each other.
*/
Expand Down Expand Up @@ -1731,10 +1752,12 @@ int vc4_hvs_debugfs_init(struct drm_minor *minor)
NULL);
}

if (vc4->gen >= VC4_GEN_6)
if (vc4->gen >= VC4_GEN_6) {
drm_debugfs_add_file(drm, "hvs_dlists", vc6_hvs_debugfs_dlist, NULL);
else
drm_debugfs_add_file(drm, "hvs_upm", vc6_hvs_debugfs_upm_allocs, NULL);
} else {
drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL);
}

drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL);

Expand All @@ -1754,6 +1777,7 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
unsigned int dlist_start;
size_t dlist_size;
size_t lbm_size;
unsigned int i;

hvs = drmm_kzalloc(drm, sizeof(*hvs), GFP_KERNEL);
if (!hvs)
Expand Down Expand Up @@ -1793,6 +1817,11 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
else
dlist_size = 4096;

for (i = 0; i < VC4_NUM_UPM_HANDLES; i++) {
refcount_set(&hvs->upm_refcounts[i].refcount, 0);
hvs->upm_refcounts[i].hvs = hvs;
}

break;

default:
Expand Down
130 changes: 102 additions & 28 deletions drivers/gpu/drm/vc4/vc4_plane.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@ static bool plane_enabled(struct drm_plane_state *state)

struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
struct vc4_hvs *hvs = vc4->hvs;
struct vc4_plane_state *vc4_state;
unsigned int i;

Expand All @@ -288,10 +290,10 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
if (!vc4_state)
return NULL;

memset(&vc4_state->upm, 0, sizeof(vc4_state->upm));

for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++)
vc4_state->upm_handle[i] = 0;
for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
if (vc4_state->upm_handle[i])
refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
}

vc4_state->dlist_initialized = 0;

Expand All @@ -311,6 +313,21 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
return &vc4_state->base;
}

/*
 * Give back the UPM space and the handle ID associated with @upm_handle.
 *
 * Callers in this file invoke this only after refcount_dec_and_test() on
 * the handle's refcount reported that the last reference was dropped.
 *
 * @hvs: HVS instance owning the UPM allocator and the handle IDA.
 * @upm_handle: handle ID, also the index into hvs->upm_refcounts[].
 */
void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
{
	struct vc4_upm_refcounts *entry;
	unsigned long flags;

	entry = &hvs->upm_refcounts[upm_handle];

	/* mm_lock is held around the drm_mm removal, as it is for insertion. */
	spin_lock_irqsave(&hvs->mm_lock, flags);
	drm_mm_remove_node(&entry->upm);
	spin_unlock_irqrestore(&hvs->mm_lock, flags);

	/* Clear the bookkeeping so the slot reads as empty. */
	entry->size = 0;
	entry->upm.size = 0;
	entry->upm.start = 0;

	/* Finally make the handle ID available for reuse. */
	ida_free(&hvs->upm_handles, upm_handle);
}

void vc4_plane_destroy_state(struct drm_plane *plane,
struct drm_plane_state *state)
{
Expand All @@ -320,17 +337,15 @@ void vc4_plane_destroy_state(struct drm_plane *plane,
unsigned int i;

for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
unsigned long irqflags;
struct vc4_upm_refcounts *refcount;

if (!drm_mm_node_allocated(&vc4_state->upm[i]))
if (!vc4_state->upm_handle[i])
continue;

spin_lock_irqsave(&hvs->mm_lock, irqflags);
drm_mm_remove_node(&vc4_state->upm[i]);
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]];

if (vc4_state->upm_handle[i] > 0)
ida_free(&hvs->upm_handles, vc4_state->upm_handle[i]);
if (refcount_dec_and_test(&refcount->refcount))
vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]);
}

kfree(vc4_state->dlist);
Expand Down Expand Up @@ -945,32 +960,60 @@ static int vc6_plane_allocate_upm(struct drm_plane_state *state)
vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES;

for (i = 0; i < info->num_planes; i++) {
struct vc4_upm_refcounts *refcount;
int upm_handle;
unsigned long irqflags;
size_t upm_size;

upm_size = vc6_upm_size(state, i);
if (!upm_size)
return -EINVAL;
upm_handle = vc4_state->upm_handle[i];

spin_lock_irqsave(&hvs->mm_lock, irqflags);
ret = drm_mm_insert_node_generic(&hvs->upm_mm,
&vc4_state->upm[i],
upm_size, HVS_UBM_WORD_SIZE,
0, 0);
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
if (ret) {
drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
return ret;
}

ret = ida_alloc_range(&hvs->upm_handles, 1, 32, GFP_KERNEL);
if (ret < 0)
return ret;
if (upm_handle &&
hvs->upm_refcounts[upm_handle].size == upm_size) {
/* Allocation is the same size as the previous user of
* the plane. Keep the allocation.
*/
vc4_state->upm_handle[i] = upm_handle;
} else {
if (upm_handle &&
refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) {
vc4_plane_release_upm_ida(hvs, upm_handle);
vc4_state->upm_handle[i] = 0;
}

vc4_state->upm_handle[i] = ret;
upm_handle = ida_alloc_range(&hvs->upm_handles, 1,
VC4_NUM_UPM_HANDLES,
GFP_KERNEL);
if (upm_handle < 0) {
drm_err(drm, "Out of upm_handles\n");
return upm_handle;
}
vc4_state->upm_handle[i] = upm_handle;

refcount = &hvs->upm_refcounts[upm_handle];
refcount_set(&refcount->refcount, 1);
refcount->size = upm_size;

spin_lock_irqsave(&hvs->mm_lock, irqflags);
ret = drm_mm_insert_node_generic(&hvs->upm_mm,
&refcount->upm,
upm_size, HVS_UBM_WORD_SIZE,
0, 0);
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
if (ret) {
drm_err(drm, "Failed to allocate UPM entry: %d\n", ret);
refcount_dec(&refcount->refcount);
ida_free(&hvs->upm_handles, upm_handle);
vc4_state->upm_handle[i] = 0;
return ret;
}
}

refcount = &hvs->upm_refcounts[upm_handle];
vc4_state->dlist[vc4_state->ptr0_offset[i]] |=
VC4_SET_FIELD(vc4_state->upm[i].start / HVS_UBM_WORD_SIZE,
VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE,
SCALER6_PTR0_UPM_BASE) |
VC4_SET_FIELD(vc4_state->upm_handle[i] - 1,
SCALER6_PTR0_UPM_HANDLE) |
Expand All @@ -981,6 +1024,29 @@ static int vc6_plane_allocate_upm(struct drm_plane_state *state)
return 0;
}

static void vc6_plane_free_upm(struct drm_plane_state *state)
{
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
struct drm_device *drm = state->plane->dev;
struct vc4_dev *vc4 = to_vc4_dev(drm);
struct vc4_hvs *hvs = vc4->hvs;
unsigned int i;

WARN_ON_ONCE(vc4->gen < VC4_GEN_6);

for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
unsigned int upm_handle;

upm_handle = vc4_state->upm_handle[i];
if (!upm_handle)
continue;

if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount))
vc4_plane_release_upm_ida(hvs, upm_handle);
vc4_state->upm_handle[i] = 0;
}
}

/*
* The colorspace conversion matrices are held in 3 entries in the dlist.
* Create an array of them, with entries for each full and limited mode, and
Expand Down Expand Up @@ -2051,8 +2117,16 @@ int vc4_plane_atomic_check(struct drm_plane *plane,

vc4_state->dlist_count = 0;

if (!plane_enabled(new_plane_state))
if (!plane_enabled(new_plane_state)) {
struct drm_plane_state *old_plane_state =
drm_atomic_get_old_plane_state(state, plane);

if (vc4->gen >= VC4_GEN_6 && old_plane_state &&
plane_enabled(old_plane_state)) {
vc6_plane_free_upm(new_plane_state);
}
return 0;
}

if (vc4->gen >= VC4_GEN_6)
ret = vc6_plane_mode_set(plane, new_plane_state);
Expand Down

0 comments on commit 3c45267

Please sign in to comment.