Skip to content

Commit

Permalink
Add a DDT prune operation
Browse files Browse the repository at this point in the history
# This is the commit message #2:

fixed block leak found by ztest

also some code cleanups

# This is the commit message #3:

Address recent review feedback

 - Don't treat empty DDT as an error
 - Switch to power-of-two for histogram (was 7 day bins)
 - Use seconds internally for age
 - Remove cmn_err debugging messages
 - Clean up code comments
 - Fix cstyle and compiler warnings

# This is the commit message openzfs#4:

increase histogram bin count to cover 4 years

Signed-off-by: Don Brady <don.brady@klarasystems.com>
  • Loading branch information
don-brady authored and robn committed May 15, 2024
1 parent 25db08d commit a4a2d48
Show file tree
Hide file tree
Showing 12 changed files with 548 additions and 10 deletions.
90 changes: 90 additions & 0 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ static int zpool_do_version(int, char **);

static int zpool_do_wait(int, char **);

static int zpool_do_ddt_prune(int, char **);

static int zpool_do_help(int argc, char **argv);

static zpool_compat_status_t zpool_do_load_compat(
Expand Down Expand Up @@ -167,6 +169,7 @@ typedef enum {
HELP_CLEAR,
HELP_CREATE,
HELP_CHECKPOINT,
HELP_DDT_PRUNE,
HELP_DESTROY,
HELP_DETACH,
HELP_EXPORT,
Expand Down Expand Up @@ -343,6 +346,8 @@ static zpool_command_t command_table[] = {
{ "sync", zpool_do_sync, HELP_SYNC },
{ NULL },
{ "wait", zpool_do_wait, HELP_WAIT },
{ NULL },
{ "ddtprune", zpool_do_ddt_prune, HELP_DDT_PRUNE },
};

#define NCOMMAND (ARRAY_SIZE(command_table))
Expand Down Expand Up @@ -456,6 +461,8 @@ get_usage(zpool_help_t idx)
case HELP_WAIT:
return (gettext("\twait [-Hp] [-T d|u] [-t <activity>[,...]] "
"<pool> [interval]\n"));
case HELP_DDT_PRUNE:
return (gettext("\tddtprune -d|-p <amount> <pool>\n"));
default:
__builtin_unreachable();
}
Expand Down Expand Up @@ -11660,6 +11667,89 @@ found:;
return (error);
}

/*
 * zpool ddtprune -d|-p <amount> <pool>
 *
 *	-d <days>	Prune entries <days> old and older
 *	-p <percent>	Prune <percent> amount of entries
 *
 * Prune entries from DDT that have only a single reference to
 * satisfy the amount specified.
 *
 * -d and -p are mutually exclusive and exactly one of them is required,
 * followed by exactly one pool name.  A days value is converted to
 * seconds before it is passed on.  Returns the result of
 * zpool_ddt_prune(), or -1 if the pool cannot be opened.  Invalid
 * arguments are reported through usage(B_FALSE), which is presumed not
 * to return.
 */
int
zpool_do_ddt_prune(int argc, char **argv)
{
	zpool_ddt_prune_unit_t unit = ZPOOL_DDT_PRUNE_NONE;
	/*
	 * Keep the full width produced by strtoull() so that oversized
	 * input fails the range checks below instead of being truncated
	 * into a value that looks valid.
	 */
	uint64_t amount = 0;
	uint64_t seconds;
	zpool_handle_t *zhp;
	char *endptr;
	int c;

	while ((c = getopt(argc, argv, "d:p:")) != -1) {
		switch (c) {
		case 'd':
			if (unit == ZPOOL_DDT_PRUNE_PERCENTAGE) {
				(void) fprintf(stderr, gettext("-d cannot be "
				    "combined with -p option\n"));
				usage(B_FALSE);
			}
			errno = 0;
			amount = strtoull(optarg, &endptr, 0);
			/*
			 * Unsigned multiplication wraps, so a result that
			 * does not divide back to the input has overflowed.
			 */
			seconds = amount * 86400;
			if (errno != 0 || *endptr != '\0' || amount == 0 ||
			    seconds / 86400 != amount) {
				(void) fprintf(stderr,
				    gettext("invalid days value\n"));
				usage(B_FALSE);
			}
			amount = seconds;	/* convert days to seconds */
			unit = ZPOOL_DDT_PRUNE_AGE;
			break;
		case 'p':
			if (unit == ZPOOL_DDT_PRUNE_AGE) {
				(void) fprintf(stderr, gettext("-p cannot be "
				    "combined with -d option\n"));
				usage(B_FALSE);
			}
			errno = 0;
			amount = strtoull(optarg, &endptr, 0);
			if (errno != 0 || *endptr != '\0' ||
			    amount == 0 || amount > 100) {
				(void) fprintf(stderr,
				    gettext("invalid percentage value\n"));
				usage(B_FALSE);
			}
			unit = ZPOOL_DDT_PRUNE_PERCENTAGE;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}
	argc -= optind;
	argv += optind;

	if (unit == ZPOOL_DDT_PRUNE_NONE) {
		(void) fprintf(stderr,
		    gettext("missing amount option (-d|-p <value>)\n"));
		usage(B_FALSE);
	} else if (argc < 1) {
		(void) fprintf(stderr, gettext("missing pool argument\n"));
		usage(B_FALSE);
	} else if (argc > 1) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}
	zhp = zpool_open(g_zfs, argv[0]);
	if (zhp == NULL)
		return (-1);

	int error = zpool_ddt_prune(zhp, unit, amount);

	zpool_close(zhp);

	return (error);
}

static int
find_command_idx(const char *command, int *idx)
{
Expand Down
13 changes: 13 additions & 0 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ ztest_func_t ztest_fletcher;
ztest_func_t ztest_fletcher_incr;
ztest_func_t ztest_verify_dnode_bt;
ztest_func_t ztest_pool_prefetch_ddt;
ztest_func_t ztest_ddt_prune;

static uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
static uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
Expand Down Expand Up @@ -502,6 +503,7 @@ static ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
ZTI_INIT(ztest_pool_prefetch_ddt, 1, &zopt_rarely),
ZTI_INIT(ztest_ddt_prune, 1, &zopt_rarely),
};

#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
Expand Down Expand Up @@ -7288,6 +7290,17 @@ ztest_trim(ztest_ds_t *zd, uint64_t id)
mutex_exit(&ztest_vdev_lock);
}

/*
 * Exercise dedup table pruning on the test pool with a randomly chosen
 * percentage (ztest_random(15) + 1, presumably 1 through 15).  The
 * result of the prune request is intentionally ignored.
 */
void
ztest_ddt_prune(ztest_ds_t *zd, uint64_t id)
{
	/* Neither argument is needed; the prune acts on the whole pool. */
	(void) zd;
	(void) id;

	int32_t percent = 1 + ztest_random(15);

	(void) ddt_prune_unique_entries(ztest_spa, ZPOOL_DDT_PRUNE_PERCENTAGE,
	    percent);
}

/*
* Verify pool integrity by running zdb.
*/
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ _LIBZFS_H int zpool_reopen_one(zpool_handle_t *, void *);

_LIBZFS_H int zpool_sync_one(zpool_handle_t *, void *);

/*
 * Prune the dedup table of the given pool.  The unit argument selects how
 * the trailing amount is interpreted (see zpool_ddt_prune_unit_t).
 */
_LIBZFS_H int zpool_ddt_prune(zpool_handle_t *, zpool_ddt_prune_unit_t,
uint64_t);

_LIBZFS_H int zpool_vdev_online(zpool_handle_t *, const char *, int,
vdev_state_t *);
_LIBZFS_H int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
Expand Down
3 changes: 3 additions & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ _LIBZFS_CORE_H int lzc_set_vdev_prop(const char *, nvlist_t *, nvlist_t **);

_LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);

/*
 * Issue ZFS_IOC_DDT_PRUNE for the named pool with the given unit and
 * amount; returns the result of the ioctl.
 */
_LIBZFS_CORE_H int lzc_ddt_prune(const char *, zpool_ddt_prune_unit_t,
uint64_t);

#ifdef __cplusplus
}
#endif
Expand Down
5 changes: 5 additions & 0 deletions include/sys/ddt.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ typedef struct {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
uint64_t ddp_phys_birth;
uint64_t ddp_class_birth;
} ddt_phys_t;

typedef struct {
Expand Down Expand Up @@ -172,6 +173,7 @@ typedef struct {
#define DDE_FLAG_LOADED (1 << 0) /* entry ready for use */
#define DDE_FLAG_OVERQUOTA (1 << 1) /* entry unusable, no space */
#define DDE_FLAG_LOGGED (1 << 2) /* loaded from log */
#define DDE_FLAG_PRUNE_WANTED (1 << 3) /* prune has been requested */

/*
* Additional data to support entry update or repair. This is fixed size
Expand Down Expand Up @@ -358,6 +360,9 @@ extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb,

extern boolean_t ddt_addref(spa_t *spa, const blkptr_t *bp);

/*
 * Prune entries from the pool's dedup table.  'unit' selects how 'amount'
 * is interpreted (see zpool_ddt_prune_unit_t).
 * NOTE(review): return value semantics are defined by the implementation,
 * which is not shown here -- confirm before relying on specific codes.
 */
extern int ddt_prune_unique_entries(spa_t *spa, zpool_ddt_prune_unit_t unit,
uint64_t amount);

#ifdef __cplusplus
}
#endif
Expand Down
15 changes: 14 additions & 1 deletion include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1419,7 +1419,7 @@ typedef enum {
*/
typedef enum zfs_ioc {
/*
* Core features - 88/128 numbers reserved.
* Core features - 89/128 numbers reserved.
*/
#ifdef __FreeBSD__
ZFS_IOC_FIRST = 0,
Expand Down Expand Up @@ -1516,6 +1516,7 @@ typedef enum zfs_ioc {
ZFS_IOC_VDEV_SET_PROPS, /* 0x5a56 */
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */
ZFS_IOC_DDT_PRUNE, /* 0x5a59 */

/*
* Per-platform (Optional) - 8/128 numbers reserved.
Expand Down Expand Up @@ -1652,6 +1653,12 @@ typedef enum {
ZPOOL_PREFETCH_DDT
} zpool_prefetch_type_t;

/*
 * How the amount passed with a DDT prune request is to be interpreted.
 */
typedef enum {
	ZPOOL_DDT_PRUNE_NONE = 0,	/* no unit has been selected */
	ZPOOL_DDT_PRUNE_AGE = 1,	/* in seconds */
	ZPOOL_DDT_PRUNE_PERCENTAGE = 2	/* 1 - 100 */
} zpool_ddt_prune_unit_t;

/*
* Bookmark name values.
*/
Expand Down Expand Up @@ -1745,6 +1752,12 @@ typedef enum {
*/
#define ZPOOL_PREFETCH_TYPE "prefetch_type"

/*
 * The following are names used when invoking ZFS_IOC_DDT_PRUNE.
 *
 * DDT_PRUNE_UNIT names the int32 value carrying the
 * zpool_ddt_prune_unit_t, and DDT_PRUNE_AMOUNT names the uint64 value
 * carrying the amount expressed in that unit.
 */
#define DDT_PRUNE_UNIT "ddt_prune_unit"
#define DDT_PRUNE_AMOUNT "ddt_prune_amount"

/*
* Flags for ZFS_IOC_VDEV_SET_STATE
*/
Expand Down
1 change: 1 addition & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ struct spa {
uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
uint64_t spa_dedup_checksum; /* default dedup checksum */
uint64_t spa_dspace; /* dspace in normal class */
boolean_t spa_active_ddt_prune; /* ddt prune process active */
struct brt *spa_brt; /* in-core BRT */
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
kmutex_t spa_proc_lock; /* protects spa_proc* */
Expand Down
28 changes: 28 additions & 0 deletions lib/libzfs/libzfs_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -5549,3 +5549,31 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname,

return (ret);
}

/*
 * Prune entries from the dedup table of the pool behind 'zhp'.
 *
 * 'unit' selects how 'amount' is interpreted (see zpool_ddt_prune_unit_t).
 *
 * Returns 0 when the request succeeds, and also when it fails with
 * ENODATA, which is deliberately not reported as an error (presumably
 * the dedup table was empty).  On any other failure the error is recorded
 * on the libzfs handle and -1 is returned.
 */
int
zpool_ddt_prune(zpool_handle_t *zhp, zpool_ddt_prune_unit_t unit,
    uint64_t amount)
{
	int error = lzc_ddt_prune(zhp->zpool_name, unit, amount);
	if (error != 0 && error != ENODATA) {
		libzfs_handle_t *hdl = zhp->zpool_hdl;
		char errbuf[ERRBUFLEN];

		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
		    "cannot prune dedup table on '%s'"), zhp->zpool_name);

		if (error == EALREADY) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "a prune operation is already in progress"));
			(void) zfs_error(hdl, EZFS_BUSY, errbuf);
		} else {
			/*
			 * Report the code returned by lzc_ddt_prune() and
			 * not the global errno, which the library calls
			 * made since the request may have overwritten.
			 */
			(void) zpool_standard_error(hdl, error, errbuf);
		}
		return (-1);
	}

	return (0);
}
22 changes: 22 additions & 0 deletions lib/libzfs_core/libzfs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1921,3 +1921,25 @@ lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
{
return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
}

/*
 * Request that 'amount' (interpreted according to 'unit') be pruned from
 * the dedup table of the named pool.  The unit and amount are packed into
 * an nvlist under DDT_PRUNE_UNIT and DDT_PRUNE_AMOUNT and submitted with
 * ZFS_IOC_DDT_PRUNE; the value returned by the ioctl is passed back to
 * the caller and any output nvlist is discarded.
 */
int
lzc_ddt_prune(const char *pool, zpool_ddt_prune_unit_t unit, uint64_t amount)
{
	nvlist_t *innvl = fnvlist_alloc();
	nvlist_t *outnvl = NULL;
	int rc;

	fnvlist_add_int32(innvl, DDT_PRUNE_UNIT, unit);
	fnvlist_add_uint64(innvl, DDT_PRUNE_AMOUNT, amount);

	rc = lzc_ioctl(ZFS_IOC_DDT_PRUNE, pool, innvl, &outnvl);

	/* Both lists are owned here regardless of the ioctl outcome. */
	fnvlist_free(outnvl);
	fnvlist_free(innvl);

	return (rc);
}
Loading

0 comments on commit a4a2d48

Please sign in to comment.