Skip to content

[WIP] Raidz Expansion: multiple devices expanding #17588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cmd/raidz_test/raidz_bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ run_gen_bench_impl(const char *impl)
if (rto_opts.rto_expand) {
rm_bench = vdev_raidz_map_alloc_expanded(
&zio_bench,
rto_opts.rto_ashift, ncols+1, ncols,
rto_opts.rto_ashift, 1,
ncols + rto_opts.rto_expand, ncols,
fn+1, rto_opts.rto_expand_offset,
0, B_FALSE);
} else {
Expand Down Expand Up @@ -174,7 +175,8 @@ run_rec_bench_impl(const char *impl)
if (rto_opts.rto_expand) {
rm_bench = vdev_raidz_map_alloc_expanded(
&zio_bench,
BENCH_ASHIFT, ncols+1, ncols,
BENCH_ASHIFT, 1,
ncols + rto_opts.rto_expand, ncols,
PARITY_PQR,
rto_opts.rto_expand_offset, 0, B_FALSE);
} else {
Expand Down
21 changes: 11 additions & 10 deletions cmd/raidz_test/raidz_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ static void usage(boolean_t requested)
"\t[-S parameter sweep (default: %s)]\n"
"\t[-t timeout for parameter sweep test]\n"
"\t[-B benchmark all raidz implementations]\n"
"\t[-e use expanded raidz map (default: %s)]\n"
"\t[-e vdevs attached to expanded raidz (default: %llx)]\n"
"\t[-r expanded raidz map reflow offset (default: %llx)]\n"
"\t[-v increase verbosity (default: %d)]\n"
"\t[-h (print help)]\n"
Expand All @@ -131,7 +131,7 @@ static void usage(boolean_t requested)
o->rto_dcols, /* -d */
ilog2(o->rto_dsize), /* -s */
rto_opts.rto_sweep ? "yes" : "no", /* -S */
rto_opts.rto_expand ? "yes" : "no", /* -e */
(u_longlong_t)rto_opts.rto_expand, /* -e */
(u_longlong_t)o->rto_expand_offset, /* -r */
o->rto_v); /* -v */

Expand All @@ -146,14 +146,15 @@ static void process_options(int argc, char **argv)

memcpy(o, &rto_opts_defaults, sizeof (*o));

while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
while ((opt = getopt(argc, argv, "TDBSvha:e:r:o:d:s:t:")) != -1) {
switch (opt) {
case 'a':
value = strtoull(optarg, NULL, 0);
o->rto_ashift = MIN(13, MAX(9, value));
break;
case 'e':
o->rto_expand = 1;
value = strtoull(optarg, NULL, 0);
o->rto_expand = MIN(255, MAX(1, value));
break;
case 'r':
o->rto_expand_offset = strtoull(optarg, NULL, 0);
Expand Down Expand Up @@ -329,11 +330,11 @@ init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
if (opts->rto_expand) {
opts->rm_golden =
vdev_raidz_map_alloc_expanded(opts->zio_golden,
opts->rto_ashift, total_ncols+1, total_ncols,
parity, opts->rto_expand_offset, 0, B_FALSE);
opts->rto_ashift, 1, total_ncols + opts->rto_expand,
total_ncols, parity, opts->rto_expand_offset, 0, B_FALSE);
rm_test = vdev_raidz_map_alloc_expanded(zio_test,
opts->rto_ashift, total_ncols+1, total_ncols,
parity, opts->rto_expand_offset, 0, B_FALSE);
opts->rto_ashift, 1, total_ncols + opts->rto_expand,
total_ncols, parity, opts->rto_expand_offset, 0, B_FALSE);
} else {
opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
opts->rto_ashift, total_ncols, parity);
Expand Down Expand Up @@ -380,8 +381,8 @@ init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)

if (opts->rto_expand) {
rm = vdev_raidz_map_alloc_expanded(*zio,
opts->rto_ashift, total_ncols+1, total_ncols,
parity, opts->rto_expand_offset, 0, B_FALSE);
opts->rto_ashift, 1, total_ncols + opts->rto_expand,
total_ncols, parity, opts->rto_expand_offset, 0, B_FALSE);
} else {
rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
total_ncols, parity);
Expand Down
2 changes: 1 addition & 1 deletion cmd/raidz_test/raidz_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ typedef struct raidz_test_opts {
size_t rto_sweep;
size_t rto_sweep_timeout;
size_t rto_benchmark;
size_t rto_expand;
uint64_t rto_expand;
uint64_t rto_expand_offset;
size_t rto_sanity;
size_t rto_gdb;
Expand Down
203 changes: 201 additions & 2 deletions cmd/zhack.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include <sys/dmu_tx.h>
#include <zfeature_common.h>
#include <libzutil.h>
#include <libnvpair.h>

static importargs_t g_importargs;
static char *g_pool;
Expand Down Expand Up @@ -157,8 +158,10 @@ zhack_import(char *target, boolean_t readonly)
.lpc_printerr = B_TRUE
};
error = zpool_find_config(&lpch, target, &config, &g_importargs);
if (error)
if (error) {
printf("zhack_import():P0\n");
fatal(NULL, FTAG, "cannot import '%s'", target);
}

props = NULL;
if (readonly) {
Expand All @@ -175,9 +178,11 @@ zhack_import(char *target, boolean_t readonly)
if (error == EEXIST)
error = 0;

if (error)
if (error) {
printf("zhack_import():P1\n");
fatal(NULL, FTAG, "can't import '%s': %s", target,
strerror(error));
}
}

static void
Expand Down Expand Up @@ -966,6 +971,142 @@ zhack_do_label(int argc, char **argv)
return (err);
}

static nvlist_t *
make_vdev_file(char *path[], int count, uint64_t ashift)
{
nvlist_t **file;
nvlist_t *root;

file = umem_alloc(count * sizeof (nvlist_t *), UMEM_NOFAIL);

for (int i = 0; i < count; i++) {
file[i] = fnvlist_alloc();
fnvlist_add_string(file[i], ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE);
fnvlist_add_string(file[i], ZPOOL_CONFIG_PATH, path[i]);
fnvlist_add_uint64(file[i], ZPOOL_CONFIG_ASHIFT, ashift);
}

root = fnvlist_alloc();
fnvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
fnvlist_add_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
(const nvlist_t **)file, count);

return (root);
}

#define MAX_DEVS_IN_RAIDZ 255

static int
zhack_do_raidz_expand(int argc, char **argv)
{
spa_t *spa;
char *target;
char *newpath[MAX_DEVS_IN_RAIDZ];
nvlist_t *root;
vdev_t *cvd, *rzvd;
pool_raidz_expand_stat_t rzx_stats;
int count, err = 0;

argc--;
argv++;

if (argc == 0) {
(void) fprintf(stderr,
"error: no pool to attach specified\n");
usage();
}

target = argv[0];

argc--;
argv++;

for (count = 0; argc != 0; count++,argc--,argv++)
newpath[count] = argv[0];

zhack_spa_open(target, B_FALSE, FTAG, &spa);

printf("Attaching to %s:\n", target);
for (int i = 0; i < count; i++)
printf("device %s\n", newpath[i]);

rzvd = spa->spa_root_vdev->vdev_child[0];
cvd = rzvd->vdev_child[0];
root = make_vdev_file(newpath, count, cvd->vdev_ashift);
if (root == NULL) {
printf("raidz expand: cannot file config\n");
exit(1);
}

dump_nvlist(root, 0);

err = spa_vdev_attach(spa, rzvd->vdev_guid, root, B_FALSE, B_FALSE);
nvlist_free(root);
if (err != 0) {
printf("raidz expand: attach returned %d", err);
exit(1);
}

/*
* Wait for reflow to begin
*/
while (spa->spa_raidz_expand == NULL) {
txg_wait_synced(spa_get_dsl(spa), 0);
sleep(1);
}

printf("Reflow started...\n");

spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
(void) spa_raidz_expand_get_stats(spa, &rzx_stats);
spa_config_exit(spa, SCL_CONFIG, FTAG);
while (rzx_stats.pres_state == DSS_SCANNING) {
txg_wait_synced(spa_get_dsl(spa), 0);
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
(void) spa_raidz_expand_get_stats(spa, &rzx_stats);
spa_config_exit(spa, SCL_CONFIG, FTAG);

printf("%ld/%ld,", rzx_stats.pres_reflowed/(1024*1024),
rzx_stats.pres_to_reflow/(1024*1024));
fflush(stdout);

sleep(10);
}

printf("\n");
printf("Reflow done\n");

spa_close(spa, FTAG);

return (err);
}

/*
 * Dispatcher for "zhack raidz <subcommand> ...".
 *
 *	argc/argv	argument vector positioned at the "raidz" word
 *
 * The only subcommand handled is "expand", which is forwarded to
 * zhack_do_raidz_expand().  Anything else prints an error and calls
 * usage().  Returns the subcommand result, or non-zero when no valid
 * subcommand was given.
 */
static int
zhack_do_rze(int argc, char **argv)
{
	char *subcommand;
	/* Stays non-zero unless a subcommand runs and reports success. */
	int err = 1;

	/* Step over the "raidz" word. */
	argc--;
	argv++;
	if (argc == 0) {
		(void) fprintf(stderr,
		    "error: no raidz operation specified\n");
		usage();
		return (err);
	}

	subcommand = argv[0];
	if (strcmp(subcommand, "expand") == 0) {
		err = zhack_do_raidz_expand(argc, argv);
	} else {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    subcommand);
		usage();
	}

	return (err);
}

#define MAX_NUM_PATHS 1024

int
Expand Down Expand Up @@ -1011,6 +1152,8 @@ main(int argc, char **argv)
rv = zhack_do_feature(argc, argv);
} else if (strcmp(subcommand, "label") == 0) {
return (zhack_do_label(argc, argv));
} else if (strcmp(subcommand, "raidz") == 0) {
return (zhack_do_rze(argc, argv));
} else {
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
subcommand);
Expand All @@ -1026,3 +1169,59 @@ main(int argc, char **argv)

return (rv);
}

#if 0
#!/bin/bash
#
# Manual test driver for the zhack raidz expand subcommand.
# It copies a reference pool made of file vdevs into a scratch directory,
# adds one more file, runs the expansion through zhack and then checks the
# resulting pool with zdb.

# Pool name used for zpool create/export and passed to zhack and zdb.
POOL_NAME="test"
# Directory holding the reference pool backing files.
REF_POOL="/home/user/Pools/Ref"
# Scratch directory; it is removed and recreated on every run.
TEST_POOL="/home/user/Pools/Test"
# Size given to truncate for every backing file.
VDEV_SIZE="1G"
# Number of backing files in the reference pool.
VDEVS=4

# Create the reference pool: size VDEVS files with truncate, build a raidz
# pool on them, write random data into it and export it.
create_ref_pool()
{
for i in $(seq 0 $(($VDEVS-1))); do
echo "Allocate file $REF_POOL/file${i}"
truncate -s $VDEV_SIZE $REF_POOL/file${i}
done

zpool create -f $POOL_NAME raidz $REF_POOL/file*

zpool status

# NOTE(review): dd is given no count, so it presumably runs until the
# destination is full; /test/file assumes the pool is mounted at /test.
# Confirm both before relying on this.
dd if=/dev/urandom of=/test/file bs=1M status=progress

zpool export $POOL_NAME
}

# Copy the reference files into TEST_POOL, add one extra file and expand
# the copied pool onto it, then verify with zdb.
attach_raidz_vdev()
{
zpool status

echo "Copy ref pool..."
rm -r -f $TEST_POOL
mkdir $TEST_POOL

# Start one background cp per backing file and remember its pid.
pids=()
for i in $(seq 0 $(($VDEVS-1))); do
cp $REF_POOL/file${i} $TEST_POOL/ &
pids[${i}]=$!
done

# wait for all pids
for pid in ${pids[*]}; do
wait $pid
done

# The new device is one more file, numbered after the existing ones.
truncate -s $VDEV_SIZE $TEST_POOL/file${VDEVS}

/home/user/Sources/zfs/zhack -d $TEST_POOL raidz expand $POOL_NAME $TEST_POOL/file${VDEVS}

# Check the exported pool found in TEST_POOL after the expansion.
zdb -bcc -d -Y -e -p $TEST_POOL $POOL_NAME
}

# MAIN
# Only the attach step runs; creating the reference pool is disabled below.
# create_ref_pool
attach_raidz_vdev
#endif
15 changes: 13 additions & 2 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -7492,12 +7492,17 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
boolean_t wait = B_FALSE;
int c;
nvlist_t *nvroot;
char raidz_prefix[] = "raidz";
char *poolname, *old_disk, *new_disk;
zpool_handle_t *zhp;
nvlist_t *props = NULL;
char *propval;
int ret;

printf("=====:\n");
for (int i = 0; i < argc; i++)
printf("i=%d, argv=%s\n", i, argv[i]);

/* check options */
while ((c = getopt(argc, argv, "fo:sw")) != -1) {
switch (c) {
Expand Down Expand Up @@ -7564,7 +7569,8 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
argv += 2;
}

if (argc > 1) {
if (argc > 1 &&
(replacing || strncmp(old_disk, raidz_prefix, strlen(raidz_prefix)))) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
Expand Down Expand Up @@ -7604,12 +7610,17 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
return (1);
}

printf("nvroot:\n");
dump_nvlist(nvroot, 0);

printf("vdev_tree:\n");
print_vdev_tree(zhp, NULL, nvroot, 0, "", VDEV_NAME_PATH);

ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing,
rebuild);

if (ret == 0 && wait) {
zpool_wait_activity_t activity = ZPOOL_WAIT_RESILVER;
char raidz_prefix[] = "raidz";
if (replacing) {
activity = ZPOOL_WAIT_REPLACE;
} else if (strncmp(old_disk,
Expand Down
Loading
Loading