Skip to content

Commit eeea246

Browse files
committed
FreeBSD: zfs_putpages: don't undirty pages until after write completes
zfs_putpages() would put the entire range of pages onto the ZIL, then return VM_PAGER_OK for each page to the kernel. However, an associated zil_commit() or txg sync had not happened at this point, so the write may not actually be on disk. So, we rework it to use a ZIL commit callback, and do the post-write work of undirtying the page and signaling completion there. We return VM_PAGER_PEND to the kernel instead so it knows that we will take care of it. We change from a single zfs_log_write() to cover the whole range, to multiple calls, each for a single page. This is because if zfs_log_write() receives a write too large to fit into a single ZIL block, it will split it into multiple writes, but each will receive the same callback and argument. This will lead to the callback being called multiple times, but we don't which pages/range the call is for, and so can't clean them up. This way is not ideal, but is simple and correct, and so is a good start. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. Signed-off-by: Rob Norris <[email protected]>
1 parent 46b82de commit eeea246

File tree

3 files changed

+52
-9
lines changed

3 files changed

+52
-9
lines changed

include/os/freebsd/spl/sys/vm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
extern const int zfs_vm_pagerret_bad;
3636
extern const int zfs_vm_pagerret_error;
3737
extern const int zfs_vm_pagerret_ok;
38+
extern const int zfs_vm_pagerret_pend;
39+
extern const int zfs_vm_pagerret_again;
3840
extern const int zfs_vm_pagerput_sync;
3941
extern const int zfs_vm_pagerput_inval;
4042

module/os/freebsd/spl/spl_vm.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
const int zfs_vm_pagerret_bad = VM_PAGER_BAD;
4444
const int zfs_vm_pagerret_error = VM_PAGER_ERROR;
4545
const int zfs_vm_pagerret_ok = VM_PAGER_OK;
46+
const int zfs_vm_pagerret_pend = VM_PAGER_PEND;
47+
const int zfs_vm_pagerret_again = VM_PAGER_AGAIN;
4648
const int zfs_vm_pagerput_sync = VM_PAGER_PUT_SYNC;
4749
const int zfs_vm_pagerput_inval = VM_PAGER_PUT_INVAL;
4850

module/os/freebsd/zfs/zfs_vnops_os.c

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4084,6 +4084,15 @@ zfs_freebsd_getpages(struct vop_getpages_args *ap)
40844084
ap->a_rahead));
40854085
}
40864086

4087+
static void
4088+
zfs_putpage_commit_cb(void *arg)
4089+
{
4090+
vm_page_t pp = arg;
4091+
vm_page_undirty(pp);
4092+
vm_page_sunbusy(pp);
4093+
vm_object_pip_wakeup(pp->object);
4094+
}
4095+
40874096
static int
40884097
zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
40894098
int *rtvals)
@@ -4185,10 +4194,12 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
41854194
}
41864195

41874196
if (zp->z_blksz < PAGE_SIZE) {
4188-
for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4189-
tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4197+
vm_ooffset_t woff = off;
4198+
size_t wlen = len;
4199+
for (i = 0; wlen > 0; woff += tocopy, wlen -= tocopy, i++) {
4200+
tocopy = MIN(PAGE_SIZE, wlen);
41904201
va = zfs_map_page(ma[i], &sf);
4191-
dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4202+
dmu_write(zfsvfs->z_os, zp->z_id, woff, tocopy, va, tx);
41924203
zfs_unmap_page(sf);
41934204
}
41944205
} else {
@@ -4209,19 +4220,47 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
42094220
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
42104221
err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
42114222
ASSERT0(err);
4223+
42124224
/*
4213-
* XXX we should be passing a callback to undirty
4214-
* but that would make the locking messier
4225+
* Loop over the pages and load them onto the ZIL. Each page
4226+
* gets a separate log write so we can get the correct page
4227+
* pointer into the callback.
42154228
*/
4216-
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4217-
len, commit, B_FALSE, NULL, NULL);
4229+
vm_ooffset_t pgoff = off;
4230+
size_t pgrem = len;
42184231

42194232
zfs_vmobject_wlock(object);
42204233
for (i = 0; i < ncount; i++) {
4221-
rtvals[i] = zfs_vm_pagerret_ok;
4222-
vm_page_undirty(ma[i]);
4234+
if (pgrem == 0) {
4235+
/*
4236+
* This page is entirely beyond the requested
4237+
* length, nothing more to do.
4238+
*/
4239+
rtvals[i] = zfs_vm_pagerret_again;
4240+
continue;
4241+
}
4242+
4243+
size_t pglen = MIN(PAGE_SIZE, pgrem);
4244+
4245+
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp,
4246+
pgoff, pglen, commit, B_FALSE,
4247+
zfs_putpage_commit_cb, ma[i]);
4248+
4249+
pgoff += pglen;
4250+
pgrem -= pglen;
4251+
4252+
/*
4253+
* Inform the kernel that we will take care of page
4254+
* cleanup and signalling once it's written out.
4255+
*/
4256+
rtvals[i] = zfs_vm_pagerret_pend;
42234257
}
4258+
4259+
ASSERT3U(pgoff, ==, off+len);
4260+
ASSERT0(pgrem);
4261+
42244262
zfs_vmobject_wunlock(object);
4263+
42254264
VM_CNT_INC(v_vnodeout);
42264265
VM_CNT_ADD(v_vnodepgsout, ncount);
42274266
}

0 commit comments

Comments
 (0)