--- sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h.orig +++ sys/contrib/openzfs/include/os/freebsd/spl/sys/vnode.h @@ -59,6 +59,8 @@ #include #include #include +#include +#include typedef struct vop_vector vnodeops_t; #define VOP_FID VOP_VPTOFH @@ -88,6 +90,24 @@ #define vn_has_cached_data(vp) \ ((vp)->v_object != NULL && \ (vp)->v_object->resident_page_count > 0) + +#ifndef IN_BASE +static __inline void +vn_flush_cached_data(vnode_t *vp, boolean_t sync) +{ +#if __FreeBSD_version > 1300054 + if (vm_object_mightbedirty(vp->v_object)) { +#else + if (vp->v_object->flags & OBJ_MIGHTBEDIRTY) { +#endif + int flags = sync ? OBJPC_SYNC : 0; + zfs_vmobject_wlock(vp->v_object); + vm_object_page_clean(vp->v_object, 0, 0, flags); + zfs_vmobject_wunlock(vp->v_object); + } +} +#endif + #define vn_exists(vp) do { } while (0) #define vn_invalid(vp) do { } while (0) #define vn_renamepath(tdvp, svp, tnm, lentnm) do { } while (0) --- sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h.orig +++ sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h @@ -117,7 +117,8 @@ #define Z_ISLNK(type) ((type) == VLNK) #define Z_ISDIR(type) ((type) == VDIR) -#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp)) +#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp)) +#define zn_flush_cached_data(zp, sync) vn_flush_cached_data(ZTOV(zp), sync) #define zn_rlimit_fsize(zp, uio, td) vn_rlimit_fsize(ZTOV(zp), (uio), (td)) /* Called on entry to each ZFS vnode and vfs operation */ --- sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h.orig +++ sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h @@ -70,7 +70,7 @@ #define Z_ISDEV(type) (S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type)) #define Z_ISDIR(type) S_ISDIR(type) -#define zn_has_cached_data(zp) ((zp)->z_is_mapped) +#define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync) #define zn_rlimit_fsize(zp, uio, td) (0) #define zhold(zp) igrab(ZTOI((zp))) --- sys/contrib/openzfs/include/sys/dnode.h.orig +++ sys/contrib/openzfs/include/sys/dnode.h @@ -425,6 +425,7 @@ void dnode_rele(dnode_t *dn, void *ref); void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting); int dnode_try_claim(objset_t *os, uint64_t object, int slots); +boolean_t dnode_is_dirty(dnode_t *dn); void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx); void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, void *tag); void dnode_sync(dnode_t *dn, dmu_tx_t *tx); --- sys/contrib/openzfs/module/zfs/dmu.c.orig +++ sys/contrib/openzfs/module/zfs/dmu.c @@ -2082,42 +2082,41 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { dnode_t *dn; - int i, err; - boolean_t clean = B_TRUE; + int err; +restart: err = dnode_hold(os, object, FTAG, &dn); if (err) return (err); - /* - * Check if dnode is dirty - */ - for (i = 0; i < TXG_SIZE; i++) { - if (multilist_link_active(&dn->dn_dirty_link[i])) { - clean = B_FALSE; - break; - } - } + rw_enter(&dn->dn_struct_rwlock, RW_READER); - /* - * If compatibility option is on, sync any current changes before - * we go trundling through the block pointers. - */ - if (!clean && zfs_dmu_offset_next_sync) { - clean = B_TRUE; - dnode_rele(dn, FTAG); - txg_wait_synced(dmu_objset_pool(os), 0); - err = dnode_hold(os, object, FTAG, &dn); - if (err) - return (err); - } + if (dnode_is_dirty(dn)) { + /* + * If the zfs_dmu_offset_next_sync module option is enabled + * then strict hole reporting has been requested. Dirty + * dnodes must be synced to disk to accurately report all + * holes. When disabled (the default) dirty dnodes are + * reported to not have any holes which is always safe. + * + * When called by zfs_holey_common() the zp->z_rangelock + * is held to prevent zfs_write() and mmap writeback from + * re-dirtying the dnode after txg_wait_synced(). + */ + if (zfs_dmu_offset_next_sync) { + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + txg_wait_synced(dmu_objset_pool(os), 0); + goto restart; + } - if (clean) - err = dnode_next_offset(dn, - (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); - else err = SET_ERROR(EBUSY); + } else { + err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK | + (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); + } + rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); return (err); --- sys/contrib/openzfs/module/zfs/dnode.c.orig +++ sys/contrib/openzfs/module/zfs/dnode.c @@ -1652,6 +1652,26 @@ slots, NULL, NULL)); } +/* + * Checks if the dnode contains any uncommitted dirty records. + */ +boolean_t +dnode_is_dirty(dnode_t *dn) +{ + mutex_enter(&dn->dn_mtx); + + for (int i = 0; i < TXG_SIZE; i++) { + if (list_head(&dn->dn_dirty_records[i]) != NULL) { + mutex_exit(&dn->dn_mtx); + return (B_TRUE); + } + } + + mutex_exit(&dn->dn_mtx); + + return (B_FALSE); +} + void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { --- sys/contrib/openzfs/module/zfs/zfs_vnops.c.orig +++ sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -85,6 +85,7 @@ static int zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) { + zfs_locked_range_t *lr; uint64_t noff = (uint64_t)*off; /* new offset */ uint64_t file_sz; int error; @@ -100,12 +101,18 @@ else hole = B_FALSE; + /* Flush any mmap()'d data to disk */ + if (zn_has_cached_data(zp)) + zn_flush_cached_data(zp, B_FALSE); + + lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER); error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); + zfs_rangelock_exit(lr); if (error == ESRCH) return (SET_ERROR(ENXIO)); - /* file was dirty, so fall back to using generic logic */ + /* File was dirty, so fall back to using generic logic */ if (error == EBUSY) { if (hole) *off = file_sz;