Skip to content

Commit 0e2f80a

Browse files
apopple-nvidia authored and akpm00 committed
fs/dax: ensure all pages are idle prior to filesystem unmount
File systems call dax_break_mapping() prior to reallocating file system blocks to ensure the page is not undergoing any DMA or other accesses. Generally this is needed when a file is truncated to ensure that if a block is reallocated nothing is writing to it. However filesystems currently don't call this when an FS DAX inode is evicted. This can cause problems when the file system is unmounted as a page can continue to be undergoing DMA or other remote access after unmount. This means if the file system is remounted any truncate or other operation which requires the underlying file system block to be freed will not wait for the remote access to complete. Therefore a busy block may be reallocated to a new file leading to corruption. Link: https://lkml.kernel.org/r/2d3cf575bbd095084993154be2f0aa7442e5cd28.1740713401.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple <[email protected]> Tested-by: Alison Schofield <[email protected]> Cc: Alexander Gordeev <[email protected]> Cc: Asahi Lina <[email protected]> Cc: Balbir Singh <[email protected]> Cc: Bjorn Helgaas <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Christian Borntraeger <[email protected]> Cc: Christoph Hellwig <[email protected]> Cc: Chunyan Zhang <[email protected]> Cc: Dan Wiliams <[email protected]> Cc: "Darrick J. Wong" <[email protected]> Cc: Dave Chinner <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Dave Jiang <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Gerald Schaefer <[email protected]> Cc: Heiko Carstens <[email protected]> Cc: Huacai Chen <[email protected]> Cc: Ira Weiny <[email protected]> Cc: Jan Kara <[email protected]> Cc: Jason Gunthorpe <[email protected]> Cc: Jason Gunthorpe <[email protected]> Cc: John Hubbard <[email protected]> Cc: linmiaohe <[email protected]> Cc: Logan Gunthorpe <[email protected]> Cc: Matthew Wilcow (Oracle) <[email protected]> Cc: Michael "Camp Drill Sergeant" Ellerman <[email protected]> Cc: Nicholas Piggin <[email protected]> Cc: Peter Xu <[email protected]> Cc: Sven Schnelle <[email protected]> Cc: Ted Ts'o <[email protected]> Cc: Vasily Gorbik <[email protected]> Cc: Vishal Verma <[email protected]> Cc: Vivek Goyal <[email protected]> Cc: WANG Xuerui <[email protected]> Cc: Will Deacon <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent bde708f commit 0e2f80a

File tree

4 files changed

+46
-0
lines changed

4 files changed

+46
-0
lines changed

fs/dax.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,13 @@ static int wait_page_idle(struct page *page,
884884
TASK_INTERRUPTIBLE, 0, 0, cb(inode));
885885
}
886886

887+
/*
 * Sleep until dax_page_is_idle(page) becomes true, using @page itself as the
 * wait variable passed to ___wait_var_event().
 *
 * Unlike wait_page_idle() above, the task sleeps in TASK_UNINTERRUPTIBLE
 * state and plain schedule() is used as the wait command instead of a
 * filesystem-supplied callback, so nothing is returned to the caller.
 *
 * @page:  page to wait on.
 * @inode: not referenced by this function's body.
 */
static void wait_page_idle_uninterruptible(struct page *page,
888+
struct inode *inode)
889+
{
890+
___wait_var_event(page, dax_page_is_idle(page),
891+
TASK_UNINTERRUPTIBLE, 0, 0, schedule());
892+
}
893+
887894
/*
888895
* Unmaps the inode and waits for any DMA to complete prior to deleting the
889896
* DAX mapping entries for the range.
@@ -919,6 +926,26 @@ int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
919926
}
920927
EXPORT_SYMBOL_GPL(dax_break_layout);
921928

929+
/*
 * Wait for every busy page in the inode's DAX mapping to become idle and then
 * delete the DAX mapping entries for the whole file range [0, LLONG_MAX].
 *
 * Does nothing if inode->i_mapping is not a DAX mapping. The wait is
 * uninterruptible and there is no return value, so callers cannot observe a
 * failure. Per the commit description this is meant to be called when an
 * FS DAX inode is evicted, so that no page is still undergoing DMA or other
 * remote access once the filesystem is unmounted.
 *
 * @inode: inode whose DAX mapping is being torn down.
 */
void dax_break_layout_final(struct inode *inode)
930+
{
931+
struct page *page;
932+
933+
if (!dax_mapping(inode->i_mapping))
934+
return;
935+
936+
/*
 * Re-query after every wait: keep going until the lookup over the full
 * range reports no busy page at all.
 */
do {
937+
page = dax_layout_busy_page_range(inode->i_mapping, 0,
938+
LLONG_MAX);
939+
if (!page)
940+
break;
941+
942+
wait_page_idle_uninterruptible(page, inode);
943+
} while (true);
944+
945+
/* No busy page was found on the last pass; drop the mapping entries. */
dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
946+
}
947+
EXPORT_SYMBOL_GPL(dax_break_layout_final);
948+
922949
/*
923950
* Invalidate DAX entry if it is clean.
924951
*/

fs/ext4/inode.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ void ext4_evict_inode(struct inode *inode)
181181

182182
trace_ext4_evict_inode(inode);
183183

184+
dax_break_layout_final(inode);
185+
184186
if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
185187
ext4_evict_ea_inode(inode);
186188
if (inode->i_nlink) {

fs/xfs/xfs_super.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,17 @@ xfs_fs_drop_inode(
751751
return generic_drop_inode(inode);
752752
}
753753

754+
/*
 * Inode eviction handler for XFS.
 *
 * For DAX inodes, dax_break_layout_final() is called first, before the
 * inode's page cache is truncated and the inode is cleared. For all inodes
 * the remaining steps are truncate_inode_pages_final() on i_data followed by
 * clear_inode().
 */
STATIC void
755+
xfs_fs_evict_inode(
756+
struct inode *inode)
757+
{
758+
if (IS_DAX(inode))
759+
dax_break_layout_final(inode);
760+
761+
truncate_inode_pages_final(&inode->i_data);
762+
clear_inode(inode);
763+
}
764+
754765
static void
755766
xfs_mount_free(
756767
struct xfs_mount *mp)
@@ -1215,6 +1226,7 @@ static const struct super_operations xfs_super_operations = {
12151226
.destroy_inode = xfs_fs_destroy_inode,
12161227
.dirty_inode = xfs_fs_dirty_inode,
12171228
.drop_inode = xfs_fs_drop_inode,
1229+
.evict_inode = xfs_fs_evict_inode,
12181230
.put_super = xfs_fs_put_super,
12191231
.sync_fs = xfs_fs_sync_fs,
12201232
.freeze_fs = xfs_fs_freeze,

include/linux/dax.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ static inline int __must_check dax_break_layout(struct inode *inode,
232232
{
233233
return 0;
234234
}
235+
236+
/*
 * No-op stub of dax_break_layout_final().
 * NOTE(review): presumably the variant used when FS DAX support is compiled
 * out, given the #endif that follows - confirm against the enclosing #if.
 */
static inline void dax_break_layout_final(struct inode *inode)
237+
{
238+
}
235239
#endif
236240

237241
bool dax_alive(struct dax_device *dax_dev);
@@ -266,6 +270,7 @@ static inline int __must_check dax_break_layout_inode(struct inode *inode,
266270
{
267271
return dax_break_layout(inode, 0, LLONG_MAX, cb);
268272
}
273+
void dax_break_layout_final(struct inode *inode);
269274
int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
270275
struct inode *dest, loff_t destoff,
271276
loff_t len, bool *is_same,

0 commit comments

Comments
 (0)