| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Copyright (c) 2018-2024 Oracle. All Rights Reserved. |
| * Author: Darrick J. Wong <[email protected]> |
| */ |
| #include "xfs.h" |
| #include "xfs_fs.h" |
| #include "xfs_shared.h" |
| #include "xfs_format.h" |
| #include "xfs_log_format.h" |
| #include "xfs_trans_resv.h" |
| #include "xfs_bit.h" |
| #include "xfs_sb.h" |
| #include "xfs_mount.h" |
| #include "xfs_defer.h" |
| #include "xfs_inode.h" |
| #include "xfs_trans.h" |
| #include "xfs_alloc.h" |
| #include "xfs_btree.h" |
| #include "xfs_btree_staging.h" |
| #include "xfs_metafile.h" |
| #include "xfs_rmap.h" |
| #include "xfs_rtrmap_btree.h" |
| #include "xfs_trace.h" |
| #include "xfs_cksum.h" |
| #include "xfs_error.h" |
| #include "xfs_extent_busy.h" |
| #include "xfs_rtgroup.h" |
| #include "xfs_bmap.h" |
| #include "xfs_health.h" |
| #include "xfs_buf_mem.h" |
| #include "xfs_btree_mem.h" |
| |
/* Slab cache backing the cursors allocated for realtime rmap btrees. */
static struct kmem_cache	*xfs_rtrmapbt_cur_cache;
| |
| /* |
| * Realtime Reverse Map btree. |
| * |
| * This is a btree used to track the owner(s) of a given extent in the realtime |
| * device. See the comments in xfs_rmap_btree.c for more information. |
| * |
| * This tree is basically the same as the regular rmap btree except that it |
| * is rooted in an inode and does not live in free space. |
| */ |
| |
| static struct xfs_btree_cur * |
| xfs_rtrmapbt_dup_cursor( |
| struct xfs_btree_cur *cur) |
| { |
| return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group)); |
| } |
| |
| STATIC int |
| xfs_rtrmapbt_get_minrecs( |
| struct xfs_btree_cur *cur, |
| int level) |
| { |
| if (level == cur->bc_nlevels - 1) { |
| struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); |
| |
| return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, |
| level == 0) / 2; |
| } |
| |
| return cur->bc_mp->m_rtrmap_mnr[level != 0]; |
| } |
| |
| STATIC int |
| xfs_rtrmapbt_get_maxrecs( |
| struct xfs_btree_cur *cur, |
| int level) |
| { |
| if (level == cur->bc_nlevels - 1) { |
| struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); |
| |
| return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, |
| level == 0); |
| } |
| |
| return cur->bc_mp->m_rtrmap_mxr[level != 0]; |
| } |
| |
| /* Calculate number of records in the ondisk realtime rmap btree inode root. */ |
| unsigned int |
| xfs_rtrmapbt_droot_maxrecs( |
| unsigned int blocklen, |
| bool leaf) |
| { |
| blocklen -= sizeof(struct xfs_rtrmap_root); |
| |
| if (leaf) |
| return blocklen / sizeof(struct xfs_rmap_rec); |
| return blocklen / (2 * sizeof(struct xfs_rmap_key) + |
| sizeof(xfs_rtrmap_ptr_t)); |
| } |
| |
| /* |
| * Get the maximum records we could store in the on-disk format. |
| * |
| * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but |
| * for the root node this checks the available space in the dinode fork |
| * so that we can resize the in-memory buffer to match it. After a |
| * resize to the maximum size this function returns the same value |
| * as xfs_rtrmapbt_get_maxrecs for the root node, too. |
| */ |
| STATIC int |
| xfs_rtrmapbt_get_dmaxrecs( |
| struct xfs_btree_cur *cur, |
| int level) |
| { |
| if (level != cur->bc_nlevels - 1) |
| return cur->bc_mp->m_rtrmap_mxr[level != 0]; |
| return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0); |
| } |
| |
| /* |
| * Convert the ondisk record's offset field into the ondisk key's offset field. |
| * Fork and bmbt are significant parts of the rmap record key, but written |
| * status is merely a record attribute. |
| */ |
| static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec) |
| { |
| return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN); |
| } |
| |
| STATIC void |
| xfs_rtrmapbt_init_key_from_rec( |
| union xfs_btree_key *key, |
| const union xfs_btree_rec *rec) |
| { |
| key->rmap.rm_startblock = rec->rmap.rm_startblock; |
| key->rmap.rm_owner = rec->rmap.rm_owner; |
| key->rmap.rm_offset = ondisk_rec_offset_to_key(rec); |
| } |
| |
| STATIC void |
| xfs_rtrmapbt_init_high_key_from_rec( |
| union xfs_btree_key *key, |
| const union xfs_btree_rec *rec) |
| { |
| uint64_t off; |
| int adj; |
| |
| adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1; |
| |
| key->rmap.rm_startblock = rec->rmap.rm_startblock; |
| be32_add_cpu(&key->rmap.rm_startblock, adj); |
| key->rmap.rm_owner = rec->rmap.rm_owner; |
| key->rmap.rm_offset = ondisk_rec_offset_to_key(rec); |
| if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) || |
| XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset))) |
| return; |
| off = be64_to_cpu(key->rmap.rm_offset); |
| off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK); |
| key->rmap.rm_offset = cpu_to_be64(off); |
| } |
| |
| STATIC void |
| xfs_rtrmapbt_init_rec_from_cur( |
| struct xfs_btree_cur *cur, |
| union xfs_btree_rec *rec) |
| { |
| rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock); |
| rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount); |
| rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner); |
| rec->rmap.rm_offset = cpu_to_be64( |
| xfs_rmap_irec_offset_pack(&cur->bc_rec.r)); |
| } |
| |
| STATIC void |
| xfs_rtrmapbt_init_ptr_from_cur( |
| struct xfs_btree_cur *cur, |
| union xfs_btree_ptr *ptr) |
| { |
| ptr->l = 0; |
| } |
| |
| /* |
| * Mask the appropriate parts of the ondisk key field for a key comparison. |
| * Fork and bmbt are significant parts of the rmap record key, but written |
| * status is merely a record attribute. |
| */ |
| static inline uint64_t offset_keymask(uint64_t offset) |
| { |
| return offset & ~XFS_RMAP_OFF_UNWRITTEN; |
| } |
| |
| STATIC int64_t |
| xfs_rtrmapbt_key_diff( |
| struct xfs_btree_cur *cur, |
| const union xfs_btree_key *key) |
| { |
| struct xfs_rmap_irec *rec = &cur->bc_rec.r; |
| const struct xfs_rmap_key *kp = &key->rmap; |
| __u64 x, y; |
| int64_t d; |
| |
| d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; |
| if (d) |
| return d; |
| |
| x = be64_to_cpu(kp->rm_owner); |
| y = rec->rm_owner; |
| if (x > y) |
| return 1; |
| else if (y > x) |
| return -1; |
| |
| x = offset_keymask(be64_to_cpu(kp->rm_offset)); |
| y = offset_keymask(xfs_rmap_irec_offset_pack(rec)); |
| if (x > y) |
| return 1; |
| else if (y > x) |
| return -1; |
| return 0; |
| } |
| |
| STATIC int64_t |
| xfs_rtrmapbt_diff_two_keys( |
| struct xfs_btree_cur *cur, |
| const union xfs_btree_key *k1, |
| const union xfs_btree_key *k2, |
| const union xfs_btree_key *mask) |
| { |
| const struct xfs_rmap_key *kp1 = &k1->rmap; |
| const struct xfs_rmap_key *kp2 = &k2->rmap; |
| int64_t d; |
| __u64 x, y; |
| |
| /* Doesn't make sense to mask off the physical space part */ |
| ASSERT(!mask || mask->rmap.rm_startblock); |
| |
| d = (int64_t)be32_to_cpu(kp1->rm_startblock) - |
| be32_to_cpu(kp2->rm_startblock); |
| if (d) |
| return d; |
| |
| if (!mask || mask->rmap.rm_owner) { |
| x = be64_to_cpu(kp1->rm_owner); |
| y = be64_to_cpu(kp2->rm_owner); |
| if (x > y) |
| return 1; |
| else if (y > x) |
| return -1; |
| } |
| |
| if (!mask || mask->rmap.rm_offset) { |
| /* Doesn't make sense to allow offset but not owner */ |
| ASSERT(!mask || mask->rmap.rm_owner); |
| |
| x = offset_keymask(be64_to_cpu(kp1->rm_offset)); |
| y = offset_keymask(be64_to_cpu(kp2->rm_offset)); |
| if (x > y) |
| return 1; |
| else if (y > x) |
| return -1; |
| } |
| |
| return 0; |
| } |
| |
| static xfs_failaddr_t |
| xfs_rtrmapbt_verify( |
| struct xfs_buf *bp) |
| { |
| struct xfs_mount *mp = bp->b_target->bt_mount; |
| struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
| xfs_failaddr_t fa; |
| int level; |
| |
| if (!xfs_verify_magic(bp, block->bb_magic)) |
| return __this_address; |
| |
| if (!xfs_has_rmapbt(mp)) |
| return __this_address; |
| fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); |
| if (fa) |
| return fa; |
| level = be16_to_cpu(block->bb_level); |
| if (level > mp->m_rtrmap_maxlevels) |
| return __this_address; |
| |
| return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]); |
| } |
| |
| static void |
| xfs_rtrmapbt_read_verify( |
| struct xfs_buf *bp) |
| { |
| xfs_failaddr_t fa; |
| |
| if (!xfs_btree_fsblock_verify_crc(bp)) |
| xfs_verifier_error(bp, -EFSBADCRC, __this_address); |
| else { |
| fa = xfs_rtrmapbt_verify(bp); |
| if (fa) |
| xfs_verifier_error(bp, -EFSCORRUPTED, fa); |
| } |
| |
| if (bp->b_error) |
| trace_xfs_btree_corrupt(bp, _RET_IP_); |
| } |
| |
| static void |
| xfs_rtrmapbt_write_verify( |
| struct xfs_buf *bp) |
| { |
| xfs_failaddr_t fa; |
| |
| fa = xfs_rtrmapbt_verify(bp); |
| if (fa) { |
| trace_xfs_btree_corrupt(bp, _RET_IP_); |
| xfs_verifier_error(bp, -EFSCORRUPTED, fa); |
| return; |
| } |
| xfs_btree_fsblock_calc_crc(bp); |
| |
| } |
| |
| const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = { |
| .name = "xfs_rtrmapbt", |
| .magic = { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) }, |
| .verify_read = xfs_rtrmapbt_read_verify, |
| .verify_write = xfs_rtrmapbt_write_verify, |
| .verify_struct = xfs_rtrmapbt_verify, |
| }; |
| |
| STATIC int |
| xfs_rtrmapbt_keys_inorder( |
| struct xfs_btree_cur *cur, |
| const union xfs_btree_key *k1, |
| const union xfs_btree_key *k2) |
| { |
| uint32_t x; |
| uint32_t y; |
| uint64_t a; |
| uint64_t b; |
| |
| x = be32_to_cpu(k1->rmap.rm_startblock); |
| y = be32_to_cpu(k2->rmap.rm_startblock); |
| if (x < y) |
| return 1; |
| else if (x > y) |
| return 0; |
| a = be64_to_cpu(k1->rmap.rm_owner); |
| b = be64_to_cpu(k2->rmap.rm_owner); |
| if (a < b) |
| return 1; |
| else if (a > b) |
| return 0; |
| a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset)); |
| b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset)); |
| if (a <= b) |
| return 1; |
| return 0; |
| } |
| |
| STATIC int |
| xfs_rtrmapbt_recs_inorder( |
| struct xfs_btree_cur *cur, |
| const union xfs_btree_rec *r1, |
| const union xfs_btree_rec *r2) |
| { |
| uint32_t x; |
| uint32_t y; |
| uint64_t a; |
| uint64_t b; |
| |
| x = be32_to_cpu(r1->rmap.rm_startblock); |
| y = be32_to_cpu(r2->rmap.rm_startblock); |
| if (x < y) |
| return 1; |
| else if (x > y) |
| return 0; |
| a = be64_to_cpu(r1->rmap.rm_owner); |
| b = be64_to_cpu(r2->rmap.rm_owner); |
| if (a < b) |
| return 1; |
| else if (a > b) |
| return 0; |
| a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset)); |
| b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset)); |
| if (a <= b) |
| return 1; |
| return 0; |
| } |
| |
| STATIC enum xbtree_key_contig |
| xfs_rtrmapbt_keys_contiguous( |
| struct xfs_btree_cur *cur, |
| const union xfs_btree_key *key1, |
| const union xfs_btree_key *key2, |
| const union xfs_btree_key *mask) |
| { |
| ASSERT(!mask || mask->rmap.rm_startblock); |
| |
| /* |
| * We only support checking contiguity of the physical space component. |
| * If any callers ever need more specificity than that, they'll have to |
| * implement it here. |
| */ |
| ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset)); |
| |
| return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock), |
| be32_to_cpu(key2->rmap.rm_startblock)); |
| } |
| |
| static inline void |
| xfs_rtrmapbt_move_ptrs( |
| struct xfs_mount *mp, |
| struct xfs_btree_block *broot, |
| short old_size, |
| size_t new_size, |
| unsigned int numrecs) |
| { |
| void *dptr; |
| void *sptr; |
| |
| sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size); |
| dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size); |
| memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t)); |
| } |
| |
| static struct xfs_btree_block * |
| xfs_rtrmapbt_broot_realloc( |
| struct xfs_btree_cur *cur, |
| unsigned int new_numrecs) |
| { |
| struct xfs_mount *mp = cur->bc_mp; |
| struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); |
| struct xfs_btree_block *broot; |
| unsigned int new_size; |
| unsigned int old_size = ifp->if_broot_bytes; |
| const unsigned int level = cur->bc_nlevels - 1; |
| |
| new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs); |
| |
| /* Handle the nop case quietly. */ |
| if (new_size == old_size) |
| return ifp->if_broot; |
| |
| if (new_size > old_size) { |
| unsigned int old_numrecs; |
| |
| /* |
| * If there wasn't any memory allocated before, just allocate |
| * it now and get out. |
| */ |
| if (old_size == 0) |
| return xfs_broot_realloc(ifp, new_size); |
| |
| /* |
| * If there is already an existing if_broot, then we need to |
| * realloc it and possibly move the node block pointers because |
| * those are not butted up against the btree block header. |
| */ |
| old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0); |
| broot = xfs_broot_realloc(ifp, new_size); |
| if (level > 0) |
| xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size, |
| old_numrecs); |
| goto out_broot; |
| } |
| |
| /* |
| * We're reducing numrecs. If we're going all the way to zero, just |
| * free the block. |
| */ |
| ASSERT(ifp->if_broot != NULL && old_size > 0); |
| if (new_size == 0) |
| return xfs_broot_realloc(ifp, 0); |
| |
| /* |
| * Shrink the btree root by possibly moving the rtrmapbt pointers, |
| * since they are not butted up against the btree block header. Then |
| * reallocate broot. |
| */ |
| if (level > 0) |
| xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size, |
| new_numrecs); |
| broot = xfs_broot_realloc(ifp, new_size); |
| |
| out_broot: |
| ASSERT(xfs_rtrmap_droot_space(broot) <= |
| xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork)); |
| return broot; |
| } |
| |
| const struct xfs_btree_ops xfs_rtrmapbt_ops = { |
| .name = "rtrmap", |
| .type = XFS_BTREE_TYPE_INODE, |
| .geom_flags = XFS_BTGEO_OVERLAPPING | |
| XFS_BTGEO_IROOT_RECORDS, |
| |
| .rec_len = sizeof(struct xfs_rmap_rec), |
| /* Overlapping btree; 2 keys per pointer. */ |
| .key_len = 2 * sizeof(struct xfs_rmap_key), |
| .ptr_len = XFS_BTREE_LONG_PTR_LEN, |
| |
| .lru_refs = XFS_RMAP_BTREE_REF, |
| .statoff = XFS_STATS_CALC_INDEX(xs_rtrmap_2), |
| .sick_mask = XFS_SICK_RG_RMAPBT, |
| |
| .dup_cursor = xfs_rtrmapbt_dup_cursor, |
| .alloc_block = xfs_btree_alloc_metafile_block, |
| .free_block = xfs_btree_free_metafile_block, |
| .get_minrecs = xfs_rtrmapbt_get_minrecs, |
| .get_maxrecs = xfs_rtrmapbt_get_maxrecs, |
| .get_dmaxrecs = xfs_rtrmapbt_get_dmaxrecs, |
| .init_key_from_rec = xfs_rtrmapbt_init_key_from_rec, |
| .init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec, |
| .init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur, |
| .init_ptr_from_cur = xfs_rtrmapbt_init_ptr_from_cur, |
| .key_diff = xfs_rtrmapbt_key_diff, |
| .buf_ops = &xfs_rtrmapbt_buf_ops, |
| .diff_two_keys = xfs_rtrmapbt_diff_two_keys, |
| .keys_inorder = xfs_rtrmapbt_keys_inorder, |
| .recs_inorder = xfs_rtrmapbt_recs_inorder, |
| .keys_contiguous = xfs_rtrmapbt_keys_contiguous, |
| .broot_realloc = xfs_rtrmapbt_broot_realloc, |
| }; |
| |
| /* Allocate a new rt rmap btree cursor. */ |
| struct xfs_btree_cur * |
| xfs_rtrmapbt_init_cursor( |
| struct xfs_trans *tp, |
| struct xfs_rtgroup *rtg) |
| { |
| struct xfs_inode *ip = rtg_rmap(rtg); |
| struct xfs_mount *mp = rtg_mount(rtg); |
| struct xfs_btree_cur *cur; |
| |
| xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); |
| |
| cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops, |
| mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache); |
| |
| cur->bc_ino.ip = ip; |
| cur->bc_group = xfs_group_hold(rtg_group(rtg)); |
| cur->bc_ino.whichfork = XFS_DATA_FORK; |
| cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1; |
| cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK); |
| |
| return cur; |
| } |
| |
| #ifdef CONFIG_XFS_BTREE_IN_MEM |
| /* |
| * Validate an in-memory realtime rmap btree block. Callers are allowed to |
| * generate an in-memory btree even if the ondisk feature is not enabled. |
| */ |
| static xfs_failaddr_t |
| xfs_rtrmapbt_mem_verify( |
| struct xfs_buf *bp) |
| { |
| struct xfs_mount *mp = bp->b_mount; |
| struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); |
| xfs_failaddr_t fa; |
| unsigned int level; |
| unsigned int maxrecs; |
| |
| if (!xfs_verify_magic(bp, block->bb_magic)) |
| return __this_address; |
| |
| fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); |
| if (fa) |
| return fa; |
| |
| level = be16_to_cpu(block->bb_level); |
| if (xfs_has_rmapbt(mp)) { |
| if (level >= mp->m_rtrmap_maxlevels) |
| return __this_address; |
| } else { |
| if (level >= xfs_rtrmapbt_maxlevels_ondisk()) |
| return __this_address; |
| } |
| |
| maxrecs = xfs_rtrmapbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0); |
| return xfs_btree_memblock_verify(bp, maxrecs); |
| } |
| |
| static void |
| xfs_rtrmapbt_mem_rw_verify( |
| struct xfs_buf *bp) |
| { |
| xfs_failaddr_t fa = xfs_rtrmapbt_mem_verify(bp); |
| |
| if (fa) |
| xfs_verifier_error(bp, -EFSCORRUPTED, fa); |
| } |
| |
| /* skip crc checks on in-memory btrees to save time */ |
| static const struct xfs_buf_ops xfs_rtrmapbt_mem_buf_ops = { |
| .name = "xfs_rtrmapbt_mem", |
| .magic = { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) }, |
| .verify_read = xfs_rtrmapbt_mem_rw_verify, |
| .verify_write = xfs_rtrmapbt_mem_rw_verify, |
| .verify_struct = xfs_rtrmapbt_mem_verify, |
| }; |
| |
| const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = { |
| .type = XFS_BTREE_TYPE_MEM, |
| .geom_flags = XFS_BTGEO_OVERLAPPING, |
| |
| .rec_len = sizeof(struct xfs_rmap_rec), |
| /* Overlapping btree; 2 keys per pointer. */ |
| .key_len = 2 * sizeof(struct xfs_rmap_key), |
| .ptr_len = XFS_BTREE_LONG_PTR_LEN, |
| |
| .lru_refs = XFS_RMAP_BTREE_REF, |
| .statoff = XFS_STATS_CALC_INDEX(xs_rtrmap_mem_2), |
| |
| .dup_cursor = xfbtree_dup_cursor, |
| .set_root = xfbtree_set_root, |
| .alloc_block = xfbtree_alloc_block, |
| .free_block = xfbtree_free_block, |
| .get_minrecs = xfbtree_get_minrecs, |
| .get_maxrecs = xfbtree_get_maxrecs, |
| .init_key_from_rec = xfs_rtrmapbt_init_key_from_rec, |
| .init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec, |
| .init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur, |
| .init_ptr_from_cur = xfbtree_init_ptr_from_cur, |
| .key_diff = xfs_rtrmapbt_key_diff, |
| .buf_ops = &xfs_rtrmapbt_mem_buf_ops, |
| .diff_two_keys = xfs_rtrmapbt_diff_two_keys, |
| .keys_inorder = xfs_rtrmapbt_keys_inorder, |
| .recs_inorder = xfs_rtrmapbt_recs_inorder, |
| .keys_contiguous = xfs_rtrmapbt_keys_contiguous, |
| }; |
| |
| /* Create a cursor for an in-memory btree. */ |
| struct xfs_btree_cur * |
| xfs_rtrmapbt_mem_cursor( |
| struct xfs_rtgroup *rtg, |
| struct xfs_trans *tp, |
| struct xfbtree *xfbt) |
| { |
| struct xfs_mount *mp = rtg_mount(rtg); |
| struct xfs_btree_cur *cur; |
| |
| cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_mem_ops, |
| mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache); |
| cur->bc_mem.xfbtree = xfbt; |
| cur->bc_nlevels = xfbt->nlevels; |
| cur->bc_group = xfs_group_hold(rtg_group(rtg)); |
| return cur; |
| } |
| |
| /* Create an in-memory realtime rmap btree. */ |
| int |
| xfs_rtrmapbt_mem_init( |
| struct xfs_mount *mp, |
| struct xfbtree *xfbt, |
| struct xfs_buftarg *btp, |
| xfs_rgnumber_t rgno) |
| { |
| xfbt->owner = rgno; |
| return xfbtree_init(mp, xfbt, btp, &xfs_rtrmapbt_mem_ops); |
| } |
| #endif /* CONFIG_XFS_BTREE_IN_MEM */ |
| |
| /* |
| * Install a new rt reverse mapping btree root. Caller is responsible for |
| * invalidating and freeing the old btree blocks. |
| */ |
| void |
| xfs_rtrmapbt_commit_staged_btree( |
| struct xfs_btree_cur *cur, |
| struct xfs_trans *tp) |
| { |
| struct xbtree_ifakeroot *ifake = cur->bc_ino.ifake; |
| struct xfs_ifork *ifp; |
| int flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT; |
| |
| ASSERT(cur->bc_flags & XFS_BTREE_STAGING); |
| ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE); |
| |
| /* |
| * Free any resources hanging off the real fork, then shallow-copy the |
| * staging fork's contents into the real fork to transfer everything |
| * we just built. |
| */ |
| ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK); |
| xfs_idestroy_fork(ifp); |
| memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork)); |
| |
| cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno; |
| xfs_trans_log_inode(tp, cur->bc_ino.ip, flags); |
| xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK); |
| } |
| |
| /* Calculate number of records in a rt reverse mapping btree block. */ |
| static inline unsigned int |
| xfs_rtrmapbt_block_maxrecs( |
| unsigned int blocklen, |
| bool leaf) |
| { |
| if (leaf) |
| return blocklen / sizeof(struct xfs_rmap_rec); |
| return blocklen / |
| (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t)); |
| } |
| |
| /* |
| * Calculate number of records in an rt reverse mapping btree block. |
| */ |
| unsigned int |
| xfs_rtrmapbt_maxrecs( |
| struct xfs_mount *mp, |
| unsigned int blocklen, |
| bool leaf) |
| { |
| blocklen -= XFS_RTRMAP_BLOCK_LEN; |
| return xfs_rtrmapbt_block_maxrecs(blocklen, leaf); |
| } |
| |
| /* Compute the max possible height for realtime reverse mapping btrees. */ |
| unsigned int |
| xfs_rtrmapbt_maxlevels_ondisk(void) |
| { |
| unsigned long long max_dblocks; |
| unsigned int minrecs[2]; |
| unsigned int blocklen; |
| |
| blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN; |
| |
| minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2; |
| minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2; |
| |
| /* |
| * Compute the asymptotic maxlevels for an rtrmapbt on any rtreflink fs. |
| * |
| * On a reflink filesystem, each block in an rtgroup can have up to |
| * 2^32 (per the refcount record format) owners, which means that |
| * theoretically we could face up to 2^64 rmap records. However, we're |
| * likely to run out of blocks in the data device long before that |
| * happens, which means that we must compute the max height based on |
| * what the btree will look like if it consumes almost all the blocks |
| * in the data device due to maximal sharing factor. |
| */ |
| max_dblocks = -1U; /* max ag count */ |
| max_dblocks *= XFS_MAX_CRC_AG_BLOCKS; |
| return xfs_btree_space_to_height(minrecs, max_dblocks); |
| } |
| |
| int __init |
| xfs_rtrmapbt_init_cur_cache(void) |
| { |
| xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur", |
| xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()), |
| 0, 0, NULL); |
| |
| if (!xfs_rtrmapbt_cur_cache) |
| return -ENOMEM; |
| return 0; |
| } |
| |
| void |
| xfs_rtrmapbt_destroy_cur_cache(void) |
| { |
| kmem_cache_destroy(xfs_rtrmapbt_cur_cache); |
| xfs_rtrmapbt_cur_cache = NULL; |
| } |
| |
| /* Compute the maximum height of an rt reverse mapping btree. */ |
| void |
| xfs_rtrmapbt_compute_maxlevels( |
| struct xfs_mount *mp) |
| { |
| unsigned int d_maxlevels, r_maxlevels; |
| |
| if (!xfs_has_rtrmapbt(mp)) { |
| mp->m_rtrmap_maxlevels = 0; |
| return; |
| } |
| |
| /* |
| * The realtime rmapbt lives on the data device, which means that its |
| * maximum height is constrained by the size of the data device and |
| * the height required to store one rmap record for each block in an |
| * rt group. |
| * |
| * On a reflink filesystem, each rt block can have up to 2^32 (per the |
| * refcount record format) owners, which means that theoretically we |
| * could face up to 2^64 rmap records. This makes the computation of |
| * maxlevels based on record count meaningless, so we only consider the |
| * size of the data device. |
| */ |
| d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr, |
| mp->m_sb.sb_dblocks); |
| if (xfs_has_rtreflink(mp)) { |
| mp->m_rtrmap_maxlevels = d_maxlevels + 1; |
| return; |
| } |
| |
| r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr, |
| mp->m_groups[XG_TYPE_RTG].blocks); |
| |
| /* Add one level to handle the inode root level. */ |
| mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1; |
| } |
| |
| /* Calculate the rtrmap btree size for some records. */ |
| unsigned long long |
| xfs_rtrmapbt_calc_size( |
| struct xfs_mount *mp, |
| unsigned long long len) |
| { |
| return xfs_btree_calc_size(mp->m_rtrmap_mnr, len); |
| } |
| |
| /* |
| * Calculate the maximum rmap btree size. |
| */ |
| static unsigned long long |
| xfs_rtrmapbt_max_size( |
| struct xfs_mount *mp, |
| xfs_rtblock_t rtblocks) |
| { |
| /* Bail out if we're uninitialized, which can happen in mkfs. */ |
| if (mp->m_rtrmap_mxr[0] == 0) |
| return 0; |
| |
| return xfs_rtrmapbt_calc_size(mp, rtblocks); |
| } |
| |
| /* |
| * Figure out how many blocks to reserve and how many are used by this btree. |
| */ |
| xfs_filblks_t |
| xfs_rtrmapbt_calc_reserves( |
| struct xfs_mount *mp) |
| { |
| uint32_t blocks = mp->m_groups[XG_TYPE_RTG].blocks; |
| |
| if (!xfs_has_rtrmapbt(mp)) |
| return 0; |
| |
| /* Reserve 1% of the rtgroup or enough for 1 block per record. */ |
| return max_t(xfs_filblks_t, blocks / 100, |
| xfs_rtrmapbt_max_size(mp, blocks)); |
| } |
| |
| /* Convert on-disk form of btree root to in-memory form. */ |
| STATIC void |
| xfs_rtrmapbt_from_disk( |
| struct xfs_inode *ip, |
| struct xfs_rtrmap_root *dblock, |
| unsigned int dblocklen, |
| struct xfs_btree_block *rblock) |
| { |
| struct xfs_mount *mp = ip->i_mount; |
| struct xfs_rmap_key *fkp; |
| __be64 *fpp; |
| struct xfs_rmap_key *tkp; |
| __be64 *tpp; |
| struct xfs_rmap_rec *frp; |
| struct xfs_rmap_rec *trp; |
| unsigned int rblocklen = xfs_rtrmap_broot_space(mp, dblock); |
| unsigned int numrecs; |
| unsigned int maxrecs; |
| |
| xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino); |
| |
| rblock->bb_level = dblock->bb_level; |
| rblock->bb_numrecs = dblock->bb_numrecs; |
| numrecs = be16_to_cpu(dblock->bb_numrecs); |
| |
| if (be16_to_cpu(rblock->bb_level) > 0) { |
| maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false); |
| fkp = xfs_rtrmap_droot_key_addr(dblock, 1); |
| tkp = xfs_rtrmap_key_addr(rblock, 1); |
| fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs); |
| tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen); |
| memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs); |
| memcpy(tpp, fpp, sizeof(*fpp) * numrecs); |
| } else { |
| frp = xfs_rtrmap_droot_rec_addr(dblock, 1); |
| trp = xfs_rtrmap_rec_addr(rblock, 1); |
| memcpy(trp, frp, sizeof(*frp) * numrecs); |
| } |
| } |
| |
| /* Load a realtime reverse mapping btree root in from disk. */ |
| int |
| xfs_iformat_rtrmap( |
| struct xfs_inode *ip, |
| struct xfs_dinode *dip) |
| { |
| struct xfs_mount *mp = ip->i_mount; |
| struct xfs_rtrmap_root *dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); |
| struct xfs_btree_block *broot; |
| unsigned int numrecs; |
| unsigned int level; |
| int dsize; |
| |
| /* |
| * growfs must create the rtrmap inodes before adding a realtime volume |
| * to the filesystem, so we cannot use the rtrmapbt predicate here. |
| */ |
| if (!xfs_has_rmapbt(ip->i_mount)) { |
| xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); |
| return -EFSCORRUPTED; |
| } |
| |
| dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK); |
| numrecs = be16_to_cpu(dfp->bb_numrecs); |
| level = be16_to_cpu(dfp->bb_level); |
| |
| if (level > mp->m_rtrmap_maxlevels || |
| xfs_rtrmap_droot_space_calc(level, numrecs) > dsize) { |
| xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); |
| return -EFSCORRUPTED; |
| } |
| |
| broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK), |
| xfs_rtrmap_broot_space_calc(mp, level, numrecs)); |
| if (broot) |
| xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot); |
| return 0; |
| } |
| |
| /* Convert in-memory form of btree root to on-disk form. */ |
| void |
| xfs_rtrmapbt_to_disk( |
| struct xfs_mount *mp, |
| struct xfs_btree_block *rblock, |
| unsigned int rblocklen, |
| struct xfs_rtrmap_root *dblock, |
| unsigned int dblocklen) |
| { |
| struct xfs_rmap_key *fkp; |
| __be64 *fpp; |
| struct xfs_rmap_key *tkp; |
| __be64 *tpp; |
| struct xfs_rmap_rec *frp; |
| struct xfs_rmap_rec *trp; |
| unsigned int numrecs; |
| unsigned int maxrecs; |
| |
| ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC)); |
| ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)); |
| ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL)); |
| ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)); |
| ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)); |
| |
| dblock->bb_level = rblock->bb_level; |
| dblock->bb_numrecs = rblock->bb_numrecs; |
| numrecs = be16_to_cpu(rblock->bb_numrecs); |
| |
| if (be16_to_cpu(rblock->bb_level) > 0) { |
| maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false); |
| fkp = xfs_rtrmap_key_addr(rblock, 1); |
| tkp = xfs_rtrmap_droot_key_addr(dblock, 1); |
| fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen); |
| tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs); |
| memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs); |
| memcpy(tpp, fpp, sizeof(*fpp) * numrecs); |
| } else { |
| frp = xfs_rtrmap_rec_addr(rblock, 1); |
| trp = xfs_rtrmap_droot_rec_addr(dblock, 1); |
| memcpy(trp, frp, sizeof(*frp) * numrecs); |
| } |
| } |
| |
| /* Flush a realtime reverse mapping btree root out to disk. */ |
| void |
| xfs_iflush_rtrmap( |
| struct xfs_inode *ip, |
| struct xfs_dinode *dip) |
| { |
| struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); |
| struct xfs_rtrmap_root *dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); |
| |
| ASSERT(ifp->if_broot != NULL); |
| ASSERT(ifp->if_broot_bytes > 0); |
| ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <= |
| xfs_inode_fork_size(ip, XFS_DATA_FORK)); |
| xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes, |
| dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK)); |
| } |
| |
| /* |
| * Create a realtime rmap btree inode. |
| */ |
| int |
| xfs_rtrmapbt_create( |
| struct xfs_rtgroup *rtg, |
| struct xfs_inode *ip, |
| struct xfs_trans *tp, |
| bool init) |
| { |
| struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); |
| struct xfs_mount *mp = ip->i_mount; |
| struct xfs_btree_block *broot; |
| |
| ifp->if_format = XFS_DINODE_FMT_META_BTREE; |
| ASSERT(ifp->if_broot_bytes == 0); |
| ASSERT(ifp->if_bytes == 0); |
| |
| /* Initialize the empty incore btree root. */ |
| broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0)); |
| if (broot) |
| xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0, |
| ip->i_ino); |
| xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT); |
| |
| return 0; |
| } |
| |
| /* |
| * Initialize an rmap for a realtime superblock using the potentially updated |
| * rt geometry in the provided @mp. |
| */ |
| int |
| xfs_rtrmapbt_init_rtsb( |
| struct xfs_mount *mp, |
| struct xfs_rtgroup *rtg, |
| struct xfs_trans *tp) |
| { |
| struct xfs_rmap_irec rmap = { |
| .rm_blockcount = mp->m_sb.sb_rextsize, |
| .rm_owner = XFS_RMAP_OWN_FS, |
| }; |
| struct xfs_btree_cur *cur; |
| int error; |
| |
| ASSERT(xfs_has_rtsb(mp)); |
| ASSERT(rtg_rgno(rtg) == 0); |
| |
| cur = xfs_rtrmapbt_init_cursor(tp, rtg); |
| error = xfs_rmap_map_raw(cur, &rmap); |
| xfs_btree_del_cursor(cur, error); |
| return error; |
| } |