Browse Source

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "These are mostly bug fixes and two small performance fixes.  The
  most important of the bunch are Josef's fix for a snapshotting
  regression and Mark's update to fix compile problems on arm"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits)
  Btrfs: create the uuid tree on remount rw
  btrfs: change extent-same to copy entire argument struct
  Btrfs: dir_inode_operations should use btrfs_update_time also
  btrfs: Add btrfs: prefix to kernel log output
  btrfs: refuse to remount read-write after abort
  Btrfs: btrfs_ioctl_default_subvol: Revert back to toplevel subvolume when arg is 0
  Btrfs: don't leak transaction in btrfs_sync_file()
  Btrfs: add the missing mutex unlock in write_all_supers()
  Btrfs: iput inode on allocation failure
  Btrfs: remove space_info->reservation_progress
  Btrfs: kill delay_iput arg to the wait_ordered functions
  Btrfs: fix worst case calculator for space usage
  Revert "Btrfs: rework the overcommit logic to be based on the total size"
  Btrfs: improve replacing nocow extents
  Btrfs: drop dir i_size when adding new names on replay
  Btrfs: replay dir_index items before other items
  Btrfs: check roots last log commit when checking if an inode has been logged
  Btrfs: actually log directory we are fsync()'ing
  Btrfs: actually limit the size of delalloc range
  Btrfs: allocate the free space by the existed max extent size when ENOSPC
  ...
master
Linus Torvalds 8 years ago
parent
commit
0fbf2cc983
  1. 5
      fs/btrfs/btrfs_inode.h
  2. 7
      fs/btrfs/ctree.c
  3. 17
      fs/btrfs/ctree.h
  4. 4
      fs/btrfs/dev-replace.c
  5. 2
      fs/btrfs/disk-io.c
  6. 57
      fs/btrfs/extent-tree.c
  7. 8
      fs/btrfs/extent_io.c
  8. 4
      fs/btrfs/file.c
  9. 67
      fs/btrfs/free-space-cache.c
  10. 5
      fs/btrfs/free-space-cache.h
  11. 16
      fs/btrfs/inode.c
  12. 80
      fs/btrfs/ioctl.c
  13. 24
      fs/btrfs/ordered-data.c
  14. 5
      fs/btrfs/ordered-data.h
  15. 43
      fs/btrfs/relocation.c
  16. 112
      fs/btrfs/scrub.c
  17. 21
      fs/btrfs/super.c
  18. 2
      fs/btrfs/transaction.c
  19. 52
      fs/btrfs/tree-log.c
  20. 7
      fs/btrfs/volumes.c
  21. 1
      include/trace/events/btrfs.h

5
fs/btrfs/btrfs_inode.h

@ -213,7 +213,10 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
if (BTRFS_I(inode)->logged_trans == generation &&
BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit)
BTRFS_I(inode)->last_sub_trans <=
BTRFS_I(inode)->last_log_commit &&
BTRFS_I(inode)->last_sub_trans <=
BTRFS_I(inode)->root->last_log_commit)
return 1;
return 0;
}

7
fs/btrfs/ctree.c

@ -1005,8 +1005,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
if (root->ref_cows)
btrfs_reloc_cow_block(trans, root, buf, cow);
if (root->ref_cows) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret)
return ret;
}
if (buf == root->node) {
WARN_ON(parent && parent != buf);

17
fs/btrfs/ctree.h

@ -1118,15 +1118,6 @@ struct btrfs_space_info {
*/
struct percpu_counter total_bytes_pinned;
/*
* we bump reservation progress every time we decrement
* bytes_reserved. This way people waiting for reservations
* know something good has happened and they can check
* for progress. The number here isn't to be trusted, it
* just shows reclaim activity
*/
unsigned long reservation_progress;
unsigned int full:1; /* indicates that we cannot allocate any more
chunks for this space */
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
@ -3135,7 +3126,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
{
return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3 * num_items;
2 * num_items;
}
/*
@ -3939,9 +3930,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow);
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);

4
fs/btrfs/dev-replace.c

@ -400,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
btrfs_wait_all_ordered_extents(root->fs_info, 0);
btrfs_wait_all_ordered_extents(root->fs_info);
/* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0);
@ -475,7 +475,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
btrfs_wait_all_ordered_extents(root->fs_info, 0);
btrfs_wait_all_ordered_extents(root->fs_info);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {

2
fs/btrfs/disk-io.c

@ -157,6 +157,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
{ .id = 0, .name_stem = "tree" },
};
@ -3415,6 +3416,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (total_errors > max_errors) {
printk(KERN_ERR "btrfs: %d errors while writing supers\n",
total_errors);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
/* FUA is masked off if unsupported and can't be the reason */
btrfs_error(root->fs_info, -EIO,

57
fs/btrfs/extent-tree.c

@ -3925,7 +3925,6 @@ static int can_overcommit(struct btrfs_root *root,
u64 space_size;
u64 avail;
u64 used;
u64 to_add;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
@ -3959,25 +3958,17 @@ static int can_overcommit(struct btrfs_root *root,
BTRFS_BLOCK_GROUP_RAID10))
avail >>= 1;
to_add = space_info->total_bytes;
/*
* If we aren't flushing all things, let us overcommit up to
* 1/2th of the space. If we can flush, don't let us overcommit
* too much, let it overcommit up to 1/8 of the space.
*/
if (flush == BTRFS_RESERVE_FLUSH_ALL)
to_add >>= 3;
avail >>= 3;
else
to_add >>= 1;
/*
* Limit the overcommit to the amount of free space we could possibly
* allocate for chunks.
*/
to_add = min(avail, to_add);
avail >>= 1;
if (used + bytes < space_info->total_bytes + to_add)
if (used + bytes < space_info->total_bytes + avail)
return 1;
return 0;
}
@ -4000,7 +3991,7 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
*/
btrfs_start_all_delalloc_inodes(root->fs_info, 0);
if (!current->journal_info)
btrfs_wait_all_ordered_extents(root->fs_info, 0);
btrfs_wait_all_ordered_extents(root->fs_info);
}
}
@ -4030,7 +4021,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
if (delalloc_bytes == 0) {
if (trans)
return;
btrfs_wait_all_ordered_extents(root->fs_info, 0);
btrfs_wait_all_ordered_extents(root->fs_info);
return;
}
@ -4058,7 +4049,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
loops++;
if (wait_ordered && !trans) {
btrfs_wait_all_ordered_extents(root->fs_info, 0);
btrfs_wait_all_ordered_extents(root->fs_info);
} else {
time_left = schedule_timeout_killable(1);
if (time_left)
@ -4465,7 +4456,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@ -4666,7 +4656,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
sinfo->flags, num_bytes, 0);
sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@ -5446,7 +5435,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@ -6117,10 +6105,13 @@ enum btrfs_loop_type {
/*
* walks the btree of allocated extents and find a hole of a given size.
* The key ins is changed to record the hole:
* ins->objectid == block start
* ins->objectid == start position
* ins->flags = BTRFS_EXTENT_ITEM_KEY
* ins->offset == number of blocks
* ins->offset == the size of the hole.
* Any available blocks before search_start are skipped.
*
* If there is no suitable free space, we will record the max size of
* the free space extent currently.
*/
static noinline int find_free_extent(struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
@ -6133,6 +6124,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
struct btrfs_block_group_cache *block_group = NULL;
struct btrfs_block_group_cache *used_block_group;
u64 search_start = 0;
u64 max_extent_size = 0;
int empty_cluster = 2 * 1024 * 1024;
struct btrfs_space_info *space_info;
int loop = 0;
@ -6292,7 +6284,10 @@ have_block_group:
btrfs_get_block_group(used_block_group);
offset = btrfs_alloc_from_cluster(used_block_group,
last_ptr, num_bytes, used_block_group->key.objectid);
last_ptr,
num_bytes,
used_block_group->key.objectid,
&max_extent_size);
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
@ -6355,8 +6350,10 @@ refill_cluster:
* cluster
*/
offset = btrfs_alloc_from_cluster(block_group,
last_ptr, num_bytes,
search_start);
last_ptr,
num_bytes,
search_start,
&max_extent_size);
if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
@ -6391,13 +6388,18 @@ unclustered_alloc:
if (cached &&
block_group->free_space_ctl->free_space <
num_bytes + empty_cluster + empty_size) {
if (block_group->free_space_ctl->free_space >
max_extent_size)
max_extent_size =
block_group->free_space_ctl->free_space;
spin_unlock(&block_group->free_space_ctl->tree_lock);
goto loop;
}
spin_unlock(&block_group->free_space_ctl->tree_lock);
offset = btrfs_find_space_for_alloc(block_group, search_start,
num_bytes, empty_size);
num_bytes, empty_size,
&max_extent_size);
/*
* If we didn't find a chunk, and we haven't failed on this
* block group before, and this block group is in the middle of
@ -6515,7 +6517,8 @@ loop:
ret = 0;
}
out:
if (ret == -ENOSPC)
ins->offset = max_extent_size;
return ret;
}
@ -6573,8 +6576,8 @@ again:
flags);
if (ret == -ENOSPC) {
if (!final_tried) {
num_bytes = num_bytes >> 1;
if (!final_tried && ins->offset) {
num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize);
num_bytes = max(num_bytes, min_alloc_size);
if (num_bytes == min_alloc_size)

8
fs/btrfs/extent_io.c

@ -1481,10 +1481,12 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
*end = state->end;
cur_start = state->end + 1;
node = rb_next(node);
if (!node)
break;
total_bytes += state->end - state->start + 1;
if (total_bytes >= max_bytes)
if (total_bytes >= max_bytes) {
*end = *start + max_bytes - 1;
break;
}
if (!node)
break;
}
out:

4
fs/btrfs/file.c

@ -1859,8 +1859,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = btrfs_log_dentry_safe(trans, root, dentry);
if (ret < 0) {
mutex_unlock(&inode->i_mutex);
goto out;
/* Fallthrough and commit/free transaction. */
ret = 1;
}
/* we've logged all the items and now have a consistent

67
fs/btrfs/free-space-cache.c

@ -1431,13 +1431,19 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
ctl->free_space += bytes;
}
/*
* If we can not find suitable extent, we will use bytes to record
* the size of the max extent.
*/
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info, u64 *offset,
u64 *bytes)
{
unsigned long found_bits = 0;
unsigned long max_bits = 0;
unsigned long bits, i;
unsigned long next_zero;
unsigned long extent_bits;
i = offset_to_bit(bitmap_info->offset, ctl->unit,
max_t(u64, *offset, bitmap_info->offset));
@ -1446,9 +1452,12 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
next_zero = find_next_zero_bit(bitmap_info->bitmap,
BITS_PER_BITMAP, i);
if ((next_zero - i) >= bits) {
found_bits = next_zero - i;
extent_bits = next_zero - i;
if (extent_bits >= bits) {
found_bits = extent_bits;
break;
} else if (extent_bits > max_bits) {
max_bits = extent_bits;
}
i = next_zero;
}
@ -1459,38 +1468,41 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
return 0;
}
*bytes = (u64)(max_bits) * ctl->unit;
return -1;
}
/* Cache the size of the max extent in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
unsigned long align)
unsigned long align, u64 *max_extent_size)
{
struct btrfs_free_space *entry;
struct rb_node *node;
u64 ctl_off;
u64 tmp;
u64 align_off;
int ret;
if (!ctl->free_space_offset.rb_node)
return NULL;
goto out;
entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
if (!entry)
return NULL;
goto out;
for (node = &entry->offset_index; node; node = rb_next(node)) {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
if (entry->bytes < *bytes)
if (entry->bytes < *bytes) {
if (entry->bytes > *max_extent_size)
*max_extent_size = entry->bytes;
continue;
}
/* make sure the space returned is big enough
* to match our requested alignment
*/
if (*bytes >= align) {
ctl_off = entry->offset - ctl->start;
tmp = ctl_off + align - 1;;
tmp = entry->offset - ctl->start + align - 1;
do_div(tmp, align);
tmp = tmp * align + ctl->start;
align_off = tmp - entry->offset;
@ -1499,14 +1511,22 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
tmp = entry->offset;
}
if (entry->bytes < *bytes + align_off)
if (entry->bytes < *bytes + align_off) {
if (entry->bytes > *max_extent_size)
*max_extent_size = entry->bytes;
continue;
}
if (entry->bitmap) {
ret = search_bitmap(ctl, entry, &tmp, bytes);
u64 size = *bytes;
ret = search_bitmap(ctl, entry, &tmp, &size);
if (!ret) {
*offset = tmp;
*bytes = size;
return entry;
} else if (size > *max_extent_size) {
*max_extent_size = size;
}
continue;
}
@ -1515,7 +1535,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
*bytes = entry->bytes - align_off;
return entry;
}
out:
return NULL;
}
@ -2116,7 +2136,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
}
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes, u64 empty_size)
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry = NULL;
@ -2127,7 +2148,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
spin_lock(&ctl->tree_lock);
entry = find_free_space(ctl, &offset, &bytes_search,
block_group->full_stripe_len);
block_group->full_stripe_len, max_extent_size);
if (!entry)
goto out;
@ -2137,7 +2158,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
if (!entry->bytes)
free_bitmap(ctl, entry);
} else {
unlink_free_space(ctl, entry);
align_gap_len = offset - entry->offset;
align_gap = entry->offset;
@ -2151,7 +2171,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
else
link_free_space(ctl, entry);
}
out:
spin_unlock(&ctl->tree_lock);
@ -2206,7 +2225,8 @@ int btrfs_return_cluster_to_free_space(
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
struct btrfs_free_space *entry,
u64 bytes, u64 min_start)
u64 bytes, u64 min_start,
u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
int err;
@ -2218,8 +2238,11 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
search_bytes = bytes;
err = search_bitmap(ctl, entry, &search_start, &search_bytes);
if (err)
if (err) {
if (search_bytes > *max_extent_size)
*max_extent_size = search_bytes;
return 0;
}
ret = search_start;
__bitmap_clear_bits(ctl, entry, ret, bytes);
@ -2234,7 +2257,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
*/
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
u64 min_start)
u64 min_start, u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry = NULL;
@ -2254,6 +2277,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
entry = rb_entry(node, struct btrfs_free_space, offset_index);
while(1) {
if (entry->bytes < bytes && entry->bytes > *max_extent_size)
*max_extent_size = entry->bytes;
if (entry->bytes < bytes ||
(!entry->bitmap && entry->offset < min_start)) {
node = rb_next(&entry->offset_index);
@ -2267,7 +2293,8 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
if (entry->bitmap) {
ret = btrfs_alloc_from_bitmap(block_group,
cluster, entry, bytes,
cluster->window_start);
cluster->window_start,
max_extent_size);
if (ret == 0) {
node = rb_next(&entry->offset_index);
if (!node)

5
fs/btrfs/free-space-cache.h

@ -94,7 +94,8 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
*block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes, u64 empty_size);
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size);
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
@ -105,7 +106,7 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
u64 min_start);
u64 min_start, u64 *max_extent_size);
int btrfs_return_cluster_to_free_space(
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster);

16
fs/btrfs/inode.c

@ -4688,11 +4688,11 @@ static void inode_tree_add(struct inode *inode)
struct btrfs_inode *entry;
struct rb_node **p;
struct rb_node *parent;
struct rb_node *new = &BTRFS_I(inode)->rb_node;
u64 ino = btrfs_ino(inode);
if (inode_unhashed(inode))
return;
again:
parent = NULL;
spin_lock(&root->inode_lock);
p = &root->inode_tree.rb_node;
@ -4707,14 +4707,14 @@ again:
else {
WARN_ON(!(entry->vfs_inode.i_state &
(I_WILL_FREE | I_FREEING)));
rb_erase(parent, &root->inode_tree);
rb_replace_node(parent, new, &root->inode_tree);
RB_CLEAR_NODE(parent);
spin_unlock(&root->inode_lock);
goto again;
return;
}
}
rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
rb_link_node(new, parent, p);
rb_insert_color(new, &root->inode_tree);
spin_unlock(&root->inode_lock);
}
@ -8216,6 +8216,10 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
if (unlikely(!work)) {
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
iput(inode);
ret = -ENOMEM;
goto out;
}
@ -8613,11 +8617,13 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
.update_time = btrfs_update_time,
};
/*
 * Inode operations table that only wires up lookup, permission checking,
 * ACL retrieval and timestamp updates; no create/unlink/rename style
 * callbacks are set here.
 * NOTE(review): the "ro" in the name presumably means this table is used
 * for read-only directories -- confirm against where it is assigned.
 */
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
/* route timestamp updates through btrfs_update_time */
.update_time = btrfs_update_time,
};
static const struct file_operations btrfs_dir_file_operations = {

80
fs/btrfs/ioctl.c

@ -574,7 +574,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
return ret;
btrfs_wait_ordered_extents(root, 0);
btrfs_wait_ordered_extents(root);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot)
@ -2696,9 +2696,9 @@ out_unlock:
static long btrfs_ioctl_file_extent_same(struct file *file,
void __user *argp)
{
struct btrfs_ioctl_same_args *args = argp;
struct btrfs_ioctl_same_args same;
struct btrfs_ioctl_same_extent_info info;
struct btrfs_ioctl_same_args tmp;
struct btrfs_ioctl_same_args *same;
struct btrfs_ioctl_same_extent_info *info;
struct inode *src = file->f_dentry->d_inode;
struct file *dst_file = NULL;
struct inode *dst;
@ -2706,6 +2706,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
u64 len;
int i;
int ret;
unsigned long size;
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
bool is_admin = capable(CAP_SYS_ADMIN);
@ -2716,15 +2717,30 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
if (ret)
return ret;
if (copy_from_user(&same,
if (copy_from_user(&tmp,
(struct btrfs_ioctl_same_args __user *)argp,
sizeof(same))) {
sizeof(tmp))) {
ret = -EFAULT;
goto out;
}
off = same.logical_offset;
len = same.length;
size = sizeof(tmp) +
tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
same = kmalloc(size, GFP_NOFS);
if (!same) {
ret = -EFAULT;
goto out;
}
if (copy_from_user(same,
(struct btrfs_ioctl_same_args __user *)argp, size)) {
ret = -EFAULT;
goto out;
}
off = same->logical_offset;
len = same->length;
/*
* Limit the total length we will dedupe for each operation.
@ -2752,27 +2768,28 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
if (!S_ISREG(src->i_mode))
goto out;
ret = 0;
for (i = 0; i < same.dest_count; i++) {
if (copy_from_user(&info, &args->info[i], sizeof(info))) {
ret = -EFAULT;
goto out;
}
/* pre-format output fields to sane values */
for (i = 0; i < same->dest_count; i++) {
same->info[i].bytes_deduped = 0ULL;
same->info[i].status = 0;
}
info.bytes_deduped = 0;
ret = 0;
for (i = 0; i < same->dest_count; i++) {
info = &same->info[i];
dst_file = fget(info.fd);
dst_file = fget(info->fd);
if (!dst_file) {
info.status = -EBADF;
info->status = -EBADF;
goto next;
}
if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
info.status = -EINVAL;
info->status = -EINVAL;
goto next;
}
info.status = -EXDEV;
info->status = -EXDEV;
if (file->f_path.mnt != dst_file->f_path.mnt)
goto next;
@ -2781,32 +2798,29 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
goto next;
if (S_ISDIR(dst->i_mode)) {
info.status = -EISDIR;
info->status = -EISDIR;
goto next;
}
if (!S_ISREG(dst->i_mode)) {
info.status = -EACCES;
info->status = -EACCES;
goto next;
}
info.status = btrfs_extent_same(src, off, len, dst,
info.logical_offset);
if (info.status == 0)
info.bytes_deduped += len;
info->status = btrfs_extent_same(src, off, len, dst,
info->logical_offset);
if (info->status == 0)
info->bytes_deduped += len;
next:
if (dst_file)
fput(dst_file);
if (__put_user_unaligned(info.status, &args->info[i].status) ||
__put_user_unaligned(info.bytes_deduped,
&args->info[i].bytes_deduped)) {
ret = -EFAULT;
goto out;
}
}
ret = copy_to_user(argp, same, size);
if (ret)
ret = -EFAULT;
out:
mnt_drop_write_file(file);
return ret;
@ -3310,7 +3324,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
}
if (!objectid)
objectid = root->root_key.objectid;
objectid = BTRFS_FS_TREE_OBJECTID;
location.objectid = objectid;
location.type = BTRFS_ROOT_ITEM_KEY;

24
fs/btrfs/ordered-data.c

@ -563,11 +563,10 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
void btrfs_wait_ordered_extents(struct btrfs_root *root)
{
struct list_head splice, works;
struct btrfs_ordered_extent *ordered, *next;
struct inode *inode;
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
@ -580,15 +579,6 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
root_extent_list);
list_move_tail(&ordered->root_extent_list,
&root->ordered_extents);
/*
* the inode may be getting freed (in sys_unlink path).
*/
inode = igrab(ordered->inode);
if (!inode) {
cond_resched_lock(&root->ordered_extent_lock);
continue;
}
atomic_inc(&ordered->refs);
spin_unlock(&root->ordered_extent_lock);
@ -605,21 +595,13 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
list_for_each_entry_safe(ordered, next, &works, work_list) {
list_del_init(&ordered->work_list);
wait_for_completion(&ordered->completion);
inode = ordered->inode;
btrfs_put_ordered_extent(ordered);
if (delay_iput)
btrfs_add_delayed_iput(inode);
else
iput(inode);
cond_resched();
}
mutex_unlock(&root->fs_info->ordered_operations_mutex);
}
void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
int delay_iput)
void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
struct list_head splice;
@ -637,7 +619,7 @@ void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
&fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
btrfs_wait_ordered_extents(root, delay_iput);
btrfs_wait_ordered_extents(root);
btrfs_put_fs_root(root);
spin_lock(&fs_info->ordered_root_lock);

5
fs/btrfs/ordered-data.h

@ -195,9 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
int delay_iput);
void btrfs_wait_ordered_extents(struct btrfs_root *root);
void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info);
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);

43
fs/btrfs/relocation.c

@ -1548,7 +1548,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
btrfs_file_extent_other_encoding(leaf, fi));
if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
ret = 1;
ret = -EINVAL;
goto out;
}
@ -1579,7 +1579,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
u64 end;
u32 nritems;
u32 i;
int ret;
int ret = 0;
int first = 1;
int dirty = 0;
@ -1642,11 +1642,13 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = get_new_location(rc->data_inode, &new_bytenr,
bytenr, num_bytes);
if (ret > 0) {
WARN_ON(1);
continue;
if (ret) {
/*
* Don't have to abort since we've not changed anything
* in the file extent yet.
*/
break;
}
BUG_ON(ret < 0);
btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
dirty = 1;
@ -1656,18 +1658,24 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
num_bytes, parent,
btrfs_header_owner(leaf),
key.objectid, key.offset, 1);
BUG_ON(ret);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
break;
}
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
key.objectid, key.offset, 1);
BUG_ON(ret);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
break;
}
}
if (dirty)
btrfs_mark_buffer_dirty(leaf);
if (inode)
btrfs_add_delayed_iput(inode);
return 0;
return ret;
}
static noinline_for_stack
@ -4238,7 +4246,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
err = ret;
goto out;
}
btrfs_wait_all_ordered_extents(fs_info, 0);
btrfs_wait_all_ordered_extents(fs_info);
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
@ -4499,19 +4507,19 @@ out:
return ret;
}
void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow)
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow)
{
struct reloc_control *rc;
struct backref_node *node;
int first_cow = 0;
int level;
int ret;
int ret = 0;
rc = root->fs_info->reloc_ctl;
if (!rc)
return;
return 0;
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
@ -4547,10 +4555,9 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
rc->nodes_relocated += buf->len;
}
if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
ret = replace_file_extents(trans, rc, root, cow);
BUG_ON(ret);
}
return ret;
}
/*

112
fs/btrfs/scrub.c

@ -158,12 +158,20 @@ struct scrub_fixup_nodatasum {
int mirror_num;
};
/*
 * One (inode number, file offset, root) triple recorded by
 * record_inode_for_nocow() while iterating the inodes that reference a
 * logical address.  Entries are linked on scrub_copy_nocow_ctx.inodes and
 * later handed to copy_nocow_pages_for_inode(), then freed.
 */
struct scrub_nocow_inode {
u64 inum;	/* inode number passed to the iterate callback */
u64 offset;	/* file offset passed to the iterate callback */
u64 root;	/* root passed to the callback; used as key.objectid for the root lookup */
struct list_head list;	/* link in scrub_copy_nocow_ctx.inodes */
};
/*
 * Per-request context for copying nocow pages.  It is filled in by
 * copy_nocow_pages() and processed by copy_nocow_pages_worker() via the
 * embedded work item.
 */
struct scrub_copy_nocow_ctx {
struct scrub_ctx *sctx;
u64 logical;	/* logical address whose referencing inodes are looked up */
u64 len;	/* length of the range to copy */
int mirror_num;
u64 physical_for_dev_replace;
/* list of struct scrub_nocow_inode collected by record_inode_for_nocow() */
struct list_head inodes;
struct btrfs_work work;	/* work.func is set to copy_nocow_pages_worker */
};
@ -245,7 +253,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
void *ctx);
struct scrub_copy_nocow_ctx *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
@ -3126,12 +3134,30 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
nocow_ctx->mirror_num = mirror_num;
nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
nocow_ctx->work.func = copy_nocow_pages_worker;
INIT_LIST_HEAD(&nocow_ctx->inodes);
btrfs_queue_worker(&fs_info->scrub_nocow_workers,
&nocow_ctx->work);
return 0;
}
/*
 * Callback passed to iterate_inodes_from_logical(): remember one
 * (inum, offset, root) triple on the context's inode list instead of
 * processing it right away.  copy_nocow_pages_worker() walks the list
 * after ending its transaction and frees every entry.
 *
 * @inum:   inode number reported by the iterator
 * @offset: file offset reported by the iterator
 * @root:   root reported by the iterator
 * @ctx:    opaque pointer, actually a struct scrub_copy_nocow_ctx
 *
 * Returns 0 on success or -ENOMEM if the list entry cannot be allocated.
 */
static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
{
struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
struct scrub_nocow_inode *nocow_inode;
nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
if (!nocow_inode)
return -ENOMEM;
nocow_inode->inum = inum;
nocow_inode->offset = offset;
nocow_inode->root = root;
/* append at the tail so entries are processed in the order they were recorded */
list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
return 0;
}
#define COPY_COMPLETE 1
static void copy_nocow_pages_worker(struct btrfs_work *work)
{
struct scrub_copy_nocow_ctx *nocow_ctx =
@ -3167,8 +3193,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
}
ret = iterate_inodes_from_logical(logical, fs_info, path,
copy_nocow_pages_for_inode,
nocow_ctx);
record_inode_for_nocow, nocow_ctx);
if (ret != 0 && ret != -ENOENT) {
pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
logical, physical_for_dev_replace, len, mirror_num,
@ -3177,7 +3202,33 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
goto out;
}
btrfs_end_transaction(trans, root);
trans = NULL;
while (!list_empty(&nocow_ctx->inodes)) {
struct scrub_nocow_inode *entry;
entry = list_first_entry(&nocow_ctx->inodes,
struct scrub_nocow_inode,
list);
list_del_init(&entry->list);
ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
entry->root, nocow_ctx);
kfree(entry);
if (ret == COPY_COMPLETE) {
ret = 0;
break;
} else if (ret) {
break;
}
}
out:
while (!list_empty(&nocow_ctx->inodes)) {
struct scrub_nocow_inode *entry;
entry = list_first_entry(&nocow_ctx->inodes,
struct scrub_nocow_inode,
list);
list_del_init(&entry->list);
kfree(entry);
}
if (trans && !IS_ERR(trans))
btrfs_end_transaction(trans, root);
if (not_written)
@ -3190,20 +3241,25 @@ out:
scrub_pending_trans_workers_dec(sctx);
}
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
struct scrub_copy_nocow_ctx *nocow_ctx)
{
struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
struct btrfs_key key;
struct inode *inode;
struct page *page;
struct btrfs_root *local_root;
struct btrfs_ordered_extent *ordered;
struct extent_map *em;
struct extent_state *cached_state = NULL;
struct extent_io_tree *io_tree;
u64 physical_for_dev_replace;
u64 len;
u64 len = nocow_ctx->len;
u64 lockstart = offset, lockend = offset + len - 1;
unsigned long index;
int srcu_index;
int ret;
int err;
int ret = 0;
int err = 0;
key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY;
@ -3229,9 +3285,33 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
mutex_lock(&inode->i_mutex);
inode_dio_wait(inode);
ret = 0;
physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
len = nocow_ctx->len;
io_tree = &BTRFS_I(inode)->io_tree;
lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
if (ordered) {
btrfs_put_ordered_extent(ordered);
goto out_unlock;
}
em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out_unlock;
}
/*
* This extent does not actually cover the logical extent anymore,
* move on to the next inode.
*/
if (em->block_start > nocow_ctx->logical ||
em->block_start + em->block_len < nocow_ctx->logical + len) {
free_extent_map(em);
goto out_unlock;
}
free_extent_map(em);
while (len >= PAGE_CACHE_SIZE) {
index = offset >> PAGE_CACHE_SHIFT;
again:
@ -3247,10 +3327,9 @@ again:
goto next_page;
} else {
ClearPageError(page);
err = extent_read_full_page(&BTRFS_I(inode)->
io_tree,
page, btrfs_get_extent,
nocow_ctx->mirror_num);
err = extent_read_full_page_nolock(io_tree, page,
btrfs_get_extent,
nocow_ctx->mirror_num);
if (err) {
ret = err;
goto next_page;
@ -3264,6 +3343,7 @@ again:
* page in the page cache.
*/
if (page->mapping != inode->i_mapping) {
unlock_page(page);
page_cache_release(page);
goto again;
}
@ -3287,6 +3367,10 @@ next_page:
physical_for_dev_replace += PAGE_CACHE_SIZE;
len -= PAGE_CACHE_SIZE;
}
ret = COPY_COMPLETE;
out_unlock:
unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
GFP_NOFS);
out:
mutex_unlock(&inode->i_mutex);
iput(inode);

21
fs/btrfs/super.c

@ -921,7 +921,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
btrfs_wait_all_ordered_extents(fs_info, 1);
btrfs_wait_all_ordered_extents(fs_info);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
@ -1340,6 +1340,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
} else {
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
btrfs_err(fs_info,
"Remounting read-write after error is not allowed\n");
ret = -EINVAL;
goto restore;
}
if (fs_info->fs_devices->rw_devices == 0) {
ret = -EACCES;
goto restore;
@ -1377,6 +1383,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
pr_warn("btrfs: failed to resume dev_replace\n");
goto restore;
}
if (!fs_info->uuid_root) {
pr_info("btrfs: creating UUID tree\n");
ret = btrfs_create_uuid_tree(fs_info);
if (ret) {
pr_warn("btrfs: failed to create the uuid tree"
"%d\n", ret);
goto restore;
}
}
sb->s_flags &= ~MS_RDONLY;
}
out:
@ -1762,6 +1778,9 @@ static void btrfs_print_info(void)
#ifdef CONFIG_BTRFS_DEBUG
", debug=on"
#endif
#ifdef CONFIG_BTRFS_ASSERT
", assert=on"
#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
", integrity-checker=on"
#endif

2
fs/btrfs/transaction.c

@ -1603,7 +1603,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
btrfs_wait_all_ordered_extents(fs_info, 1);
btrfs_wait_all_ordered_extents(fs_info);
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,

52
fs/btrfs/tree-log.c

@ -93,7 +93,8 @@
*/
#define LOG_WALK_PIN_ONLY 0
#define LOG_WALK_REPLAY_INODES 1
#define LOG_WALK_REPLAY_ALL 2
#define LOG_WALK_REPLAY_DIR_INDEX 2
#define LOG_WALK_REPLAY_ALL 3
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
@ -393,6 +394,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
if (inode_item) {
struct btrfs_inode_item *item;
u64 nbytes;
u32 mode;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@ -400,9 +402,19 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
item = btrfs_item_ptr(eb, slot,
struct btrfs_inode_item);
btrfs_set_inode_nbytes(eb, item, nbytes);
/*
* If this is a directory we need to reset the i_size to
* 0 so that we can set it up properly when replaying
* the rest of the items in this log.
*/
mode = btrfs_inode_mode(eb, item);
if (S_ISDIR(mode))
btrfs_set_inode_size(eb, item, 0);
}
} else if (inode_item) {
struct btrfs_inode_item *item;
u32 mode;
/*
* New inode, set nbytes to 0 so that the nbytes comes out
@ -410,6 +422,15 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
*/
item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
btrfs_set_inode_nbytes(eb, item, 0);
/*
* If this is a directory we need to reset the i_size to 0 so
* that we can set it up properly when replaying the rest of
* the items in this log.
*/
mode = btrfs_inode_mode(eb, item);
if (S_ISDIR(mode))
btrfs_set_inode_size(eb, item, 0);
}
insert:
btrfs_release_path(path);
@ -1496,6 +1517,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
iput(inode);
return -EIO;
}
ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
/* FIXME, put inode into FIXUP list */
@ -1534,6 +1556,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
u8 log_type;
int exists;
int ret = 0;
bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
dir = read_one_inode(root, key->objectid);
if (!dir)
@ -1604,6 +1627,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
goto insert;
out:
btrfs_release_path(path);
if (!ret && update_size) {
btrfs_i_size_write(dir, dir->i_size + name_len * 2);
ret = btrfs_update_inode(trans, root, dir);
}
kfree(name);
iput(dir);
return ret;
@ -1614,6 +1641,7 @@ insert:
name, name_len, log_type, &log_key);
if (ret && ret != -ENOENT)
goto out;
update_size = false;
ret = 0;
goto out;
}
@ -2027,6 +2055,15 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (ret)
break;
}
if (key.type == BTRFS_DIR_INDEX_KEY &&