original development tree for Linux kernel GTP module; now long in mainline.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2688 lines
94 KiB

/*
* ext4.h
*
* Copyright (C) 1992, 1993, 1994, 1995
* Remy Card (card@masi.ibp.fr)
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie (Paris VI)
*
* from
*
* linux/include/linux/minix_fs.h
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
#ifndef _EXT4_H
#define _EXT4_H
#include <linux/types.h>
#include <linux/blkdev.h>
#include <linux/magic.h>
#include <linux/jbd2.h>
#include <linux/quota.h>
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
#include <crypto/hash.h>
#ifdef __KERNEL__
#include <linux/compat.h>
#endif
/*
* The fourth extended filesystem constants/structures
*/
/*
* Define EXT4FS_DEBUG to produce debug messages
*/
#undef EXT4FS_DEBUG
/*
* Debug code
*/
#ifdef EXT4FS_DEBUG
#define ext4_debug(f, a...) \
do { \
printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
__FILE__, __LINE__, __func__); \
printk(KERN_DEBUG f, ## a); \
} while (0)
#else
#define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
/*
* Turn on EXT_DEBUG to get lots of info about extents operations.
*/
#define EXT_DEBUG__
#ifdef EXT_DEBUG
#define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
#define EXT4_ERROR_INODE(inode, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
#define EXT4_ERROR_FILE(file, block, fmt, a...) \
ext4_error_file((file), __func__, __LINE__, (block), (fmt), ## a)
/* data type for block offset of block group */
typedef int ext4_grpblk_t;
/* data type for filesystem-wide blocks number */
typedef unsigned long long ext4_fsblk_t;
/* data type for file logical block number */
typedef __u32 ext4_lblk_t;
/* data type for block group number */
typedef unsigned int ext4_group_t;
/*
* Flags used in mballoc's allocation_context flags field.
*
* Also used to show what's going on for debugging purposes when the
* flag field is exported via the traceport interface
*/
/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE 0x0001
/* blocks already reserved */
#define EXT4_MB_HINT_RESERVED 0x0002
/* metadata is being allocated */
#define EXT4_MB_HINT_METADATA 0x0004
/* first blocks in the file */
#define EXT4_MB_HINT_FIRST 0x0008
/* search for the best chunk */
#define EXT4_MB_HINT_BEST 0x0010
/* data is being allocated */
#define EXT4_MB_HINT_DATA 0x0020
/* don't preallocate (for tails) */
#define EXT4_MB_HINT_NOPREALLOC 0x0040
/* allocate for locality group */
#define EXT4_MB_HINT_GROUP_ALLOC 0x0080
/* allocate goal blocks or none */
#define EXT4_MB_HINT_GOAL_ONLY 0x0100
/* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL 0x0200
/* blocks already pre-reserved by delayed allocation */
#define EXT4_MB_DELALLOC_RESERVED 0x0400
/* We are doing stream allocation */
#define EXT4_MB_STREAM_ALLOC 0x0800
/* Use reserved root blocks if needed */
#define EXT4_MB_USE_ROOT_BLOCKS 0x1000
struct ext4_allocation_request {
/* target inode for block we're allocating */
struct inode *inode;
/* how many blocks we want to allocate */
unsigned int len;
/* logical block in target inode */
ext4_lblk_t logical;
/* the closest logical allocated block to the left */
ext4_lblk_t lleft;
/* the closest logical allocated block to the right */
ext4_lblk_t lright;
/* phys. target (a hint) */
ext4_fsblk_t goal;
/* phys. block for the closest logical allocated block to the left */
ext4_fsblk_t pleft;
/* phys. block for the closest logical allocated block to the right */
ext4_fsblk_t pright;
/* flags. see above EXT4_MB_HINT_* */
unsigned int flags;
};
/*
* Logical to physical block mapping, used by ext4_map_blocks()
*
* This structure is used to pass requests into ext4_map_blocks() as
* well as to store the information returned by ext4_map_blocks(). It
* takes less room on the stack than a struct buffer_head.
*/
#define EXT4_MAP_NEW (1 << BH_New)
#define EXT4_MAP_MAPPED (1 << BH_Mapped)
#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
#define EXT4_MAP_UNINIT (1 << BH_Uninit)
/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
* ext4_map_blocks wants to know whether or not the underlying cluster has
* already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
* the requested mapping was from previously mapped (or delayed allocated)
* cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
* should never appear on buffer_head's state flags.
*/
#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
struct ext4_map_blocks {
ext4_fsblk_t m_pblk;
ext4_lblk_t m_lblk;
unsigned int m_len;
unsigned int m_flags;
};
/*
* For delayed allocation tracking
*/
struct mpage_da_data {
struct inode *inode;
sector_t b_blocknr; /* start block number of extent */
size_t b_size; /* size of extent */
unsigned long b_state; /* state of the extent */
unsigned long first_page, next_page; /* extent of pages */
struct writeback_control *wbc;
int io_done;
int pages_written;
int retval;
};
/*
* Flags for ext4_io_end->flags
*/
#define EXT4_IO_END_UNWRITTEN 0x0001
#define EXT4_IO_END_ERROR 0x0002
#define EXT4_IO_END_DIRECT 0x0004
struct ext4_io_page {
struct page *p_page;
atomic_t p_count;
};
#define MAX_IO_PAGES 128
/*
* For converting uninitialized extents on a work queue.
*
* 'page' is only used from the writepage() path; 'pages' is only used for
* buffered writes; they are used to keep page references until conversion
* takes place. For AIO/DIO, neither field is filled in.
*/
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
int num_io_pages; /* for writepages() */
struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */
} ext4_io_end_t;
struct ext4_io_submit {
int io_op;
struct bio *io_bio;
ext4_io_end_t *io_end;
struct ext4_io_page *io_page;
sector_t io_next_block;
};
/*
* Special inodes numbers
*/
#define EXT4_BAD_INO 1 /* Bad blocks inode */
#define EXT4_ROOT_INO 2 /* Root inode */
#define EXT4_USR_QUOTA_INO 3 /* User quota inode */
#define EXT4_GRP_QUOTA_INO 4 /* Group quota inode */
#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */
#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */
#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */
#define EXT4_JOURNAL_INO 8 /* Journal inode */
/* First non-reserved inode for old ext4 filesystems */
#define EXT4_GOOD_OLD_FIRST_INO 11
/*
* Maximal count of links to a file
*/
#define EXT4_LINK_MAX 65000
/*
* Macro-instructions used to manage several block sizes
*/
#define EXT4_MIN_BLOCK_SIZE 1024
#define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10
#define EXT4_MAX_BLOCK_LOG_SIZE 16
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
#endif
#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
#define EXT4_CLUSTER_SIZE(s) (EXT4_BLOCK_SIZE(s) << \
EXT4_SB(s)->s_cluster_bits)
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
# define EXT4_CLUSTER_BITS(s) (EXT4_SB(s)->s_cluster_bits)
#else
# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
#endif
#ifdef __KERNEL__
#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size)
#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino)
#else
#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
EXT4_GOOD_OLD_INODE_SIZE : \
(s)->s_inode_size)
#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
EXT4_GOOD_OLD_FIRST_INO : \
(s)->s_first_ino)
#endif
#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
/* Translate a block number to a cluster number */
#define EXT4_B2C(sbi, blk) ((blk) >> (sbi)->s_cluster_bits)
/* Translate a cluster number to a block number */
#define EXT4_C2B(sbi, cluster) ((cluster) << (sbi)->s_cluster_bits)
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
/*
* Structure of a blocks group descriptor
*/
struct ext4_group_desc
{
__le32 bg_block_bitmap_lo; /* Blocks bitmap block */
__le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
__le32 bg_inode_table_lo; /* Inodes table block */
__le16 bg_free_blocks_count_lo;/* Free blocks count */
__le16 bg_free_inodes_count_lo;/* Free inodes count */
__le16 bg_used_dirs_count_lo; /* Directories count */
Ext4: Uninitialized Block Groups In pass1 of e2fsck, every inode table in the fileystem is scanned and checked, regardless of whether it is in use. This is this the most time consuming part of the filesystem check. The unintialized block group feature can greatly reduce e2fsck time by eliminating checking of uninitialized inodes. With this feature, there is a a high water mark of used inodes for each block group. Block and inode bitmaps can be uninitialized on disk via a flag in the group descriptor to avoid reading or scanning them at e2fsck time. A checksum of each group descriptor is used to ensure that corruption in the group descriptor&#39;s bit flags does not cause incorrect operation. The feature is enabled through a mkfs option mke2fs /dev/ -O uninit_groups A patch adding support for uninitialized block groups to e2fsprogs tools has been posted to the linux-ext4 mailing list. The patches have been stress tested with fsstress and fsx. In performance tests testing e2fsck time, we have seen that e2fsck time on ext3 grows linearly with the total number of inodes in the filesytem. In ext4 with the uninitialized block groups feature, the e2fsck time is constant, based solely on the number of used inodes rather than the total inode count. Since typical ext4 filesystems only use 1-10% of their inodes, this feature can greatly reduce e2fsck time for users. With performance improvement of 2-20 times, depending on how full the filesystem is. The attached graph shows the major improvements in e2fsck times in filesystems with a large total inode count, but few inodes in use. In each group descriptor if we have EXT4_BG_INODE_UNINIT set in bg_flags: Inode table is not initialized/used in this group. So we can skip the consistency check during fsck. EXT4_BG_BLOCK_UNINIT set in bg_flags: No block in the group is used. So we can skip the block bitmap verification for this group. We also add two new fields to group descriptor as a part of uninitialized group patch. __le16 bg_itable_unused; /* Unused inodes count */ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ bg_itable_unused: If we have EXT4_BG_INODE_UNINIT not set in bg_flags then bg_itable_unused will give the offset within the inode table till the inodes are used. This can be used by fsck to skip list of inodes that are marked unused. bg_checksum: Now that we depend on bg_flags and bg_itable_unused to determine the block and inode usage, we need to make sure group descriptor is not corrupt. We add checksum to group descriptor to detect corruption. If the descriptor is found to be corrupt, we mark all the blocks and inodes in the group used. Signed-off-by: Avantika Mathur &lt;mathur@us.ibm.com&gt; Signed-off-by: Andreas Dilger &lt;adilger@clusterfs.com&gt; Signed-off-by: Mingming Cao &lt;cmm@us.ibm.com&gt; Signed-off-by: Aneesh Kumar K.V &lt;aneesh.kumar@linux.vnet.ibm.com&gt;
14 years ago
__le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
__le32 bg_exclude_bitmap_lo; /* Exclude bitmap for snapshots */
__le16 bg_block_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+bbitmap) LE */
__le16 bg_inode_bitmap_csum_lo;/* crc32c(s_uuid+grp_num+ibitmap) LE */
__le16 bg_itable_unused_lo; /* Unused inodes count */
Ext4: Uninitialized Block Groups In pass1 of e2fsck, every inode table in the fileystem is scanned and checked, regardless of whether it is in use. This is this the most time consuming part of the filesystem check. The unintialized block group feature can greatly reduce e2fsck time by eliminating checking of uninitialized inodes. With this feature, there is a a high water mark of used inodes for each block group. Block and inode bitmaps can be uninitialized on disk via a flag in the group descriptor to avoid reading or scanning them at e2fsck time. A checksum of each group descriptor is used to ensure that corruption in the group descriptor&#39;s bit flags does not cause incorrect operation. The feature is enabled through a mkfs option mke2fs /dev/ -O uninit_groups A patch adding support for uninitialized block groups to e2fsprogs tools has been posted to the linux-ext4 mailing list. The patches have been stress tested with fsstress and fsx. In performance tests testing e2fsck time, we have seen that e2fsck time on ext3 grows linearly with the total number of inodes in the filesytem. In ext4 with the uninitialized block groups feature, the e2fsck time is constant, based solely on the number of used inodes rather than the total inode count. Since typical ext4 filesystems only use 1-10% of their inodes, this feature can greatly reduce e2fsck time for users. With performance improvement of 2-20 times, depending on how full the filesystem is. The attached graph shows the major improvements in e2fsck times in filesystems with a large total inode count, but few inodes in use. In each group descriptor if we have EXT4_BG_INODE_UNINIT set in bg_flags: Inode table is not initialized/used in this group. So we can skip the consistency check during fsck. EXT4_BG_BLOCK_UNINIT set in bg_flags: No block in the group is used. So we can skip the block bitmap verification for this group. We also add two new fields to group descriptor as a part of uninitialized group patch. __le16 bg_itable_unused; /* Unused inodes count */ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ bg_itable_unused: If we have EXT4_BG_INODE_UNINIT not set in bg_flags then bg_itable_unused will give the offset within the inode table till the inodes are used. This can be used by fsck to skip list of inodes that are marked unused. bg_checksum: Now that we depend on bg_flags and bg_itable_unused to determine the block and inode usage, we need to make sure group descriptor is not corrupt. We add checksum to group descriptor to detect corruption. If the descriptor is found to be corrupt, we mark all the blocks and inodes in the group used. Signed-off-by: Avantika Mathur &lt;mathur@us.ibm.com&gt; Signed-off-by: Andreas Dilger &lt;adilger@clusterfs.com&gt; Signed-off-by: Mingming Cao &lt;cmm@us.ibm.com&gt; Signed-off-by: Aneesh Kumar K.V &lt;aneesh.kumar@linux.vnet.ibm.com&gt;
14 years ago
__le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
__le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
__le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
__le32 bg_inode_table_hi; /* Inodes table block MSB */
__le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
__le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
__le16 bg_used_dirs_count_hi; /* Directories count MSB */
__le16 bg_itable_unused_hi; /* Unused inodes count MSB */
__le32 bg_exclude_bitmap_hi; /* Exclude bitmap block MSB */
__le16 bg_block_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+bbitmap) BE */
__le16 bg_inode_bitmap_csum_hi;/* crc32c(s_uuid+grp_num+ibitmap) BE */
__u32 bg_reserved;
};
#define EXT4_BG_INODE_BITMAP_CSUM_HI_END \
(offsetof(struct ext4_group_desc, bg_inode_bitmap_csum_hi) + \
sizeof(__le16))
#define EXT4_BG_BLOCK_BITMAP_CSUM_HI_END \
(offsetof(struct ext4_group_desc, bg_block_bitmap_csum_hi) + \
sizeof(__le16))
/*
* Structure of a flex block group info
*/
struct flex_groups {
atomic_t free_inodes;
atomic_t free_clusters;
atomic_t used_dirs;
};
Ext4: Uninitialized Block Groups In pass1 of e2fsck, every inode table in the fileystem is scanned and checked, regardless of whether it is in use. This is this the most time consuming part of the filesystem check. The unintialized block group feature can greatly reduce e2fsck time by eliminating checking of uninitialized inodes. With this feature, there is a a high water mark of used inodes for each block group. Block and inode bitmaps can be uninitialized on disk via a flag in the group descriptor to avoid reading or scanning them at e2fsck time. A checksum of each group descriptor is used to ensure that corruption in the group descriptor&#39;s bit flags does not cause incorrect operation. The feature is enabled through a mkfs option mke2fs /dev/ -O uninit_groups A patch adding support for uninitialized block groups to e2fsprogs tools has been posted to the linux-ext4 mailing list. The patches have been stress tested with fsstress and fsx. In performance tests testing e2fsck time, we have seen that e2fsck time on ext3 grows linearly with the total number of inodes in the filesytem. In ext4 with the uninitialized block groups feature, the e2fsck time is constant, based solely on the number of used inodes rather than the total inode count. Since typical ext4 filesystems only use 1-10% of their inodes, this feature can greatly reduce e2fsck time for users. With performance improvement of 2-20 times, depending on how full the filesystem is. The attached graph shows the major improvements in e2fsck times in filesystems with a large total inode count, but few inodes in use. In each group descriptor if we have EXT4_BG_INODE_UNINIT set in bg_flags: Inode table is not initialized/used in this group. So we can skip the consistency check during fsck. EXT4_BG_BLOCK_UNINIT set in bg_flags: No block in the group is used. So we can skip the block bitmap verification for this group. We also add two new fields to group descriptor as a part of uninitialized group patch. __le16 bg_itable_unused; /* Unused inodes count */ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ bg_itable_unused: If we have EXT4_BG_INODE_UNINIT not set in bg_flags then bg_itable_unused will give the offset within the inode table till the inodes are used. This can be used by fsck to skip list of inodes that are marked unused. bg_checksum: Now that we depend on bg_flags and bg_itable_unused to determine the block and inode usage, we need to make sure group descriptor is not corrupt. We add checksum to group descriptor to detect corruption. If the descriptor is found to be corrupt, we mark all the blocks and inodes in the group used. Signed-off-by: Avantika Mathur &lt;mathur@us.ibm.com&gt; Signed-off-by: Andreas Dilger &lt;adilger@clusterfs.com&gt; Signed-off-by: Mingming Cao &lt;cmm@us.ibm.com&gt; Signed-off-by: Aneesh Kumar K.V &lt;aneesh.kumar@linux.vnet.ibm.com&gt;
14 years ago
#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
/*
* Macro-instructions used to manage group descriptors
*/
#define EXT4_MIN_DESC_SIZE 32
#define EXT4_MIN_DESC_SIZE_64BIT 64
#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE
#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
#ifdef __KERNEL__
# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
# define EXT4_CLUSTERS_PER_GROUP(s) (EXT4_SB(s)->s_clusters_per_group)
# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
#else
# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
#endif
/*
* Constants relative to the data blocks
*/
#define EXT4_NDIR_BLOCKS 12
#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
/*
* Inode flags
*/
#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */
#define EXT4_UNRM_FL 0x00000002 /* Undelete */
#define EXT4_COMPR_FL 0x00000004 /* Compress file */
#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */
#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */
#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */
#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */
#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */
/* Reserved for compression usage... */
#define EXT4_DIRTY_FL 0x00000100
#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
/* End compression flags --- maybe not all used */
#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
#define EXT4_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{
if (S_ISDIR(mode))
return flags;
else if (S_ISREG(mode))
return flags & EXT4_REG_FLMASK;
else
return flags & EXT4_OTHER_FLMASK;
}
/*
* Inode flags used for atomic set/get
*/
enum {
EXT4_INODE_SECRM = 0, /* Secure deletion */
EXT4_INODE_UNRM = 1, /* Undelete */
EXT4_INODE_COMPR = 2, /* Compress file */
EXT4_INODE_SYNC = 3, /* Synchronous updates */
EXT4_INODE_IMMUTABLE = 4, /* Immutable file */
EXT4_INODE_APPEND = 5, /* writes to file may only append */
EXT4_INODE_NODUMP = 6, /* do not dump file */
EXT4_INODE_NOATIME = 7, /* do not update atime */
/* Reserved for compression usage... */
EXT4_INODE_DIRTY = 8,
EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
EXT4_INODE_NOCOMPR = 10, /* Don't compress */
EXT4_INODE_ECOMPR = 11, /* Compression error */
/* End compression flags --- maybe not all used */
EXT4_INODE_INDEX = 12, /* hash-indexed directory */
EXT4_INODE_IMAGIC = 13, /* AFS directory */
EXT4_INODE_JOURNAL_DATA = 14, /* file data should be journaled */
EXT4_INODE_NOTAIL = 15, /* file tail should not be merged */
EXT4_INODE_DIRSYNC = 16, /* dirsync behaviour (directories only) */
EXT4_INODE_TOPDIR = 17, /* Top of directory hierarchies*/
EXT4_INODE_HUGE_FILE = 18, /* Set to each huge file */
EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
};
/*
* Since it's pretty easy to mix up bit numbers and hex values, we use a
* build-time check to make sure that EXT4_XXX_FL is consistent with respect to
* EXT4_INODE_XXX. If all is well, the macros will be dropped, so, it won't cost
* any extra space in the compiled kernel image, otherwise, the build will fail.
* It's important that these values are the same, since we are using
* EXT4_INODE_XXX to test for flag values, but EXT4_XXX_FL must be consistent
* with the values of FS_XXX_FL defined in include/linux/fs.h and the on-disk
* values found in ext2, ext3 and ext4 filesystems, and of course the values
* defined in e2fsprogs.
*
* It's not paranoia if the Murphy's Law really *is* out to get you. :-)
*/
#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
#define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
static inline void ext4_check_flag_values(void)
{
CHECK_FLAG_VALUE(SECRM);
CHECK_FLAG_VALUE(UNRM);
CHECK_FLAG_VALUE(COMPR);
CHECK_FLAG_VALUE(SYNC);
CHECK_FLAG_VALUE(IMMUTABLE);
CHECK_FLAG_VALUE(APPEND);
CHECK_FLAG_VALUE(NODUMP);
CHECK_FLAG_VALUE(NOATIME);
CHECK_FLAG_VALUE(DIRTY);
CHECK_FLAG_VALUE(COMPRBLK);
CHECK_FLAG_VALUE(NOCOMPR);
CHECK_FLAG_VALUE(ECOMPR);
CHECK_FLAG_VALUE(INDEX);
CHECK_FLAG_VALUE(IMAGIC);
CHECK_FLAG_VALUE(JOURNAL_DATA);
CHECK_FLAG_VALUE(NOTAIL);
CHECK_FLAG_VALUE(DIRSYNC);
CHECK_FLAG_VALUE(TOPDIR);
CHECK_FLAG_VALUE(HUGE_FILE);
CHECK_FLAG_VALUE(EXTENTS);
CHECK_FLAG_VALUE(EA_INODE);
CHECK_FLAG_VALUE(EOFBLOCKS);
CHECK_FLAG_VALUE(INLINE_DATA);
CHECK_FLAG_VALUE(RESERVED);
}
/* Used to pass group descriptor data when online resize is done */
struct ext4_new_group_input {
__u32 group; /* Group number for this data */
__u64 block_bitmap; /* Absolute block number of block bitmap */
__u64 inode_bitmap; /* Absolute block number of inode bitmap */
__u64 inode_table; /* Absolute block number of inode table start */
__u32 blocks_count; /* Total number of blocks in this group */
__u16 reserved_blocks; /* Number of reserved blocks in this group */
__u16 unused;
};
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
struct compat_ext4_new_group_input {
u32 group;
compat_u64 block_bitmap;
compat_u64 inode_bitmap;
compat_u64 inode_table;
u32 blocks_count;
u16 reserved_blocks;
u16 unused;
};
#endif
/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
struct ext4_new_group_data {
__u32 group;
__u64 block_bitmap;
__u64 inode_bitmap;
__u64 inode_table;
__u32 blocks_count;
__u16 reserved_blocks;
__u16 unused;
__u32 free_blocks_count;
};
/* Indexes used to index group tables in ext4_new_group_data */
enum {
BLOCK_BITMAP = 0, /* block bitmap */
INODE_BITMAP, /* inode bitmap */
INODE_TABLE, /* inode tables */
GROUP_TABLE_COUNT,
};
/*
* Flags used by ext4_map_blocks()
*/
/* Allocate any needed blocks and/or convert an unitialized
extent to be an initialized ext4 */
#define EXT4_GET_BLOCKS_CREATE 0x0001
/* Request the creation of an unitialized extent */
#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002
#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
EXT4_GET_BLOCKS_CREATE)
/* Caller is from the delayed allocation writeout path,
so set the magic i_delalloc_reserve_flag after taking the
inode allocation semaphore for */
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
/* caller is from the direct IO path, request to creation of an
unitialized extents if not allocated, split the uninitialized
extent if blocks has been preallocated already*/
#define EXT4_GET_BLOCKS_PRE_IO 0x0008
#define EXT4_GET_BLOCKS_CONVERT 0x0010
#define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\
EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
/* Convert extent to initialized after IO complete */
#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\
EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
/* Punch out blocks of an extent */
#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT 0x0020
/* Don't normalize allocation size (used for fallocate) */
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (user for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/* Do not take i_data_sem locking in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/*
* Flags used by ext4_free_blocks
*/
#define EXT4_FREE_BLOCKS_METADATA 0x0001
#define EXT4_FREE_BLOCKS_FORGET 0x0002
#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
ext4: Add new ext4_discard_partial_page_buffers routines This patch adds two new routines: ext4_discard_partial_page_buffers and ext4_discard_partial_page_buffers_no_lock. The ext4_discard_partial_page_buffers routine is a wrapper function to ext4_discard_partial_page_buffers_no_lock. The wrapper function locks the page and passes it to ext4_discard_partial_page_buffers_no_lock. Calling functions that already have the page locked can call ext4_discard_partial_page_buffers_no_lock directly. The ext4_discard_partial_page_buffers_no_lock function zeros a specified range in a page, and unmaps the corresponding buffer heads. Only block aligned regions of the page will have their buffer heads unmapped. Unblock aligned regions will be mapped if needed so that they can be updated with the partial zero out. This function is meant to be used to update a page and its buffer heads to be zeroed and unmapped when the corresponding blocks have been released or will be released. This routine is used in the following scenarios: * A hole is punched and the non page aligned regions of the head and tail of the hole need to be discarded * The file is truncated and the partial page beyond EOF needs to be discarded * The end of a hole is in the same page as EOF. After the page is flushed, the partial page beyond EOF needs to be discarded. * A write operation begins or ends inside a hole and the partial page appearing before or after the write needs to be discarded * A write operation extends EOF and the partial page beyond EOF needs to be discarded This function takes a flag EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED which is used when a write operation begins or ends in a hole. When the EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED flag is used, only buffer heads that are already unmapped will have the corresponding regions of the page zeroed. Signed-off-by: Allison Henderson &lt;achender@linux.vnet.ibm.com&gt; Signed-off-by: &#34;Theodore Ts&#39;o&#34; &lt;tytso@mit.edu&gt;
10 years ago
/*
* Flags used by ext4_discard_partial_page_buffers
*/
#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001
/*
* ioctl commands
*/
#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
#define EXT4_IOC_MIGRATE _IO('f', 9)
/* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
*/
#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
#define EXT4_IOC32_GROUP_ADD _IOW('f', 8, struct compat_ext4_new_group_input)
#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
#endif