刘泉皓

没有最强的法术,只有最强的法师。

ext234的inode结构说明

02 Apr 2016 » memory

今天在复习《鸟哥的私房菜-基础学习篇》,看到inode大小为128bytes,想看下这128字节里面到底是什么样的。于是我查了下google,发现ext2/3是128字节,ext4是256字节,以下是ext2/ext4对应的结构。

ext2.h/ext2_inode

/*
 * Structure of an inode on the disk
 */
struct ext2_inode {
        __le16  i_mode;         /* File mode */
        __le16  i_uid;          /* Low 16 bits of Owner Uid */
        __le32  i_size;         /* Size in bytes */
        __le32  i_atime;        /* Access time */
        __le32  i_ctime;        /* Creation time */
        __le32  i_mtime;        /* Modification time */
        __le32  i_dtime;        /* Deletion Time */
        __le16  i_gid;          /* Low 16 bits of Group Id */
        __le16  i_links_count;  /* Links count */
        __le32  i_blocks;       /* Blocks count */
        __le32  i_flags;        /* File flags */
        union {
                struct {
                        __le32  l_i_reserved1;
                } linux1;
                struct {
                        __le32  h_i_translator;
                } hurd1;
                struct {
                        __le32  m_i_reserved1;
                } masix1;
        } osd1;                         /* OS dependent 1 */
        __le32  i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
        __le32  i_generation;   /* File version (for NFS) */
        __le32  i_file_acl;     /* File ACL */
        __le32  i_dir_acl;      /* Directory ACL */
        __le32  i_faddr;        /* Fragment address */
        union {
                struct {
                        __u8    l_i_frag;       /* Fragment number */
                        __u8    l_i_fsize;      /* Fragment size */
                        __u16   i_pad1;
                        __le16  l_i_uid_high;   /* these 2 fields    */
                        __le16  l_i_gid_high;   /* were reserved2[0] */
                        __u32   l_i_reserved2;
                } linux2;
                struct {
                        __u8    h_i_frag;       /* Fragment number */
                        __u8    h_i_fsize;      /* Fragment size */
                        __le16  h_i_mode_high;
                        __le16  h_i_uid_high;
                        __le16  h_i_gid_high;
                        __le32  h_i_author;
                } hurd2;
                struct {
                        __u8    m_i_frag;       /* Fragment number */
                        __u8    m_i_fsize;      /* Fragment size */
                        __u16   m_pad1;
                        __u32   m_i_reserved2[2];
                } masix2;
        } osd2;                         /* OS dependent 2 */
};

ext4.h/ext4_inode

/*
 * Structure of an inode on the disk
 */
struct ext4_inode {
        __le16  i_mode;         /* File mode */
        __le16  i_uid;          /* Low 16 bits of Owner Uid */
        __le32  i_size_lo;      /* Size in bytes */
        __le32  i_atime;        /* Access time */
        __le32  i_ctime;        /* Inode Change time */
        __le32  i_mtime;        /* Modification time */
        __le32  i_dtime;        /* Deletion Time */
        __le16  i_gid;          /* Low 16 bits of Group Id */
        __le16  i_links_count;  /* Links count */
        __le32  i_blocks_lo;    /* Blocks count */
        __le32  i_flags;        /* File flags */
        union {
                struct {
                        __le32  l_i_version;
                } linux1;
                struct {
                        __u32  h_i_translator;
                } hurd1;
                struct {
                        __u32  m_i_reserved1;
                } masix1;
        } osd1;                         /* OS dependent 1 */
        __le32  i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
        __le32  i_generation;   /* File version (for NFS) */
        __le32  i_file_acl_lo;  /* File ACL */
        __le32  i_size_high;
        __le32  i_obso_faddr;   /* Obsoleted fragment address */
        union {
                struct {
                        __le16  l_i_blocks_high; /* were l_i_reserved1 */
                        __le16  l_i_file_acl_high;
                        __le16  l_i_uid_high;   /* these 2 fields */
                        __le16  l_i_gid_high;   /* were reserved2[0] */
                        __le16  l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
                        __le16  l_i_reserved;
                } linux2;
                struct {
                        __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
                        __u16   h_i_mode_high;
                        __u16   h_i_uid_high;
                        __u16   h_i_gid_high;
                        __u32   h_i_author;
                } hurd2;
                struct {
                        __le16  h_i_reserved1;  /* Obsoleted fragment number/size which are removed in ext4 */
                        __le16  m_i_file_acl_high;
                        __u32   m_i_reserved2[2];
                } masix2;
        } osd2;                         /* OS dependent 2 */
        __le16  i_extra_isize;
        __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
        __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
        __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
        __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
        __le32  i_crtime;       /* File Creation time */
        __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
        __le32  i_version_hi;   /* high 32 bits for 64-bit version */
        __le32  i_projid;       /* Project ID */
};

可以看见ext4_inode仅仅是最下面多了一点。

以下是官方介绍:

Inode Size

In ext2 and ext3, the inode structure size was fixed at 128 bytes (EXT2_GOOD_OLD_INODE_SIZE) and each inode had a disk record size of 128 bytes.
 Starting with ext4, it is possible to allocate a larger on-disk inode at format time for all inodes in the filesystem to provide space beyond 
the end of the original ext2 inode. The on-disk inode record size is recorded in the superblock as s_inode_size. The number of bytes actually 
used by struct ext4_inode beyond the original 128-byte ext2 inode is recorded in the i_extra_isize field for each inode, which allows struct 
ext4_inode to grow for a new kernel without having to upgrade all of the on-disk inodes. Access to fields beyond EXT2_GOOD_OLD_INODE_SIZE should 
be verified to be within i_extra_isize. By default, ext4 inode records are 256 bytes, and (as of October 2013) the inode structure is 156 bytes
 (i_extra_isize = 28). The extra space between the end of the inode structure and the end of the inode record can be used to store extended 
attributes. Each inode record can be as large as the filesystem block size, though this is not terribly efficient.

官方说2013年8月,i_extra_isize是28字节,ext4_inode是156字节(128+28)。但我今天贴的ext4_inode代码,后面的i_extra_isize部分应该是32字节(i_extra_isize + i_checksum_hi + 4 * 7),所以ext4_inode应该是160字节(128+32)。为了确定,我去github克隆了linus的linux源码。

$ git clone https://github.com/torvalds/linux.git
$ cd linux
$ git log -L 753,761:fs/ext4/ext4.h

753到761就是i_extra_isize下面的几行,可以自己查看一位置。

结果log输出:

commit 8b4953e13f4c5d9a3c869f5fca7d51e1700e7db0
Author: Theodore Ts'o <tytso@mit.edu>
Date:   Sat Oct 17 16:15:18 2015 -0400

    ext4: reserve code points for the project quota feature
    
    Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -687,8 +690,9 @@
        __le16  i_extra_isize;
        __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
        __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
        __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
        __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
        __le32  i_crtime;       /* File Creation time */
        __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
        __le32  i_version_hi;   /* high 32 bits for 64-bit version */
+       __le32  i_projid;       /* Project ID */

commit e615391896064eb5a0c760d086b8e1c6ecfffeab
Author: Darrick J. Wong <djwong@us.ibm.com>
Date:   Sun Apr 29 18:23:10 2012 -0400

    ext4: change on-disk layout to support extended metadata checksumming
    
    Define flags and change structure definitions to allow checksumming of
    ext4 metadata.
    
    Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -668,8 +674,8 @@
        __le16  i_extra_isize;
-       __le16  i_pad1;
+       __le16  i_checksum_hi;  /* crc32c(uuid+inum+inode) BE */
        __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
        __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
        __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
        __le32  i_crtime;       /* File Creation time */
        __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
        __le32  i_version_hi;   /* high 32 bits for 64-bit version */

commit 3dcf54515aa4981a647ad74859199032965193a5
Author: Christoph Hellwig <hch@lst.de>
Date:   Tue Apr 29 18:13:32 2008 -0400

    ext4: move headers out of include/linux
    
    Move ext4 headers out of include/linux.  This is just the trivial move,
    there's some more thing that could be done later.
    
    Signed-off-by: Christoph Hellwig <hch@lst.de>
    Signed-off-by: Mingming Cao <cmm@us.ibm.com>
    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
--- /dev/null
+++ b/fs/ext4/ext4.h
@@ -0,0 +376,8 @@
+       __le16  i_extra_isize;
+       __le16  i_pad1;
+       __le32  i_ctime_extra;  /* extra Change time      (nsec << 2 | epoch) */
+       __le32  i_mtime_extra;  /* extra Modification time(nsec << 2 | epoch) */
+       __le32  i_atime_extra;  /* extra Access time      (nsec << 2 | epoch) */
+       __le32  i_crtime;       /* File Creation time */
+       __le32  i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
+       __le32  i_version_hi;   /* high 32 bits for 64-bit version */

这下就清楚了,可以看见,2015年10月17号有一次提交,加了一行“__le32 i_projid; /* Project ID */”,所以多了4字节,之前2012年和2008年的代码确实都是28字节。

回到正题,linux的ext4文件系统的inode是256字节,ext4_inode却只有160字节,那160到255字节有啥用呢,它们可以用来存放其它属性,如ACL,SELinux的属性。

I think by default current versions of mkfs.ext2/3/4 default to 256 byte inode size (see /etc/mke2fs.conf). This IIRC enables nanosecond 
timestamps with ext4, and as you say, more extended attributes fit within the inode. Such extended attributes are, for instance, ACL's, 
SELinux labels, some Samba specific labels.

Bigger inodes of course waste a little bit of space, and as you make them bigger you get into diminishing returns territory pretty quickly.
 The default 256 bytes is probably a perfectly good compromise for most situations.
参考: ext2.h:http://lxr.free-electrons.com/source/fs/ext2/ext2.h#L297 ext4.h:http://lxr.free-electrons.com/source/fs/ext4/ext4.h#L688 ext4的inode介绍:https://ext4.wiki.kernel.org/index.php/Ext4_Disk_Layout#Inode_Size ext4扩展部分使用介绍:http://serverfault.com/questions/143691/linux-why-change-inode-size linux源码github地址:https://github.com/torvalds/linux

知识共享许可协议    鄂ICP备 15002452号-5    鄂公网安备 42088102000048号