今天在复习《鸟哥的私房菜-基础学习篇》,看到inode大小为128bytes,想看下这128字节里面到底是什么样的。于是我查了下google,发现ext2/3是128字节,ext4是256字节,以下是ext2/ext4对应的结构。
ext2.h/ext2_inode
:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
* Structure of an inode on the disk
*/
struct ext2_inode {
__le16 i_mode; /* File mode */
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */
__le32 i_atime; /* Access time */
__le32 i_ctime; /* Creation time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
__le16 i_links_count; /* Links count */
__le32 i_blocks; /* Blocks count */
__le32 i_flags; /* File flags */
union {
struct {
__le32 l_i_reserved1;
} linux1;
struct {
__le32 h_i_translator;
} hurd1;
struct {
__le32 m_i_reserved1;
} masix1;
} osd1; /* OS dependent 1 */
__le32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
__le32 i_generation; /* File version (for NFS) */
__le32 i_file_acl; /* File ACL */
__le32 i_dir_acl; /* Directory ACL */
__le32 i_faddr; /* Fragment address */
union {
struct {
__u8 l_i_frag; /* Fragment number */
__u8 l_i_fsize; /* Fragment size */
__u16 i_pad1;
__le16 l_i_uid_high; /* these 2 fields */
__le16 l_i_gid_high; /* were reserved2[0] */
__u32 l_i_reserved2;
} linux2;
struct {
__u8 h_i_frag; /* Fragment number */
__u8 h_i_fsize; /* Fragment size */
__le16 h_i_mode_high;
__le16 h_i_uid_high;
__le16 h_i_gid_high;
__le32 h_i_author;
} hurd2;
struct {
__u8 m_i_frag; /* Fragment number */
__u8 m_i_fsize; /* Fragment size */
__u16 m_pad1;
__u32 m_i_reserved2[2];
} masix2;
} osd2; /* OS dependent 2 */
};
ext4.h/ext4_inode
:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/*
* Structure of an inode on the disk
*/
struct ext4_inode {
__le16 i_mode; /* File mode */
__le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size_lo; /* Size in bytes */
__le32 i_atime; /* Access time */
__le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */
__le16 i_links_count; /* Links count */
__le32 i_blocks_lo; /* Blocks count */
__le32 i_flags; /* File flags */
union {
struct {
__le32 l_i_version;
} linux1;
struct {
__u32 h_i_translator;
} hurd1;
struct {
__u32 m_i_reserved1;
} masix1;
} osd1; /* OS dependent 1 */
__le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
__le32 i_generation; /* File version (for NFS) */
__le32 i_file_acl_lo; /* File ACL */
__le32 i_size_high;
__le32 i_obso_faddr; /* Obsoleted fragment address */
union {
struct {
__le16 l_i_blocks_high; /* were l_i_reserved1 */
__le16 l_i_file_acl_high;
__le16 l_i_uid_high; /* these 2 fields */
__le16 l_i_gid_high; /* were reserved2[0] */
__le16 l_i_checksum_lo;/* crc32c(uuid+inum+inode) LE */
__le16 l_i_reserved;
} linux2;
struct {
__le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
__u16 h_i_mode_high;
__u16 h_i_uid_high;
__u16 h_i_gid_high;
__u32 h_i_author;
} hurd2;
struct {
__le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
__le16 m_i_file_acl_high;
__u32 m_i_reserved2[2];
} masix2;
} osd2; /* OS dependent 2 */
__le16 i_extra_isize;
__le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */
__le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
__le32 i_crtime; /* File Creation time */
__le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
__le32 i_version_hi; /* high 32 bits for 64-bit version */
__le32 i_projid; /* Project ID */
};
可以看见ext4_inode仅仅是最下面多了一点。
以下是官方介绍:
1
2
3
4
5
6
7
8
9
10
Inode Size
In ext2 and ext3, the inode structure size was fixed at 128 bytes (EXT2_GOOD_OLD_INODE_SIZE) and each inode had a disk record size of 128 bytes.
Starting with ext4, it is possible to allocate a larger on-disk inode at format time for all inodes in the filesystem to provide space beyond
the end of the original ext2 inode. The on-disk inode record size is recorded in the superblock as s_inode_size. The number of bytes actually
used by struct ext4_inode beyond the original 128-byte ext2 inode is recorded in the i_extra_isize field for each inode, which allows struct
ext4_inode to grow for a new kernel without having to upgrade all of the on-disk inodes. Access to fields beyond EXT2_GOOD_OLD_INODE_SIZE should
be verified to be within i_extra_isize. By default, ext4 inode records are 256 bytes, and (as of October 2013) the inode structure is 156 bytes
(i_extra_isize = 28). The extra space between the end of the inode structure and the end of the inode record can be used to store extended
attributes. Each inode record can be as large as the filesystem block size, though this is not terribly efficient.
官方说2013年8月,i_extra_isize是28字节,ext4_inode是156字节(128+28)。但我今天贴的ext4_inode代码,后面的i_extra_isize部分应该是32字节(i_extra_isize + i_checksum_hi + 4 * 7),所以ext4_inode应该是160字节(128+32)。为了确定,我去github克隆了linus的linux源码。
1
2
3
$ git clone https://github.com/torvalds/linux.git
$ cd linux
$ git log -L 753,761:fs/ext4/ext4.h
753到761就是i_extra_isize下面的几行,可以自己查看一位置。
结果log输出:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
commit 8b4953e13f4c5d9a3c869f5fca7d51e1700e7db0
Author: Theodore Ts'o <tytso@mit.edu>
Date: Sat Oct 17 16:15:18 2015 -0400
ext4: reserve code points for the project quota feature
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -687,8 +690,9 @@
__le16 i_extra_isize;
__le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */
__le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
__le32 i_crtime; /* File Creation time */
__le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
__le32 i_version_hi; /* high 32 bits for 64-bit version */
+ __le32 i_projid; /* Project ID */
commit e615391896064eb5a0c760d086b8e1c6ecfffeab
Author: Darrick J. Wong <djwong@us.ibm.com>
Date: Sun Apr 29 18:23:10 2012 -0400
ext4: change on-disk layout to support extended metadata checksumming
Define flags and change structure definitions to allow checksumming of
ext4 metadata.
Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -668,8 +674,8 @@
__le16 i_extra_isize;
- __le16 i_pad1;
+ __le16 i_checksum_hi; /* crc32c(uuid+inum+inode) BE */
__le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
__le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
__le32 i_crtime; /* File Creation time */
__le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
__le32 i_version_hi; /* high 32 bits for 64-bit version */
commit 3dcf54515aa4981a647ad74859199032965193a5
Author: Christoph Hellwig <hch@lst.de>
Date: Tue Apr 29 18:13:32 2008 -0400
ext4: move headers out of include/linux
Move ext4 headers out of include/linux. This is just the trivial move,
there's some more thing that could be done later.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
--- /dev/null
+++ b/fs/ext4/ext4.h
@@ -0,0 +376,8 @@
+ __le16 i_extra_isize;
+ __le16 i_pad1;
+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
+ __le32 i_crtime; /* File Creation time */
+ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
+ __le32 i_version_hi; /* high 32 bits for 64-bit version */
这下就清楚了,可以看见,2015年10月17号有一次提交,加了一行“__le32 i_projid; /* Project ID */”,所以多了4字节,之前2012年和2008年的代码确实都是28字节。
回到正题,linux的ext4文件系统的inode是256字节,ext4_inode却只有160字节,那160到255字节有啥用呢,它们可以用来存放其它属性,如ACL,SELinux的属性。
1
2
3
4
5
6
I think by default current versions of mkfs.ext2/3/4 default to 256 byte inode size (see /etc/mke2fs.conf). This IIRC enables nanosecond
timestamps with ext4, and as you say, more extended attributes fit within the inode. Such extended attributes are, for instance, ACL's,
SELinux labels, some Samba specific labels.
Bigger inodes of course waste a little bit of space, and as you make them bigger you get into diminishing returns territory pretty quickly.
The default 256 bytes is probably a perfectly good compromise for most situations.