leevis.com
leevis.com copied to clipboard
linux-0.98文件系统初始化
概述
微软早期的操作系统是DOS,即磁盘操作系统,可见磁盘管理在操作系统中的地位。而linux中一切皆文件,抽象出的虚拟文件系统更强大。而我一直想搞清楚一切皆文件的真正意义:一切皆文件是如何实现的,又是如何抽象出来的。
代码分析
在main.c文件中,内核完成初始化并切换到用户态后,fork
出第一个进程,该进程先调用了init
函数,该函数调用了setup
函数初始化文件系统并挂载根目录。
setup
函数是用户态函数,真正的实现是内核kernel/blk_drv/genhd.c文件中的sys_setup
函数,在该函数中调用了fs/super.c文件中的mount_root
函数挂载根目录。
// Supported file systems: each entry pairs a superblock-reading function
// with the file system's name. mount_root() tries the entries in order and
// stops at the {NULL,NULL} terminator.
static struct file_system_type file_systems[] = {
{minix_read_super,"minix"},
{ext_read_super,"ext"},
{msdos_read_super,"msdos"},
{proc_read_super,"proc"},
{NULL,NULL}	// end-of-table sentinel
};
/*
 * Mount the root file system found on ROOT_DEV.
 *
 * Resets the global file table and every slot of super_block[], then tries
 * each entry of file_systems[] in order until read_super() succeeds. The
 * root inode of the mounted file system becomes both the working directory
 * and the root directory of the current task. Panics when no file system
 * type manages to read the device.
 */
void mount_root(void)
{
	struct file_system_type *fstype;
	struct super_block *sb;
	struct inode *root_inode;
	int n;

	if (sizeof (struct minix_inode) != 32)
		panic("bad i-node size");

	/* Reset the global file table (defined in fs/file_table.c). */
	for (n = 0; n < NR_FILE; n++)
		file_table[n].f_count = 0;
	fcntl_init_locks();

	if (MAJOR(ROOT_DEV) == 2) {
		printk("Insert root floppy and press ENTER");
		wait_for_keypress();
	}

	/* Mark every superblock slot as unused. */
	for (sb = super_block; sb < super_block + NR_SUPER; sb++) {
		sb->s_dev = 0;
		sb->s_blocksize = 0;
		sb->s_lock = 0;
		sb->s_wait = NULL;
		sb->s_covered = NULL;
		sb->s_mounted = NULL;
	}

	/* Try each known file system type until one can read the device. */
	for (fstype = file_systems; fstype->read_super && fstype->name; fstype++) {
		sb = read_super(ROOT_DEV, fstype->name, 0, NULL);
		if (!sb)
			continue;

		root_inode = sb->s_mounted;
		root_inode->i_count += 3;	/* NOTE! it is logically used 4 times, not 1 */
		sb->s_covered = root_inode;
		sb->s_flags = 0;
		current->pwd = root_inode;
		current->root = root_inode;
		return;
	}

	panic("Unable to mount root");
}
/*
 * Return the superblock for device dev, reading it from disk when it is
 * not already present in super_block[].
 *
 * dev   - device number; 0 is rejected.
 * name  - file system type name, resolved through get_fs_type().
 * flags - stored in s_flags before the type-specific reader runs.
 * data  - passed through unchanged to the type-specific reader.
 *
 * Returns NULL when dev is 0, the type is unknown, no free slot is left,
 * or the type-specific reader fails.
 */
static struct super_block * read_super(int dev,char *name,int flags,void *data)
{
	struct file_system_type *fstype;
	struct super_block *sb;

	if (!dev)
		return NULL;
	check_disk_change(dev);

	/* Already present: hand back the existing superblock. */
	sb = get_super(dev);
	if (sb)
		return sb;

	fstype = get_fs_type(name);
	if (!fstype) {
		printk("get fs type failed %s\n",name);
		return NULL;
	}

	/* Find the first free slot (s_dev == 0) in super_block[]. */
	sb = super_block;
	while (sb < super_block + NR_SUPER && sb->s_dev)
		sb++;
	if (sb >= super_block + NR_SUPER)
		return NULL;

	sb->s_dev = dev;
	sb->s_flags = flags;

	/* Dispatch to the type-specific reader, e.g. ext_read_super for "ext". */
	if (!fstype->read_super(sb, data)) {
		sb->s_dev = 0;
		return NULL;
	}

	sb->s_dev = dev;
	sb->s_covered = NULL;
	sb->s_rd_only = 0;
	sb->s_dirt = 0;
	return sb;
}
支持minix ext msdos等文件系统。以ext为例,挂载调用了fs/ext/inode.c文件中ext_read_super
函数。
该函数主要功能就是读取磁盘超级块的信息保存到super_block
结构体的u.ext_sb
中,s_op
指向ext_sops
;
include/linux/fs.h文件
// In-core superblock: one slot of the super_block[] array per mounted
// file system.
struct super_block {
unsigned short s_dev;	// device number; 0 marks the slot as free (see read_super)
unsigned long s_blocksize;	// block size in bytes; ext_read_super sets it to 1024
unsigned char s_lock;	// cleared by mount_root; presumably the flag behind lock_super — confirm
unsigned char s_rd_only;	// cleared by read_super
unsigned char s_dirt;	// cleared by read_super
struct super_operations *s_op;	// file-system-specific callback table (e.g. &ext_sops)
unsigned long s_flags;	// flags passed to read_super; iget copies them into i_flags
unsigned long s_magic;	// magic number copied from the on-disk superblock
unsigned long s_time;
struct inode * s_covered;	// NULL after read_super; mount_root sets it to the root inode
struct inode * s_mounted;	// root inode of this file system, obtained through iget
struct wait_queue * s_wait;	// cleared by mount_root; presumably tasks waiting on s_lock — confirm
// Per-file-system private data; only the member matching the mounted type is used.
union {
struct minix_sb_info minix_sb;
struct ext_sb_info ext_sb; // <----- declared in include/linux/ext_fs_sb.h
struct msdos_sb_info msdos_sb;
} u;
};
fs/ext/inode.c文件
// Superblock operation table of the ext file system; ext_read_super()
// installs it in s->s_op. The initializer is positional: the first slot is
// the read_inode callback that read_inode() invokes through s_op.
// NOTE(review): the meaning of the remaining slots is inferred from the
// function names only — confirm against struct super_operations.
static struct super_operations ext_sops = {
ext_read_inode,
ext_write_inode,
ext_put_inode,
ext_put_super,
ext_write_super,
ext_statfs
};
/*
 * Read the superblock of an ext file system from device s->s_dev into s.
 *
 * Copies the on-disk fields of block 1 into s->u.ext_sb, checks the magic
 * number, keeps the buffers that hold the first free block and the first
 * free inode block, installs ext_sops and loads the root inode into
 * s->s_mounted.
 *
 * s    - superblock slot to fill; s->s_dev must already name the device.
 * data - unused here.
 *
 * Returns s on success. On any failure returns NULL with s->s_dev reset to
 * 0 (so the slot counts as free again) and with every buffer acquired by
 * this function released.
 */
struct super_block *ext_read_super(struct super_block *s,void *data)
{
	struct buffer_head *bh;
	struct ext_super_block *es;
	int dev = s->s_dev,block;

	/* free_super() is the counterpart of lock_super() on every exit path. */
	lock_super(s);
	if (!(bh = bread(dev, 1, BLOCK_SIZE))) {
		s->s_dev=0;
		free_super(s);
		printk("bread failed\n");
		return NULL;
	}

	/* Copy the on-disk superblock into the in-core structure. */
	es = (struct ext_super_block *) bh->b_data;
	s->s_blocksize = 1024;
	s->u.ext_sb.s_ninodes = es->s_ninodes;
	s->u.ext_sb.s_nzones = es->s_nzones;
	s->u.ext_sb.s_firstdatazone = es->s_firstdatazone;
	s->u.ext_sb.s_log_zone_size = es->s_log_zone_size;
	s->u.ext_sb.s_max_size = es->s_max_size;
	s->s_magic = es->s_magic;
	s->u.ext_sb.s_firstfreeblocknumber = es->s_firstfreeblock;
	s->u.ext_sb.s_freeblockscount = es->s_freeblockscount;
	s->u.ext_sb.s_firstfreeinodenumber = es->s_firstfreeinode;
	s->u.ext_sb.s_freeinodescount = es->s_freeinodescount;
	brelse(bh);

	if (s->s_magic != EXT_SUPER_MAGIC) {
		s->s_dev = 0;
		free_super(s);
		printk("magic match failed\n");
		return NULL;
	}

	/* Keep the block holding the first free block entry, if there is one. */
	if (!s->u.ext_sb.s_firstfreeblocknumber) {
		s->u.ext_sb.s_firstfreeblock = NULL;
	} else {
		if (!(s->u.ext_sb.s_firstfreeblock = bread(dev,
			s->u.ext_sb.s_firstfreeblocknumber, BLOCK_SIZE))) {
			printk ("ext_read_super: unable to read first free block\n");
			s->s_dev = 0;
			free_super(s);
			return NULL;
		}
	}

	/* Keep the inode-table block holding the first free inode, if any. */
	if (!s->u.ext_sb.s_firstfreeinodenumber) {
		s->u.ext_sb.s_firstfreeinodeblock = NULL;
	} else {
		/* Inode numbers start at 1 and the inode table starts at block 2. */
		block = 2 + (s->u.ext_sb.s_firstfreeinodenumber - 1) / EXT_INODES_PER_BLOCK;
		if (!(s->u.ext_sb.s_firstfreeinodeblock = bread(dev, block, BLOCK_SIZE))) {
			printk ("ext_read_super: unable to read first free inode block\n");
			brelse(s->u.ext_sb.s_firstfreeblock);
			s->s_dev = 0;
			free_super (s);
			return NULL;
		}
	}

	/*
	 * Release the superblock before iget(): iget() looks this superblock
	 * up again through get_super(dev).
	 * NOTE(review): presumably get_super() would wait on a locked
	 * superblock — confirm.
	 */
	free_super(s);
	/* set up enough so that it can read an inode */
	s->s_dev = dev;
	s->s_op = &ext_sops;	/* callback table used by read_inode() */

	/* Root directory inode of the ext file system. */
	if (!(s->s_mounted = iget(dev,EXT_ROOT_INO))) {
		/*
		 * Release the two buffers kept in u.ext_sb; without this they
		 * stay referenced forever although the slot is given up.
		 * Either pointer may be NULL here, as s_firstfreeblock
		 * already may be in the brelse() call of the failure path
		 * above. Release before clearing s_dev so the slot is still
		 * ours while its fields are read.
		 */
		brelse(s->u.ext_sb.s_firstfreeinodeblock);
		brelse(s->u.ext_sb.s_firstfreeblock);
		s->s_dev=0;
		printk("get root inode failed\n");
		return NULL;
	}
	return s;
}
/*
 * read_inode callback of the ext file system: fill the in-core inode from
 * its on-disk copy and select the operation table matching the file type.
 * Panics when the block holding the inode cannot be read.
 */
void ext_read_inode(struct inode * inode)
{
	struct buffer_head *buf;
	struct ext_inode *disk;
	int blk;
	int z;

	/* Inode numbers start at 1 and the inode table starts at block 2. */
	blk = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK;
	buf = bread(inode->i_dev, blk, BLOCK_SIZE);
	if (!buf)
		panic("unable to read i-node block");

	/* Step to this inode's entry within the block. */
	disk = (struct ext_inode *) buf->b_data;
	disk += (inode->i_ino-1)%EXT_INODES_PER_BLOCK;

	inode->i_mode = disk->i_mode;
	inode->i_uid = disk->i_uid;
	inode->i_gid = disk->i_gid;
	inode->i_nlink = disk->i_nlinks;
	inode->i_size = disk->i_size;
	/* The on-disk inode carries a single timestamp for all three times. */
	inode->i_ctime = disk->i_time;
	inode->i_atime = inode->i_ctime;
	inode->i_mtime = inode->i_atime;
	inode->i_blksize = 0;
	inode->i_blocks = 0;

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		/* Device nodes keep their device number in the first zone slot. */
		inode->i_rdev = disk->i_zone[0];
	} else {
		for (z = 0; z < 12; z++)
			inode->u.ext_i.i_data[z] = disk->i_zone[z];
	}
	brelse(buf);

	/* Pick the inode operations by file type; unknown types get none. */
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext_file_inode_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext_dir_inode_operations;	/* in fs/ext/dir.c */
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &ext_symlink_inode_operations;
	} else if (S_ISCHR(inode->i_mode)) {
		inode->i_op = &ext_chrdev_inode_operations;
	} else if (S_ISBLK(inode->i_mode)) {
		inode->i_op = &ext_blkdev_inode_operations;
	} else if (S_ISFIFO(inode->i_mode)) {
		inode->i_op = &ext_fifo_inode_operations;
		/* A FIFO starts out as an empty pipe with no readers or writers. */
		inode->i_pipe = 1;
		PIPE_BASE(*inode) = NULL;
		PIPE_TAIL(*inode) = 0;
		PIPE_HEAD(*inode) = 0;
		PIPE_WRITE_WAIT(*inode) = NULL;
		PIPE_READ_WAIT(*inode) = NULL;
		PIPE_WRITERS(*inode) = 0;
		PIPE_READERS(*inode) = 0;
	} else {
		inode->i_op = NULL;
	}
}
然后,调用了fs/inode.c文件中的iget
函数获取根目录inode。
该函数先调用get_empty_inode
函数从inode_table
数组中获取一个空的inode
,然后调用read_inode函数填充该空inode。
// Return an in-core inode for inode number nr on device dev.
// This is an excerpt: part of the body is elided at the "......" marker.
// Panics when dev is 0; returns NULL when no empty inode is available or
// when the device has no superblock.
struct inode * iget(int dev,int nr)
{
struct inode * inode, * empty;
if (!dev)
panic("iget with dev==0");
empty = get_empty_inode();
......
if (!empty)
return (NULL);
inode = empty;
// Look up the superblock that belongs to this device
if (!(inode->i_sb = get_super(dev))) {
printk("iget: gouldn't get super-block\n\t");
iput(inode);
return NULL;
}
inode->i_dev = dev;
inode->i_ino = nr;
inode->i_flags = inode->i_sb->s_flags;
// Let the file-system-specific callback fill in the inode
read_inode(inode);
return inode;
}
static void read_inode(struct inode * inode)
{
lock_inode(inode);
// 该s_op 指向的就是ext_sops结构体,对应的read_inode指向的是ext_read_inode函数
// 然后调用对应文件系统的回调函数,读取inode
if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->read_inode)
inode->i_sb->s_op->read_inode(inode);
unlock_inode(inode);
}
最后,返回到mount_root
函数中,根目录inode
赋值给init进程
结构体的pwd
和root
。
在进程中调用open
函数,依赖进程的pwd
和root
,而所有进程又都是init进程
的子进程,继承了init进程
的pwd
和root
。
总结
为了支持多种文件系统,linux-0.98的文件系统部分比linux-0.11的复杂了很多。
-
read_super
函数封装了多个文件系统的读取超级块的调用,不同的文件系统调用不同的函数,具体支持的文件系统见file_systems[]
数组。 -
iget
函数封装了不同文件系统获取i 节点
的调用函数,利用的是super_block
的回调函数指针s_op
。 - 不同文件系统不同类型的
inode
有不同的处理回调函数,利用的是inode
的回调函数指针i_op
。