leevis.com icon indicating copy to clipboard operation
leevis.com copied to clipboard

linux-0.98文件系统初始化

Open vislee opened this issue 3 years ago • 0 comments

概述

微软早期的操作系统是DOS,即磁盘操作系统,可见磁盘管理在操作系统中的地位。而Linux中一切皆文件,抽象出的虚拟文件系统更加强大。我一直想搞清楚“一切皆文件”的真正意义:一切皆文件是如何实现的,又是如何抽象出来的。

代码分析

在main.c文件中,内核完成初始化并切换到用户态后,fork出的第一个进程首先调用init函数,该函数调用setup函数初始化文件系统并挂载根目录。setup函数是用户态函数,真正的实现是内核kernel/blk_drv/genhd.c文件中的sys_setup函数,该函数调用fs/super.c文件中的mount_root函数挂载根目录。


/*
 * Table of supported file systems.  Each entry pairs the routine that
 * reads that file system's super block with the file system's name.
 * The table ends with an all-NULL entry, which mount_root() uses as its
 * stop condition when walking the table.
 */
static struct file_system_type file_systems[] = {
	{ .read_super = minix_read_super, .name = "minix" },
	{ .read_super = ext_read_super,   .name = "ext"   },
	{ .read_super = msdos_read_super, .name = "msdos" },
	{ .read_super = proc_read_super,  .name = "proc"  },
	{ .read_super = NULL,             .name = NULL    }	/* terminator */
};

/*
 * mount_root() - mount the root file system found on ROOT_DEV.
 *
 * Resets the global file table and the super-block table, then tries the
 * entries of file_systems[] in order until read_super() manages to read
 * ROOT_DEV.  The root inode of that file system becomes both the pwd and
 * the root of the current task.  Panics if no entry succeeds.
 */
void mount_root(void)
{
	struct file_system_type * fs;
	struct super_block * sb;
	struct inode * root_inode;
	int n;

	if (sizeof (struct minix_inode) != 32)
		panic("bad i-node size");

	/* Mark every slot of the global file table as unused. */
	for (n = 0; n < NR_FILE; n++)
		file_table[n].f_count = 0;
	fcntl_init_locks();

	/* Major number 2 is treated as the floppy: ask for the root disk. */
	if (MAJOR(ROOT_DEV) == 2) {
		printk("Insert root floppy and press ENTER");
		wait_for_keypress();
	}

	/* Reset every entry of the super-block table. */
	for (sb = super_block; sb < super_block + NR_SUPER; sb++) {
		sb->s_dev = 0;
		sb->s_blocksize = 0;
		sb->s_lock = 0;
		sb->s_wait = NULL;
		sb->s_covered = NULL;
		sb->s_mounted = NULL;
	}

	/* Try each known file system type until one can read ROOT_DEV. */
	for (fs = file_systems; fs->read_super && fs->name; fs++) {
		sb = read_super(ROOT_DEV, fs->name, 0, NULL);
		if (!sb)
			continue;
		root_inode = sb->s_mounted;
		/*
		 * The root inode ends up referenced four times (s_mounted,
		 * s_covered, pwd and root), so add three to its count.
		 */
		root_inode->i_count += 3;
		sb->s_covered = root_inode;
		sb->s_flags = 0;
		current->pwd = root_inode;
		current->root = root_inode;
		return;
	}
	panic("Unable to mount root");
}

/*
 * read_super() - return the super block for device `dev`, reading it if needed.
 *
 * dev:   device number; 0 is rejected.
 * name:  file system type name, looked up with get_fs_type().
 * flags: stored in s_flags of a newly claimed entry.
 * data:  passed through unchanged to the type's read_super routine.
 *
 * If get_super() already knows the device, that entry is returned as is.
 * Otherwise the first super_block[] entry whose s_dev is 0 is claimed and
 * filled in by the file system's own read_super routine.
 *
 * Returns the super block, or NULL when dev is 0, the type name is
 * unknown, the table is full, or the type's routine fails.
 */
static struct super_block * read_super(int dev,char *name,int flags,void *data)
{
	struct super_block * sb;
	struct file_system_type *fstype;

	if (!dev)
		return NULL;
	check_disk_change(dev);

	sb = get_super(dev);
	if (sb)
		return sb;

	fstype = get_fs_type(name);
	if (!fstype) {
		printk("get fs type failed %s\n",name);
		return NULL;
	}

	/* Look for the first table entry that is not bound to a device. */
	sb = super_block;
	while (sb < super_block + NR_SUPER && sb->s_dev)
		sb++;
	if (sb >= super_block + NR_SUPER)
		return NULL;

	sb->s_dev = dev;
	sb->s_flags = flags;
	/* For an ext file system this call lands in ext_read_super(). */
	if (!fstype->read_super(sb,data)) {
		sb->s_dev = 0;
		return NULL;
	}
	sb->s_dev = dev;
	sb->s_covered = NULL;
	sb->s_rd_only = 0;
	sb->s_dirt = 0;
	return sb;
}

内核支持minix、ext、msdos等文件系统。以ext为例,挂载时调用fs/ext/inode.c文件中的ext_read_super函数。该函数的主要功能是读取磁盘上超级块的信息并保存到super_block结构体的u.ext_sb中,同时让s_op指向ext_sops。

include/linux/fs.h文件

/*
 * In-memory super block: one entry of the super_block[] table describes
 * one file system bound to a device.
 */
struct super_block {
	/* Device number; 0 marks the entry as free (read_super claims the first such entry). */
	unsigned short s_dev;
	/* Block size; ext_read_super sets it to 1024. */
	unsigned long s_blocksize;
	/* Cleared by mount_root; presumably the flag behind lock_super() - TODO confirm. */
	unsigned char s_lock;
	/* Cleared by read_super after the super block has been read. */
	unsigned char s_rd_only;
	/* Cleared by read_super after the super block has been read. */
	unsigned char s_dirt;
	/* File-system specific callbacks; ext_read_super points it at ext_sops. */
	struct super_operations *s_op;
	/* Flags given to read_super; iget copies them into inode->i_flags. */
	unsigned long s_flags;
	/* Magic number copied from the on-disk super block (ext checks it against EXT_SUPER_MAGIC). */
	unsigned long s_magic;
	/* Not referenced by the code shown here. */
	unsigned long s_time;
	/* NULL after read_super; mount_root sets it to the root inode for the root file system. */
	struct inode * s_covered;
	/* Root inode of this file system (ext_read_super obtains it with iget). */
	struct inode * s_mounted;
	/* Cleared by mount_root; presumably the wait queue paired with s_lock - TODO confirm. */
	struct wait_queue * s_wait;
	/* Per-file-system private data; which member is valid depends on the file system type. */
	union {
		struct minix_sb_info minix_sb;
		struct ext_sb_info ext_sb;              // <----- see include/linux/ext_fs_sb.h
		struct msdos_sb_info msdos_sb;
	} u;
};

fs/ext/inode.c文件

/*
 * Super-block operations of the ext file system; ext_read_super installs
 * this table as s->s_op.  The initializer is positional, so the order of
 * the entries must match the member order of struct super_operations
 * (declared outside this excerpt).
 */
static struct super_operations ext_sops = { 
	ext_read_inode,		/* reached from read_inode() through s_op->read_inode */
	ext_write_inode,
	ext_put_inode,
	ext_put_super,
	ext_write_super,
	ext_statfs
};

// 读取ext文件系统的超级块
struct super_block *ext_read_super(struct super_block *s,void *data)
{
	struct buffer_head *bh;
	struct ext_super_block *es;
	int dev = s->s_dev,block;

	lock_super(s);
	if (!(bh = bread(dev, 1, BLOCK_SIZE))) {
		s->s_dev=0;
		free_super(s);
		printk("bread failed\n");
		return NULL;
	}
	es = (struct ext_super_block *) bh->b_data;
	s->s_blocksize = 1024;
	s->u.ext_sb.s_ninodes = es->s_ninodes;
	s->u.ext_sb.s_nzones = es->s_nzones;
	s->u.ext_sb.s_firstdatazone = es->s_firstdatazone;
	s->u.ext_sb.s_log_zone_size = es->s_log_zone_size;
	s->u.ext_sb.s_max_size = es->s_max_size;
	s->s_magic = es->s_magic;
	s->u.ext_sb.s_firstfreeblocknumber = es->s_firstfreeblock;
	s->u.ext_sb.s_freeblockscount = es->s_freeblockscount;
	s->u.ext_sb.s_firstfreeinodenumber = es->s_firstfreeinode;
	s->u.ext_sb.s_freeinodescount = es->s_freeinodescount;
	brelse(bh);
	if (s->s_magic != EXT_SUPER_MAGIC) {
		s->s_dev = 0;
		free_super(s);
		printk("magic match failed\n");
		return NULL;
	}
	if (!s->u.ext_sb.s_firstfreeblocknumber)
		s->u.ext_sb.s_firstfreeblock = NULL;
	else
		if (!(s->u.ext_sb.s_firstfreeblock = bread(dev,
			s->u.ext_sb.s_firstfreeblocknumber, BLOCK_SIZE))) {
			printk ("ext_read_super: unable to read first free block\n");
			s->s_dev = 0;
			free_super(s);
			return NULL;
		}
	if (!s->u.ext_sb.s_firstfreeinodenumber)
		s->u.ext_sb.s_firstfreeinodeblock = NULL;
	else {
		block = 2 + (s->u.ext_sb.s_firstfreeinodenumber - 1) / EXT_INODES_PER_BLOCK;
		if (!(s->u.ext_sb.s_firstfreeinodeblock = bread(dev, block, BLOCK_SIZE))) {
			printk ("ext_read_super: unable to read first free inode block\n");
			brelse(s->u.ext_sb.s_firstfreeblock);
			s->s_dev = 0;
			free_super (s);
			return NULL;
		}
	}
	free_super(s);
	/* set up enough so that it can read an inode */
	s->s_dev = dev;
	s->s_op = &ext_sops;   // 回调函数
        // ext文件系统的根目录inode
	if (!(s->s_mounted = iget(dev,EXT_ROOT_INO))) {
		s->s_dev=0;
		printk("get root inode failed\n");
		return NULL;
	}
	return s;
}

/*
 * ext_read_inode() - read_inode callback of the ext file system.
 *
 * Locates the on-disk inode numbered inode->i_ino on inode->i_dev, copies
 * its fields into the in-core inode and selects the inode operations
 * table that matches the file type.  Panics if the inode block cannot be
 * read.
 */
void ext_read_inode(struct inode * inode)
{
	struct buffer_head * buf;
	struct ext_inode * disk;
	int blk;
	int z;

	/* Inode numbers start at 1; the inode blocks start at block 2. */
	blk = 2 + (inode->i_ino-1)/EXT_INODES_PER_BLOCK;
	buf = bread(inode->i_dev, blk, BLOCK_SIZE);
	if (!buf)
		panic("unable to read i-node block");
	disk = (struct ext_inode *) buf->b_data;
	disk += (inode->i_ino-1)%EXT_INODES_PER_BLOCK;

	inode->i_mode = disk->i_mode;
	inode->i_uid = disk->i_uid;
	inode->i_gid = disk->i_gid;
	inode->i_nlink = disk->i_nlinks;
	inode->i_size = disk->i_size;
	/* The disk inode carries a single time stamp; all three times take it. */
	inode->i_ctime = disk->i_time;
	inode->i_atime = inode->i_ctime;
	inode->i_mtime = inode->i_atime;
	inode->i_blksize = 0;
	inode->i_blocks = 0;

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		/* Device nodes keep their device number in the first zone slot. */
		inode->i_rdev = disk->i_zone[0];
	} else {
		for (z = 0; z < 12; z++)
			inode->u.ext_i.i_data[z] = disk->i_zone[z];
	}
	brelse(buf);

	/* Pick the operations table by file type; unknown types get none. */
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &ext_file_inode_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &ext_dir_inode_operations;	/* see fs/ext/dir.c */
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &ext_symlink_inode_operations;
	} else if (S_ISCHR(inode->i_mode)) {
		inode->i_op = &ext_chrdev_inode_operations;
	} else if (S_ISBLK(inode->i_mode)) {
		inode->i_op = &ext_blkdev_inode_operations;
	} else if (S_ISFIFO(inode->i_mode)) {
		inode->i_op = &ext_fifo_inode_operations;
		/* A FIFO starts out as an empty pipe with no users. */
		inode->i_pipe = 1;
		PIPE_BASE(*inode) = NULL;
		PIPE_TAIL(*inode) = 0;
		PIPE_HEAD(*inode) = 0;
		PIPE_WRITE_WAIT(*inode) = NULL;
		PIPE_READ_WAIT(*inode) = NULL;
		PIPE_WRITERS(*inode) = 0;
		PIPE_READERS(*inode) = 0;
	} else {
		inode->i_op = NULL;
	}
}

然后,调用了fs/inode.c文件中的iget函数获取根目录inode。 该函数先调用get_empty_inode函数从inode_table数组中获取一个空的inode,然后调用read_inode函数填充该空inode。

/*
 * iget() - get the in-core inode for inode number `nr` on device `dev`.
 *
 * As far as this excerpt shows: takes a free inode from
 * get_empty_inode(), binds it to the super block that get_super(dev)
 * returns, records the device, inode number and super-block flags in it,
 * and has read_inode() fill in the rest.
 *
 * Returns the inode, or NULL when no empty inode or no super block for
 * `dev` is available.  Panics when dev is 0.
 */
struct inode * iget(int dev,int nr)
{
	struct inode * inode, * empty;

	if (!dev)
		panic("iget with dev==0");
	empty = get_empty_inode();
        /* NOTE(review): the article elides part of the original function at the dots below. */
        ......

	if (!empty)
		return (NULL);
	inode = empty;
        // Look up the super block that belongs to this device.
	if (!(inode->i_sb = get_super(dev))) {
		printk("iget: gouldn't get super-block\n\t");
		/* Give the inode back; it could not be bound to a super block. */
		iput(inode);
		return NULL;
	}
	inode->i_dev = dev;
	inode->i_ino = nr;
	inode->i_flags = inode->i_sb->s_flags;
        // Let read_inode() fill in the inode.
	read_inode(inode);
	return inode;
}

/*
 * read_inode() - fill in an inode through its file system's callback.
 *
 * With the inode locked, calls the read_inode operation of the inode's
 * super block (for ext, s_op is ext_sops, so this reaches
 * ext_read_inode).  Does nothing beyond lock/unlock when the inode has no
 * super block, no operations table, or no read_inode operation.
 */
static void read_inode(struct inode * inode)
{
	lock_inode(inode);
	if (!inode->i_sb || !inode->i_sb->s_op)
		goto out;
	if (!inode->i_sb->s_op->read_inode)
		goto out;
	/* Dispatch to the file-system specific reader. */
	inode->i_sb->s_op->read_inode(inode);
out:
	unlock_inode(inode);
}

最后,返回到mount_root函数中,根目录inode被赋值给init进程结构体的pwd和root。进程调用open函数时依赖自身的pwd和root,而所有进程又都是init进程的子孙进程,继承了init进程的pwd和root。

总结

为了支持多种文件系统,linux-0.98的文件系统部分比linux-0.11的复杂了很多。

  1. read_super 函数封装了多个文件系统的读取超级块的调用,不同的文件系统调用不同的函数,具体支持的文件系统见file_systems[]数组。
  2. iget 函数封装了不同文件系统获取i 节点的调用函数,利用的是super_block的回调函数指针s_op
  3. 不同文件系统不同类型的inode有不同的处理回调函数,利用的是inode的回调函数指针i_op

vislee avatar Jan 01 '22 03:01 vislee