leevis.com
leevis.com copied to clipboard
linux-0.11 execve函数分析
概述
在linux系统上执行一个程序通常是先调用fork
函数复制父进程空间,然后调用execve
函数执行磁盘上的可执行文件。
例如,在linux-0.11启动时的一段代码:
if (!(pid=fork())) {
close(0);
if (open("/etc/rc",O_RDONLY,0))
_exit(1);
execve("/bin/sh",argv_rc,envp_rc);
_exit(2);
}
在unistd.h文件中execve函数声明如下,带了3个参数。
int execve(const char * filename, char ** argv, char ** envp);
接下来,跟踪一下内核代码 execve的实现。
代码分析
从内核代码中直接搜上述声明的函数是搜不到对应的实现的,实际上是通过一个宏定义的。
_syscall3(int,execve,const char *,file,char **,argv,char **,envp)
, _syscall3
是关键,通过代码可看出主要是c内嵌汇编调用了0x80
中断,把函数的参数传递给寄存器。
eax是__NR_execve,也就是系统调用号,在unistd.h中定义。
ebx对应第一个参数,是可执行文件名。
ecx对应第二个参数,参数数组地址。
edx对应第三个参数,环境变量数组地址。
#define _syscall3(type,name,atype,a,btype,b,ctype,c) \
type name(atype a,btype b,ctype c) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
: "=a" (__res) \
: "0" (__NR_##name),"b" ((long)(a)),"c" ((long)(b)),"d" ((long)(c))); \
if (__res>=0) \
return (type) __res; \
errno=-__res; \
return -1; \
}
0x80中断是什么?
在系统初始化时调用了sched.c文件中的sched_init
函数,在该函数中调用了 set_system_gate(0x80,&system_call);
所以,0x80中断实际就是system_call
函数,该函数是汇编定义的
_system_call:
cmpl $nr_system_calls-1,%eax # 判断系统调用号是否有定义
ja bad_sys_call
push %ds
push %es
push %fs
pushl %edx # 参数压栈,注意顺序
pushl %ecx # push %ebx,%ecx,%edx as parameters
pushl %ebx # to the system call
movl $0x10,%edx # set up ds,es to kernel space
mov %dx,%ds
mov %dx,%es
movl $0x17,%edx # fs points to local data space
mov %dx,%fs
call _sys_call_table(,%eax,4) # 调用中断函数回调函数。execve 对应的就是sys_execve。
pushl %eax
movl _current,%eax
cmpl $0,state(%eax) # state
jne reschedule
cmpl $0,counter(%eax) # counter
je reschedule
ret_from_sys_call:
movl _current,%eax # task[0] cannot have signals
cmpl _task,%eax
je 3f
cmpw $0x0f,CS(%esp) # was old code segment supervisor ?
jne 3f
cmpw $0x17,OLDSS(%esp) # was stack segment = 0x17 ?
jne 3f
movl signal(%eax),%ebx
movl blocked(%eax),%ecx
notl %ecx
andl %ebx,%ecx
bsfl %ecx,%ecx
je 3f
btrl %ecx,%ebx
movl %ebx,signal(%eax)
incl %ecx
pushl %ecx
call _do_signal
popl %eax
3: popl %eax
popl %ebx
popl %ecx
popl %edx
pop %fs
pop %es
pop %ds
iret # 中断返回
_sys_execve:
lea EIP(%esp),%eax # 中断时会发生特权级的切换,该地址为中断栈保存的中断返回用户态执行的代码
pushl %eax # 中断返回要执行的指令的在中断栈上的地址
call _do_execve
addl $4,%esp
ret
在_system_call
函数中压入堆栈的**envp
环境变量数组指针、**argv
参数数组指针、*filename
可执行文件名
在_sys_execve
函数中压入堆栈的eip
中断用户态程序执行的指令在中断栈上的地址,修改了该值后也就修改了中断返回(iret)要执行的指令。
int do_execve(unsigned long * eip,long tmp,char * filename,
char ** argv, char ** envp)
{
struct m_inode * inode;
struct buffer_head * bh;
struct exec ex;
unsigned long page[MAX_ARG_PAGES];
int i,argc,envc;
int e_uid, e_gid;
int retval;
int sh_bang = 0;
unsigned long p=PAGE_SIZE*MAX_ARG_PAGES-4;
// eip[0] 是用户态IP eip[1] 是用户态CS
if ((0xffff & eip[1]) != 0x000f)
panic("execve called from supervisor mode");
for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
page[i]=0;
if (!(inode=namei(filename))) /* get executables inode */
return -ENOENT;
argc = count(argv);
envc = count(envp);
restart_interp:
if (!S_ISREG(inode->i_mode)) { /* must be regular file */
retval = -EACCES;
goto exec_error2;
}
i = inode->i_mode;
e_uid = (i & S_ISUID) ? inode->i_uid : current->euid;
e_gid = (i & S_ISGID) ? inode->i_gid : current->egid;
if (current->euid == inode->i_uid)
i >>= 6;
else if (current->egid == inode->i_gid)
i >>= 3;
if (!(i & 1) &&
!((inode->i_mode & 0111) && suser())) {
retval = -ENOEXEC;
goto exec_error2;
}
if (!(bh = bread(inode->i_dev,inode->i_zone[0]))) {
retval = -EACCES;
goto exec_error2;
}
ex = *((struct exec *) bh->b_data); /* read exec-header */
if ((bh->b_data[0] == '#') && (bh->b_data[1] == '!') && (!sh_bang)) {
/*
* This section does the #! interpretation.
* Sorta complicated, but hopefully it will work. -TYT
*/
char buf[1023], *cp, *interp, *i_name, *i_arg;
unsigned long old_fs;
strncpy(buf, bh->b_data+2, 1022);
brelse(bh);
iput(inode);
buf[1022] = '\0';
if (cp = strchr(buf, '\n')) {
*cp = '\0';
for (cp = buf; (*cp == ' ') || (*cp == '\t'); cp++);
}
if (!cp || *cp == '\0') {
retval = -ENOEXEC; /* No interpreter name found */
goto exec_error1;
}
interp = i_name = cp;
i_arg = 0;
for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) {
if (*cp == '/')
i_name = cp+1;
}
if (*cp) {
*cp++ = '\0';
i_arg = cp;
}
/*
* OK, we've parsed out the interpreter name and
* (optional) argument.
*/
if (sh_bang++ == 0) {
p = copy_strings(envc, envp, page, p, 0);
p = copy_strings(--argc, argv+1, page, p, 0);
}
/*
* Splice in (1) the interpreter's name for argv[0]
* (2) (optional) argument to interpreter
* (3) filename of shell script
*
* This is done in reverse order, because of how the
* user environment and arguments are stored.
*/
p = copy_strings(1, &filename, page, p, 1);
argc++;
if (i_arg) {
p = copy_strings(1, &i_arg, page, p, 2);
argc++;
}
p = copy_strings(1, &i_name, page, p, 2);
argc++;
if (!p) {
retval = -ENOMEM;
goto exec_error1;
}
/*
* OK, now restart the process with the interpreter's inode.
*/
old_fs = get_fs();
set_fs(get_ds());
if (!(inode=namei(interp))) { /* get executables inode */
set_fs(old_fs);
retval = -ENOENT;
goto exec_error1;
}
set_fs(old_fs);
goto restart_interp;
}
brelse(bh);
if (N_MAGIC(ex) != ZMAGIC || ex.a_trsize || ex.a_drsize ||
ex.a_text+ex.a_data+ex.a_bss>0x3000000 ||
inode->i_size < ex.a_text+ex.a_data+ex.a_syms+N_TXTOFF(ex)) {
retval = -ENOEXEC;
goto exec_error2;
}
if (N_TXTOFF(ex) != BLOCK_SIZE) {
printk("%s: N_TXTOFF != BLOCK_SIZE. See a.out.h.", filename);
retval = -ENOEXEC;
goto exec_error2;
}
if (!sh_bang) {
p = copy_strings(envc,envp,page,p,0);
p = copy_strings(argc,argv,page,p,0);
if (!p) {
retval = -ENOMEM;
goto exec_error2;
}
}
/* OK, This is the point of no return */
if (current->executable)
iput(current->executable);
current->executable = inode;
for (i=0 ; i<32 ; i++)
current->sigaction[i].sa_handler = NULL;
for (i=0 ; i<NR_OPEN ; i++)
if ((current->close_on_exec>>i)&1)
sys_close(i);
current->close_on_exec = 0;
free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
if (last_task_used_math == current)
last_task_used_math = NULL;
current->used_math = 0;
p += change_ldt(ex.a_text,page)-MAX_ARG_PAGES*PAGE_SIZE;
p = (unsigned long) create_tables((char *)p,argc,envc);
current->brk = ex.a_bss +
(current->end_data = ex.a_data +
(current->end_code = ex.a_text));
current->start_stack = p & 0xfffff000;
current->euid = e_uid;
current->egid = e_gid;
i = ex.a_text+ex.a_data;
while (i&0xfff)
put_fs_byte(0,(char *) (i++));
eip[0] = ex.a_entry; /* eip, magic happens :-) */ // 可执行文件入口地址
eip[3] = p; /* stack pointer */
return 0;
exec_error2:
iput(inode);
exec_error1:
for (i=0 ; i<MAX_ARG_PAGES ; i++)
free_page(page[i]);
return(retval);
}
总结
系统调用,调用0x80中断陷入内核态后,先执行_system_call把用户态传递的参数压入内核态堆栈,然后调用_sys_execve。 _sys_execve函数把中断返回要执行的指令(该指令地址被保存到中断的内核态堆栈)的内核态堆栈的地址压入内核态堆栈,然后调用do_execve函数,该函数执行一些权限判断后,根据进程内存模型设置好后,把中断返回用户态执行的指令改为程序人口地址,当中断返回后就从入口地址开始执行程序了。