leevis.com icon indicating copy to clipboard operation
leevis.com copied to clipboard

linux-0.98网络系统

Open vislee opened this issue 3 years ago • 0 comments

概述

应用程序通过socket创建套接字句柄,通过bind绑定地址, 通过listen监听端口,通过connect发起连接,通过accept接收连接, 通过read/recv读取数据,通过write/send发送数据。 我们就来看下这些函数的内核实现。

代码分析

Linux一切皆文件,网络子系统也是建立在文件系统之上的, 因此先回顾一下进程打开文件结构体,也就是说每个socket也会对应一个file结构体,通过fd索引。

/*
 * Per-open-file state. Each socket is also reached through one of these,
 * stored in current->filp[] and indexed by file descriptor.
 */
struct file {
	unsigned short f_mode;		/* access bits: sys_read tests bit 0, sys_write tests bit 1 */
	unsigned short f_flags;		/* socket code tests O_NONBLOCK here */
	unsigned short f_count;		/* 0 marks a free file_table slot */
	unsigned short f_reada;
	unsigned short f_rdev;		/* needed for /dev/tty */
	struct inode * f_inode;		/* inode behind this open file */
	struct file_operations * f_op;	/* read/write dispatch table */
	off_t f_pos;
};

然后我们再看网络子系统, 内核初始化时,会调用 sock_init 函数初始化网络系统。

先看下内核socket结构,定义在net/kern_sock.h文件中:

struct socket {
	short type;			/* SOCK_STREAM, ... */
	socket_state state;        // state of the socket
	long flags;
	struct proto_ops *ops;		/* protocols do most everything */
	void *data;			/* protocol data */     // for AF_UNIX, points to a struct unix_proto_data
	struct socket *conn;		/* server socket connected to */
	struct socket *iconn;		/* incomplete client connections */    // queue of client sockets waiting to be accepted
	struct socket *next;
	struct wait_queue **wait;	/* ptr to place to wait on */
	void *dummy;                  // points to the inode
};

net/socket.c文件中

// Supported protocol families: family number, printable name, operations
static struct {
	short family;
	char *name;
	struct proto_ops *ops;
} proto_table[] = {
	{AF_UNIX,	"AF_UNIX",	&unix_proto_ops},  // UNIX-domain sockets
#ifdef INET_SOCKETS
	{AF_INET,	"AF_INET",	&inet_proto_ops},  // Internet sockets
#endif
};


/*
 * Initialize the socket layer: mark every slot of the sockets array as
 * free, then run the init hook of every entry in proto_table. An entry
 * whose init hook returns a negative value has its family field set to -1.
 * A warning is printed when no family initialized successfully.
 */
void
sock_init(void)
{
	struct socket *sock;
	int i, ok;

	/* mark every kernel socket slot as free */
	for (sock = sockets; sock <= last_socket; ++sock)
		sock->state = SS_FREE;

	/* initialize each supported protocol family */
	for (i = ok = 0; i < NPROTO; ++i) {
		printk("sock_init: initializing family %d (%s)\n",
		       proto_table[i].family, proto_table[i].name);
		if ((*proto_table[i].ops->init)() < 0) {
			/* report which family failed before disabling it */
			printk("sock_init: init failed for family %d.\n",
			       proto_table[i].family);
			proto_table[i].family = -1;
		}
		else
			++ok;
	}
	if (!ok)
		printk("sock_init: warning: no protocols initialized\n");
}

AF_UNIX unix socket实现。 net/unix.c 文件中

// Per-socket protocol data for UNIX-domain sockets
static struct unix_proto_data {
	int refcnt;			/* cnt of reference 0=free */   // reference count
	struct socket *socket;		/* socket we're bound to */
	int protocol;
	struct sockaddr_un sockaddr_un;
	short sockaddr_len;		/* >0 if name bound */
	char *buf;			/* one page obtained in unix_proto_create */
	int bp_head, bp_tail;		/* writer advances bp_head, reader advances bp_tail */
	struct inode *inode;		/* set by unix_proto_bind via open_namei */
	struct unix_proto_data *peerupd;	/* peer's data, set on connect/accept */
} unix_datas[NSOCKETS];

/*
 * Operations table for the AF_UNIX family, referenced from proto_table.
 * The initializer is positional, so the entries must stay in the order of
 * the members of struct proto_ops (declared elsewhere).
 */
struct proto_ops unix_proto_ops = {
	unix_proto_init,
	unix_proto_create,
	unix_proto_dup,
	unix_proto_release,
	unix_proto_bind,
	unix_proto_connect,
	unix_proto_socketpair,
	unix_proto_accept,
	unix_proto_getname,
	unix_proto_read,
	unix_proto_write,
	unix_proto_select,
 	unix_proto_ioctl,
 	unix_proto_listen,
 	unix_proto_send,
 	unix_proto_recv,
 	unix_proto_sendto,
 	unix_proto_recvfrom,
 	unix_proto_shutdown,
 	unix_proto_setsockopt,
 	unix_proto_getsockopt,
 	NULL /* unix_proto_fcntl. */
};

/*
 * Family init hook for AF_UNIX: reset the reference count of every entry
 * of unix_datas so that all of them start out free. Always returns 0.
 */
static int
unix_proto_init(void)
{
	struct unix_proto_data *slot = unix_datas;

	PRINTK("unix_proto_init: initializing...\n");
	while (slot <= last_unix_data) {
		slot->refcnt = 0;
		slot++;
	}
	return 0;
}

/*
 * Create hook for AF_UNIX. Only protocol 0 is accepted. On success a
 * unix_proto_data entry is allocated, given one page as its data buffer
 * and attached to the socket.
 *
 * Returns 0 on success, -EINVAL for a non-zero protocol, -ENOMEM when
 * either the protocol data or the page cannot be obtained.
 */
static int
unix_proto_create(struct socket *sock, int protocol)
{
	struct unix_proto_data *data;

	PRINTK("unix_proto_create: socket 0x%x, proto %d\n", sock, protocol);
	if (protocol != 0) {
		PRINTK("unix_proto_create: protocol != 0\n");
		return -EINVAL;
	}

	/* take a protocol-data entry from the unix_datas array */
	data = unix_data_alloc();
	if (!data) {
		printk("unix_proto_create: can't allocate buffer\n");
		return -ENOMEM;
	}

	/* one page serves as this socket's data buffer */
	data->buf = (char *)get_free_page(GFP_USER);
	if (!data->buf) {
		printk("unix_proto_create: can't get page!\n");
		unix_data_deref(data);	/* give the entry back */
		return -ENOMEM;
	}

	data->protocol = protocol;
	data->socket = sock;
	UN_DATA(sock) = data;
	PRINTK("unix_proto_create: allocated data 0x%x\n", data);
	return 0;
}

linux0.98通过sys_socketcall函数封装了概述中的函数实现,具体socket则是调用了内核sock_socket函数。


/*
 * Implementation of the socket() call: look the family up in proto_table,
 * validate type and protocol, allocate a socket, let the family's create
 * hook set it up, and finally bind it to a file descriptor.
 *
 * Returns the new fd, -EINVAL for an unknown family, unsupported type,
 * negative protocol or when no descriptor could be obtained, -EAGAIN when
 * no socket is available, or the (negative) error from the create hook.
 */
static int
sock_socket(int family, int type, int protocol)
{
	struct proto_ops *family_ops;
	struct socket *sock;
	int idx, err, fd;

	PRINTK("sys_socket: family = %d (%s), type = %d, protocol = %d\n",
	       family, family_name(family), type, protocol);

	/* is this protocol family supported? */
	idx = 0;
	while (idx < NPROTO && proto_table[idx].family != family)
		idx++;
	if (idx >= NPROTO) {
		PRINTK("sys_socket: family not found\n");
		return -EINVAL;
	}
	family_ops = proto_table[idx].ops;

	/*
	 * only the socket types we know how to manipulate are allowed; the
	 * family may still reject the protocol later on.
	 */
	switch (type) {
	case SOCK_STREAM:
	case SOCK_DGRAM:
	case SOCK_SEQPACKET:
	case SOCK_RAW:
		break;
	default:
		return -EINVAL;
	}
	if (protocol < 0)
		return -EINVAL;

	/*
	 * get a socket (the argument 1 presumably asks sock_alloc to wait
	 * for a free one -- confirm against sock_alloc) and hand it to the
	 * family. protocol 0 tells the family to pick its default.
	 */
	sock = sock_alloc(1);
	if (!sock) {
		printk("sys_socket: no more sockets\n");
		return -EAGAIN;
	}
	sock->type = type;
	sock->ops = family_ops;

	/* for AF_UNIX this ends up in unix_proto_create */
	err = sock->ops->create(sock, protocol);
	if (err < 0) {
		sock_release(sock);
		return err;
	}

	/* attach the socket's inode to a file descriptor */
	fd = get_fd(SOCK_INODE(sock));
	if (fd < 0) {
		sock_release(sock);
		return -EINVAL;
	}
	return fd;
}

/*
 * Find a free slot in the current process's open-file array (its index is
 * the fd) and a free entry in file_table, then set the entry up as a
 * read/write socket file whose f_inode is the given inode.
 *
 * Returns the fd, or -1 when either table has no free slot.
 */
static int
get_fd(struct inode *inode)
{
	struct file *slot;
	int fd = 0, n = 0;

	/* lowest unused descriptor of this process */
	while (fd < NR_OPEN && current->filp[fd])
		fd++;
	if (fd >= NR_OPEN)
		return -1;
	current->close_on_exec &= ~(1 << fd);

	/* first unused entry of the system-wide file table */
	slot = file_table;
	while (n < NR_FILE && slot->f_count) {
		n++;
		slot++;
	}
	if (n >= NR_FILE)
		return -1;

	current->filp[fd] = slot;
	slot->f_op = &socket_file_ops;
	slot->f_mode = 3;
	slot->f_flags = 0;
	slot->f_count = 1;
	slot->f_inode = inode;
	slot->f_pos = 0;
	return fd;
}

通过文件句柄fd从当前进程的打开文件数组current->filp[fd]获取对应的打开文件结构file,file.f_inode就是socket结构体对应的inode,也就是socket.dummy的值; 而socket.data是对应协议的数据结构,socket.ops是对应协议族的回调函数。

创建好socket句柄后,就需要bind地址。

在net/socket.c文件中,

/*
 * Implementation of bind(): resolve fd to its socket and forward the call
 * to the family's bind hook (unix_proto_bind for AF_UNIX).
 *
 * Returns 0 on success, -EBADF if fd does not name a socket, or the
 * negative error reported by the bind hook.
 */
static int
sock_bind(int fd, struct sockaddr *umyaddr, int addrlen)
{
	struct socket *sock;
	int err;

	PRINTK("sys_bind: fd = %d\n", fd);

	sock = sockfd_lookup(fd, NULL);
	if (!sock)
		return -EBADF;

	err = sock->ops->bind(sock, umyaddr, addrlen);
	if (err >= 0)
		return 0;

	PRINTK("sys_bind: bind failed\n");
	return err;
}

/*
 * Linear search of the sockets array for the in-use socket whose inode
 * is `inode`. Returns that socket, or NULL when none matches.
 */
static inline struct socket *
socki_lookup(struct inode *inode)
{
	struct socket *cur = sockets;

	while (cur <= last_socket) {
		if (cur->state != SS_FREE && SOCK_INODE(cur) == inode)
			return cur;
		cur++;
	}
	return NULL;
}

/*
 * Map a file descriptor of the current process to its socket. When pfile
 * is non-NULL the open-file structure is stored through it as well.
 * Returns NULL for an out-of-range or unused fd, or when the file's inode
 * does not belong to any socket.
 */
static inline struct socket *
sockfd_lookup(int fd, struct file **pfile)
{
	struct file *filp;

	if (fd < 0)
		return NULL;
	if (fd >= NR_OPEN)
		return NULL;

	filp = current->filp[fd];
	if (!filp)
		return NULL;

	if (pfile)
		*pfile = filp;
	return socki_lookup(filp->f_inode);
}

net/unix.c文件

/*
 * Bind hook for AF_UNIX: bind the socket to a filesystem path.
 *
 * The address is copied from user space into the socket's protocol data,
 * a socket node is created at that path with do_mknod, and its inode is
 * fetched with open_namei and kept in upd->inode.
 *
 * Returns 0 on success; -EINVAL for a bad length, an already bound socket
 * or a family other than AF_UNIX; otherwise the negative error from
 * do_mknod or open_namei.
 */
static int
unix_proto_bind(struct socket *sock, struct sockaddr *umyaddr,
		int sockaddr_len)
{
	struct unix_proto_data *upd = UN_DATA(sock);
	/* one byte larger than sun_path so the copy can be NUL-terminated */
	char fname[sizeof(((struct sockaddr_un *)0)->sun_path) + 1];
	int i;
	unsigned long old_fs;

	PRINTK("unix_proto_bind: socket 0x%x, len=%d\n", sock,
	       sockaddr_len);
	/* the length must cover at least one path byte and fit the struct */
	if (sockaddr_len <= UN_PATH_OFFSET ||
	    sockaddr_len >= sizeof(struct sockaddr_un)) {
		PRINTK("unix_proto_bind: bad length %d\n", sockaddr_len);
		return -EINVAL;
	}
	if (upd->sockaddr_len || upd->inode) {
		printk("unix_proto_bind: already bound!\n");
		return -EINVAL;
	}
	verify_area(umyaddr, sockaddr_len);
	memcpy_fromfs(&upd->sockaddr_un, umyaddr, sockaddr_len);
	if (upd->sockaddr_un.sun_family != AF_UNIX) {
		PRINTK("unix_proto_bind: family is %d, not AF_UNIX (%d)\n",
		       upd->sockaddr_un.sun_family, AF_UNIX);
		return -EINVAL;
	}

	memcpy(fname, upd->sockaddr_un.sun_path, sockaddr_len-UN_PATH_OFFSET);
	fname[sockaddr_len-UN_PATH_OFFSET] = '\0';
	/*
	 * NOTE(review): fs is switched for the duration of the two calls
	 * below, presumably so that they accept the kernel-resident fname
	 * where they expect a user-space pointer -- confirm.
	 */
	old_fs = get_fs();
	set_fs(get_ds());
        // do_mknod (fs/namei.c) creates the inode for the socket's path,
        // i.e. binding a unix socket creates a file in the filesystem.
	i = do_mknod(fname, S_IFSOCK | 0777, 0);
        // on success, open_namei (fs/namei.c) fetches the inode of that file.
	if (i == 0)
		i = open_namei(fname, 0, S_IFSOCK, &upd->inode, NULL);
	set_fs(old_fs);
	if (i < 0) {
		printk("unix_proto_bind: can't open socket %s\n", fname);
		return i;
	}

	upd->sockaddr_len = sockaddr_len;	/* now its legal */
	PRINTK("unix_proto_bind: bound socket address: ");
#ifdef SOCK_DEBUG
	sockaddr_un_printk(&upd->sockaddr_un, upd->sockaddr_len);
#endif
	return 0;
}

bind做的事情,通过fd找到对应的socket结构,调用对应协议族的bind回调函数, 对于unix socket,则打开一个文件,打开文件的inode赋值到unix socket结构体的upd->inode

服务端程序调用listen监听,对于unix socket协议族,listen没干啥实质的事情,仅仅是把socket结构体的状态改一下sock->flags |= SO_ACCEPTCON;。 对于客户端程序则需要调用connect发起连接,对应内核的net/socket.c文件中的sock_connect函数,实际上就是对 对应协议族回调函数sock->ops->connect的封装调用。

net/unix.c文件:

static int
unix_proto_connect(struct socket *sock, struct sockaddr *uservaddr,
		   int sockaddr_len, int flags)
{
	int i;
	struct unix_proto_data *serv_upd;
	struct sockaddr_un sockun;

	PRINTK("unix_proto_connect: socket 0x%x, servlen=%d\n", sock,
	       sockaddr_len);
	if (sockaddr_len <= UN_PATH_OFFSET ||
	    sockaddr_len >= sizeof(struct sockaddr_un)) {
		PRINTK("unix_proto_connect: bad length %d\n", sockaddr_len);
		return -EINVAL;
	}
	verify_area(uservaddr, sockaddr_len);
	memcpy_fromfs(&sockun, uservaddr, sockaddr_len);
	if (sockun.sun_family != AF_UNIX) {
		PRINTK("unix_proto_connect: family is %d, not AF_UNIX (%d)\n",
		       sockun.sun_family, AF_UNIX);
		return -EINVAL;
	}
        // 根据unix socket的监听的文件,直接从内核查找服务端unix socket 结构体
	if (!(serv_upd = unix_data_lookup(&sockun, sockaddr_len))) {
		PRINTK("unix_proto_connect: can't locate peer\n");
		return -EINVAL;
	}
	if ((i = sock_awaitconn(sock, serv_upd->socket)) < 0) {
		PRINTK("unix_proto_connect: can't await connection\n");
		return i;
	}
	unix_data_ref(UN_DATA(sock->conn));
	UN_DATA(sock)->peerupd = UN_DATA(sock->conn); /* ref server */
	return 0;
}

/*
 * Client side of connection setup: append mysock to servsock's list of
 * incomplete connections, wake the server, and sleep until the state of
 * mysock becomes SS_CONNECTED.
 *
 * Returns 0 when connected, -EINVAL if the server socket does not have
 * SO_ACCEPTCON set, -EINTR if we woke up unconnected while mysock->conn
 * is still set, and -EACCES if mysock->conn was cleared (rejected).
 */
int
sock_awaitconn(struct socket *mysock, struct socket *servsock)
{
	struct socket *last;

	PRINTK("sock_awaitconn: trying to connect socket 0x%x to 0x%x\n",
	       mysock, servsock);
	if (!(servsock->flags & SO_ACCEPTCON)) {
		PRINTK("sock_awaitconn: server not accepting connections\n");
		return -EINVAL;
	}

	/*
	 * put ourselves on the server's incomplete connection queue.
	 */
	mysock->next = NULL;
	cli();
        // the server keeps a queue so that several clients can be pending
	if (!(last = servsock->iconn))
		servsock->iconn = mysock;
	else {
		/* walk to the tail and append */
		while (last->next)
			last = last->next;
		last->next = mysock;
	}
	mysock->state = SS_CONNECTING;
	mysock->conn = servsock;
	sti();

	/*
	 * wake up server, then await connection. server will set state to
	 * SS_CONNECTED if we're connected.
	 */
        // wake the server process so it can handle the connection
	wake_up(servsock->wait);
	if (mysock->state != SS_CONNECTED) {
                // the client blocks until the server accepts the connection
		interruptible_sleep_on(mysock->wait);
		if (mysock->state != SS_CONNECTED) {
			/*
			 * if we're not connected we could have been
			 * 1) interrupted, so we need to remove ourselves
			 *    from the server list
			 * 2) rejected (mysock->conn == NULL), and have
			 *    already been removed from the list
			 */
			if (mysock->conn == servsock) {
				cli();
				/* unlink mysock from the pending list */
				if ((last = servsock->iconn) == mysock)
					servsock->iconn = mysock->next;
				else {
					while (last->next != mysock)
						last = last->next;
					last->next = mysock->next;
				}
				sti();
			}
			return mysock->conn ? -EINTR : -EACCES;
		}
	}
	return 0;
}

客户端调用connect发起连接后,服务端程序调用accept接收连接。

// net/socket.c 文件
/*
 * Implementation of accept(): check that fd names an unconnected socket
 * with SO_ACCEPTCON set, allocate a socket for the new connection, let
 * the family duplicate the settings and complete the connection, and
 * return a new descriptor for it. When upeer_sockaddr is non-NULL the
 * family's getname hook is called for the new socket.
 *
 * Returns the new fd, -EBADF if fd is not a socket, -EINVAL if the socket
 * is not in a state to accept or no descriptor is available, -EAGAIN if
 * no socket could be allocated, or the error from the dup/accept hooks.
 */
static int
sock_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
{
	struct socket *listener, *conn;
	struct file *listen_file;
	int err, newfd;

	PRINTK("sys_accept: fd = %d\n", fd);
	listener = sockfd_lookup(fd, &listen_file);
	if (!listener)
		return -EBADF;
	if (listener->state != SS_UNCONNECTED) {
		PRINTK("sys_accept: socket isn't unconnected\n");
		return -EINVAL;
	}
	if (!(listener->flags & SO_ACCEPTCON)) {
		PRINTK("sys_accept: socket not accepting connections!\n");
		return -EINVAL;
	}

	/* the server side of the new connection gets its own socket */
	conn = sock_alloc(0);
	if (!conn) {
		printk("sys_accept: no more sockets\n");
		return -EAGAIN;
	}
	conn->type = listener->type;
	conn->ops = listener->ops;
	err = listener->ops->dup(conn, listener);
	if (err < 0) {
		sock_release(conn);
		return err;
	}

	/* find a free descriptor slot for the new socket */
	newfd = get_fd(SOCK_INODE(conn));
	if (newfd < 0) {
		sock_release(conn);
		return -EINVAL;
	}

	/* for AF_UNIX this is unix_proto_accept, which takes a pending client */
	err = conn->ops->accept(listener, conn, listen_file->f_flags);
	if (err < 0) {
		sys_close(newfd);
		return err;
	}

	PRINTK("sys_accept: connected socket 0x%x via 0x%x\n",
	       listener, conn);

	if (upeer_sockaddr)
		conn->ops->getname(conn, upeer_sockaddr, upeer_addrlen, 1);

	return newfd;
}

// net/unix.c文件
/*
 * Accept hook for AF_UNIX: take the first client off sock's queue of
 * incomplete connections, pair it with newsock, mark both SS_CONNECTED
 * and wake the client.
 *
 * Returns 0 on success, -EAGAIN if no client is pending and flags has
 * O_NONBLOCK, -ERESTARTSYS if an unblocked signal is pending after the
 * sleep.
 */
static int
unix_proto_accept(struct socket *sock, struct socket *newsock, int flags)
{
   struct socket *clientsock;

	PRINTK("unix_proto_accept: socket 0x%x accepted via socket 0x%x\n",
	       sock, newsock);

	/*
	 * if there aren't any sockets awaiting connection, then wait for
	 * one, unless nonblocking
	 */
	while (!(clientsock = sock->iconn)) {
		if (flags & O_NONBLOCK)
			return -EAGAIN;
		interruptible_sleep_on(sock->wait);
		if (current->signal & ~current->blocked) {
			PRINTK("sys_accept: sleep was interrupted\n");
			return -ERESTARTSYS;
		}
	}

	/*
	 * great. finish the connection relative to server and client,
	 * wake up the client and return the new fd to the server
	 */
	sock->iconn = clientsock->next;	/* dequeue the head of the list */
	clientsock->next = NULL;
	newsock->conn = clientsock;	/* link the two ends to each other */
	clientsock->conn = newsock;
	clientsock->state = SS_CONNECTED;
	newsock->state = SS_CONNECTED;
	wake_up(clientsock->wait);
	/* reference the client's protocol data and remember it as our peer */
        unix_data_ref (UN_DATA(newsock->conn));
	UN_DATA(newsock)->peerupd = UN_DATA(newsock->conn);
	return 0;
}

连接建立起来后,就可以通过read/write函数读写数据了。 write 对应的内核函数为 sys_write, read 对应的内核函数为 sys_read。

// fs/read_write.c 文件中
/*
 * read() system call: validate the descriptor and dispatch to the read
 * operation of the open file (sock_read for sockets).
 *
 * Returns the result of the read operation, 0 for a zero count, -EBADF
 * for a bad or non-readable descriptor, -EINVAL when the file has no
 * read operation.
 */
int sys_read(unsigned int fd,char * buf,unsigned int count)
{
	struct file *filp;
	struct inode *ino;

	if (fd >= NR_OPEN)
		return -EBADF;
	filp = current->filp[fd];
	if (!filp)
		return -EBADF;
	ino = filp->f_inode;
	if (!ino)
		return -EBADF;

	/* bit 0 of f_mode: open for reading */
	if (!(filp->f_mode & 1))
		return -EBADF;
	if (!count)
		return 0;

	verify_area(buf, count);
	if (!filp->f_op || !filp->f_op->read)
		return -EINVAL;
	return filp->f_op->read(ino, filp, buf, count);
}

/*
 * write() system call: validate the descriptor and dispatch to the write
 * operation of the open file (sock_write for sockets).
 *
 * Returns the result of the write operation, 0 for a zero count, -EBADF
 * for a bad or non-writable descriptor, -EINVAL when the file has no
 * write operation.
 */
int sys_write(unsigned int fd,char * buf,unsigned int count)
{
	struct file *filp;
	struct inode *ino;

	if (fd >= NR_OPEN)
		return -EBADF;
	filp = current->filp[fd];
	if (!filp)
		return -EBADF;
	ino = filp->f_inode;
	if (!ino)
		return -EBADF;

	/* bit 1 of f_mode: open for writing */
	if (!(filp->f_mode & 2))
		return -EBADF;
	if (!count)
		return 0;

	if (!filp->f_op || !filp->f_op->write)
		return -EINVAL;
	return filp->f_op->write(ino, filp, buf, count);
}

// 在调用内核函数sock_socket创建socket结构体时,调用了get_fd函数分配一个打开文件的结构体,
// 在该函数中对file的回调函数进行了赋值: file->f_op = &socket_file_ops;
// 因此上述sys_read和sys_write实际调用的函数分别是sock_read和sock_write

// net/socket.c文件
/*
 * File-operation read for sockets: find the socket behind the inode and
 * forward to the family's read hook, passing whether the file has
 * O_NONBLOCK set.
 *
 * Returns the hook's result, -EBADF if the inode has no socket, -EINVAL
 * if the socket has SO_ACCEPTCON set.
 */
static int
sock_read(struct inode *inode, struct file *file, char *ubuf, int size)
{
	struct socket *sock;
	int nonblock;

	PRINTK("sock_read: buf=0x%x, size=%d\n", ubuf, size);

	sock = socki_lookup(inode);
	if (!sock) {
		printk("sock_read: can't find socket for inode!\n");
		return -EBADF;
	}
	if (sock->flags & SO_ACCEPTCON)
		return -EINVAL;

	nonblock = file->f_flags & O_NONBLOCK;
	return sock->ops->read(sock, ubuf, size, nonblock);
}

/*
 * File-operation write for sockets: find the socket behind the inode and
 * forward to the family's write hook, passing whether the file has
 * O_NONBLOCK set.
 *
 * Returns the hook's result, -EBADF if the inode has no socket, -EINVAL
 * if the socket has SO_ACCEPTCON set.
 */
static int
sock_write(struct inode *inode, struct file *file, char *ubuf, int size)
{
	struct socket *sock;
	int nonblock;

	PRINTK("sock_write: buf=0x%x, size=%d\n", ubuf, size);

	sock = socki_lookup(inode);
	if (!sock) {
		printk("sock_write: can't find socket for inode!\n");
		return -EBADF;
	}
	if (sock->flags & SO_ACCEPTCON)
		return -EINVAL;

	nonblock = file->f_flags & O_NONBLOCK;
	return sock->ops->write(sock, ubuf, size, nonblock);
}

// 在调用sock_socket 函数创建结构体时,根据协议族从proto_table数组中查找对应的回调函数,
// 对于unix socket 则是net/unix.c文件的unix_proto_ops结构体, 参考代码 sock->ops = ops;
// 因此上述函数,对于unix socket 则调用的是: unix_proto_read 和 unix_proto_write

/*
 * we read from our own buf.
 *
 * Copies up to `size` bytes from this socket's buffer to the user buffer
 * `ubuf`. When the buffer is empty the call sleeps until data arrives,
 * unless `nonblock` is set.
 *
 * Returns the number of bytes copied; 0 for size <= 0 or when the socket
 * is SS_DISCONNECTING with nothing buffered; -EINVAL if it is in any other
 * non-connected state; -EAGAIN if nonblock and nothing is buffered;
 * -ERESTARTSYS if an unblocked signal is pending after the sleep.
 */
static int
unix_proto_read(struct socket *sock, char *ubuf, int size, int nonblock)
{
	struct unix_proto_data *upd;
	int todo, avail;

	if ((todo = size) <= 0)
		return 0;
	upd = UN_DATA(sock);
	/* wait until the buffer holds something */
	while (!(avail = UN_BUF_AVAIL(upd))) {
		if (sock->state != SS_CONNECTED) {
			PRINTK("unix_proto_read: socket not connected\n");
			return (sock->state == SS_DISCONNECTING) ? 0 : -EINVAL;
		}
		PRINTK("unix_proto_read: no data available...\n");
		if (nonblock)
			return -EAGAIN;
		interruptible_sleep_on(sock->wait);
		if (current->signal & ~current->blocked) {
			PRINTK("unix_proto_read: interrupted\n");
			return -ERESTARTSYS;
		}
		if (sock->state == SS_DISCONNECTING) {
			PRINTK("unix_proto_read: disconnected\n");
			return 0;
		}
	}

	/*
	 * copy from the read buffer into the user's buffer, watching for
	 * wraparound. then we wake up the writer
	 */
	do {
		int part, cando;

		if (avail <= 0) {
			PRINTK("unix_proto_read: AVAIL IS NEGATIVE!!!\n");
			send_sig(SIGKILL,current,1);
			return -EINTR;
		}

		/* limit the chunk to what is wanted, what is available ... */
		if ((cando = todo) > avail)
			cando = avail;
		/* ... and what lies before the end of the buffer */
		if (cando > (part = BUF_SIZE - upd->bp_tail))
			cando = part;
		PRINTK("unix_proto_read: avail=%d, todo=%d, cando=%d\n",
		       avail, todo, cando);
		verify_area(ubuf, cando);
		memcpy_tofs(ubuf, upd->buf + upd->bp_tail, cando);
		/* the mask wraps bp_tail; this assumes BUF_SIZE is a power of two */
		upd->bp_tail = (upd->bp_tail + cando) & (BUF_SIZE-1);
		ubuf += cando;
		todo -= cando;
		if (sock->state == SS_CONNECTED)
			wake_up(sock->conn->wait);
		avail = UN_BUF_AVAIL(upd);
	} while (todo && avail);
	return size - todo;
}

/*
 * we write to our peer's buf. when we connected we ref'd this peer so we
 * are safe that the buffer remains, even after the peer has disconnected,
 * which we check other ways.
 *
 * Copies up to `size` bytes from the user buffer `ubuf` into the peer's
 * buffer. When the buffer is full the call sleeps until space appears,
 * unless `nonblock` is set.
 *
 * Returns the number of bytes copied; 0 for size <= 0; -EINVAL if the
 * socket is not connected; -EINTR (after sending SIGPIPE to the caller)
 * if it is SS_DISCONNECTING; -EAGAIN if nonblock and the buffer is full;
 * -ERESTARTSYS if an unblocked signal is pending after the sleep.
 */
static int
unix_proto_write(struct socket *sock, char *ubuf, int size, int nonblock)
{
	struct unix_proto_data *pupd;
	int todo, space;

	if ((todo = size) <= 0)
		return 0;
	if (sock->state != SS_CONNECTED) {
		PRINTK("unix_proto_write: socket not connected\n");
		if (sock->state == SS_DISCONNECTING) {
			send_sig(SIGPIPE,current,1);
			return -EINTR;
		}
		return -EINVAL;
	}
	pupd = UN_DATA(sock)->peerupd;	/* safer than sock->conn */

	/* wait until the peer's buffer has room */
	while (!(space = UN_BUF_SPACE(pupd))) {
		PRINTK("unix_proto_write: no space left...\n");
		if (nonblock)
			return -EAGAIN;
		interruptible_sleep_on(sock->wait);
		if (current->signal & ~current->blocked) {
			PRINTK("unix_proto_write: interrupted\n");
			return -ERESTARTSYS;
		}
		if (sock->state == SS_DISCONNECTING) {
			PRINTK("unix_proto_write: disconnected (SIGPIPE)\n");
			send_sig(SIGPIPE,current,1);
			return -EINTR;
		}
	}

	/*
	 * copy from the user's buffer to the write buffer, watching for
	 * wraparound. then we wake up the reader
	 */
	do {
		int part, cando;

		if (space <= 0) {
			PRINTK("unix_proto_write: SPACE IS NEGATIVE!!!\n");
			send_sig(SIGKILL,current,1);
			return -EINTR;
		}

		/*
		 * we may become disconnected inside this loop, so watch
		 * for it (peerupd is safe until we close)
		 */
		if (sock->state == SS_DISCONNECTING) {
			send_sig(SIGPIPE,current,1);
			return -EINTR;
		}
		/* limit the chunk to what is left, what fits ... */
		if ((cando = todo) > space)
			cando = space;
		/* ... and what lies before the end of the buffer */
		if (cando > (part = BUF_SIZE - pupd->bp_head))
			cando = part;
		PRINTK("unix_proto_write: space=%d, todo=%d, cando=%d\n",
		       space, todo, cando);
		verify_area(ubuf, cando);
		memcpy_fromfs(pupd->buf + pupd->bp_head, ubuf, cando);
		/* the mask wraps bp_head; this assumes BUF_SIZE is a power of two */
		pupd->bp_head = (pupd->bp_head + cando) & (BUF_SIZE-1);
		ubuf += cando;
		todo -= cando;
		if (sock->state == SS_CONNECTED)
			wake_up(sock->conn->wait);
		space = UN_BUF_SPACE(pupd);
	} while (todo && space);
	return size - todo;
}

总结

  1. 系统初始化时,调用sock_init函数,该函数调用了内核支持的协议族(AF_UNIX, AF_INET)初始化函数unix_proto_ops.init(unix_proto_init), inet_proto_ops.init (ip_proto_init) 。

vislee avatar Jan 02 '22 16:01 vislee