libhermit icon indicating copy to clipboard operation
libhermit copied to clipboard

Uhyve file read/write should not assume target buffer is contiguous in physical memory

Open olivierpierre opened this issue 7 years ago • 4 comments

Hello,

With Uhyve, when performing file read/write operations in the host, the target buffer is fully read or written from the guest physical memory. Such a design assumes that the buffer is contiguous in physical memory, which is not always the case. When using a buffer allocated with malloc, example of bad things that can happen are:

  • Reading from a file in a buffer that is not or partially mapped to physical memory (because of on-demand mapping)
  • Reading from a file in a buffer which virtual pages are not mapped to contiguous physical pages
  • Writing in a file from a buffer which virtual pages are not mapped to contiguous physical pages

You can confirm these issues with these two sample programs and the corresponding Makefile:

/* read_test.c */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE		4096
#define FILE_TO_READ	"fread.bin"
#define FILE_SIZE		PAGE_SIZE * 4

char verify_buf[FILE_SIZE];

int main(int argc, char **argv) {
	int fd, verify_fd, i;
	char *buf;

	printf("Read test starting\n");

	buf = malloc(FILE_SIZE);
	if(!buf) {
		fprintf(stderr, "error malloc!\n");
		return -1;
	}

	verify_fd = open(FILE_TO_READ, O_RDONLY, 0x0);
	fd = open(FILE_TO_READ, O_RDONLY, 0x0);
	if(fd == -1 || verify_fd == -1) {
		printf("error open\n");
		return -1;
	}

	if(read(verify_fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "read error\n");
		return -1;
	}

	if(read(fd, buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "read error\n");
		return -1;
	}
	printf("Read in malloc'd buffer done\n");

	for(i=0; i<FILE_SIZE; i++)
		if(buf[i] != verify_buf[i]) {
			fprintf(stderr, "verification failed!\n");
			fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
			fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
			return -1;
		}

	printf("Read verification successful!\n");

	free(buf);
	close(fd);
	return 0;
}
/* write_test.c */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE		4096
#define FILE_SIZE		PAGE_SIZE * 4
#define FILE_TO_WRITE	"fwrite.bin"

char verify_buf[FILE_SIZE];

int main(int argc, char **argv) {
	char *buf;
	int fd, i;

	printf("write test starting\n");

	buf = malloc(FILE_SIZE);
	if(!buf) {
		fprintf(stderr, "error malloc\n");
		return -1;
	}

	fd = open(FILE_TO_WRITE, O_RDWR | O_CREAT | O_TRUNC, 0666);
	if(fd == -1) {
		fprintf(stderr, "error opening %s\n", FILE_TO_WRITE);
		return -1;
	}

	for(i=0; i<FILE_SIZE; i++)
		buf[i] = (i/4096) + '0';

	if(write(fd, buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "error writing in %s", FILE_TO_WRITE);
		return -1;
	}
	printf("Write from malloc'd buffer done\n");

	if(lseek(fd, 0x0, SEEK_SET) != 0) {
		fprintf(stderr, "error lseek\n");
		return -1;
	}

	if(read(fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "error read\n");
		return -1;
	}

	for(i=0; i<FILE_SIZE; i++)
		if(verify_buf[i] != buf[i]) {
			fprintf(stderr, "verification failed!\n");
			fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
			fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
			return -1;
		}

	printf("Write verification successful!\n");

	free(buf);
	close(fd);
	return 0;
}
# Makefile

HERMIT_LOCAL_INSTALL=/home/pierre/Desktop/HermitCore/prefix/
HERMIT_TOOLCHAIN_INSTALL=/opt/hermit/
READ_TEST=read_test
WRITE_TEST=write_test
READ_SAMPLE_FILE=fread.bin
WRITE_SAMPLE_FILE=fwrite.bin
READ_TEST_SRC=$(READ_TEST).c
WRITE_TEST_SRC=$(WRITE_TEST).c
CFLAGS=-Wall -Werror
LDFLAGS=-L$(HERMIT_LOCAL_INSTALL)/x86_64-hermit/lib
CC=$(HERMIT_TOOLCHAIN_INSTALL)/bin/x86_64-hermit-gcc
PROXY=$(HERMIT_LOCAL_INSTALL)/bin/proxy
VERBOSE?=0

all: $(READ_TEST) $(WRITE_TEST)

$(READ_TEST): $(READ_TEST_SRC) $(READ_SAMPLE_FILE)
	$(CC) $(CFLAGS) $(READ_TEST_SRC) -o $(READ_TEST) $(LDFLAGS)

$(WRITE_TEST): $(WRITE_TEST_SRC)
	$(CC) $(CFLAGS) $(WRITE_TEST_SRC) -o $(WRITE_TEST) $(LDFLAGS)

$(READ_SAMPLE_FILE):
	dd if=/dev/urandom of=$(READ_SAMPLE_FILE) bs=4K count=4

test: $(READ_TEST) $(WRITE_TEST)
	HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(READ_TEST)
	HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(WRITE_TEST)

clean:
	rm -rf $(READ_TEST) $(WRITE_TEST) *.o $(READ_SAMPLE_FILE) \
		$(WRITE_SAMPLE_FILE)

Here is my proposed solution: a patch that forces the file read and write operations to be performed on a page-by-page basis, i.e. Uhyve file read or write operation is called for each page composing the buffer. I did not extensively tested it, nor did I evaluated the performance impact (I guess it slows things down a bit).

diff --git a/kernel/syscall.c b/kernel/syscall.c
index d10691d..f7b7b00 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -159,6 +159,47 @@ typedef struct {
 	ssize_t ret;
 } __attribute__((packed)) uhyve_read_t;
 
+/* Pages belonging to the heap are mapped on demand, and are not always
+ * contiguous in physical memory. Thus, we need to force allocation and call
+ * the hypervisor file read function page by page.
+ */
+ssize_t uhyve_noncontiguous_read(int fd, char *buf, size_t len) {
+	ssize_t bytes_read = 0;
+	size_t cur_len = 0;
+	uhyve_read_t arg = {fd, 0x0, 0x0, -1};
+
+	while(len) {
+
+		if(!((size_t)buf % PAGE_SIZE)) {
+			cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+		} else {
+			cur_len = PAGE_CEIL((size_t)buf) - (size_t)buf;
+			cur_len = (((size_t)buf + len) > PAGE_CEIL((size_t)buf))
+				? (PAGE_CEIL((size_t)buf) - (size_t)buf) : len;
+		}
+
+		/* Force mapping */
+		memset(buf, 0x0, 1);
+
+		arg.buf = (char *)virt_to_phys((size_t)buf);
+		arg.len = cur_len;
+		arg.ret = -1;
+		uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&arg));
+
+		if(arg.ret < 0)
+			return arg.ret;
+
+		bytes_read += arg.ret;
+		if(arg.ret != cur_len)
+			break;
+
+		buf += cur_len;
+		len -= cur_len;
+	}
+
+	return bytes_read;
+}
+
 ssize_t sys_read(int fd, char* buf, size_t len)
 {
 	sys_read_t sysargs = {__NR_read, fd, len};
@@ -174,11 +215,16 @@ ssize_t sys_read(int fd, char* buf, size_t len)
 	}
 
 	if (is_uhyve()) {
+
+		return uhyve_noncontiguous_read(fd, buf, len);
+
+#if 0
 		uhyve_read_t uhyve_args = {fd, (char*) virt_to_phys((size_t) buf), len, -1};
 
 		uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&uhyve_args));
 
 		return uhyve_args.ret;
+#endif
 	}
 
 	spinlock_irqsave_lock(&lwip_lock);
@@ -222,6 +268,39 @@ typedef struct {
 	size_t len;
 } __attribute__((packed)) uhyve_write_t;
 
+ssize_t uhyve_noncontiguous_write(int fd, const char *buf, size_t len) {
+	char *cur_buf = (char *)buf;
+	ssize_t bytes_written = 0;
+	size_t cur_len = 0;
+	uhyve_write_t arg = {fd, 0x0, 0x0};
+
+	while(len) {
+
+		if(!((size_t)cur_buf % PAGE_SIZE)) {
+			cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+		} else {
+			cur_len = (((size_t)cur_buf + len) > PAGE_CEIL((size_t)cur_buf))
+				? (PAGE_CEIL((size_t)cur_buf) - (size_t)cur_buf) : len;
+		}
+
+		arg.buf = (char *)virt_to_phys((size_t)cur_buf);
+		arg.len = cur_len;
+		uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&arg));
+
+		if(arg.len == (size_t)(-1))
+			return -1;
+
+		bytes_written += arg.len;
+		if(arg.len != cur_len)
+			break;
+
+		cur_buf += cur_len;
+		len -= cur_len;
+	}
+
+	return bytes_written;
+}
+
 ssize_t sys_write(int fd, const char* buf, size_t len)
 {
 	if (BUILTIN_EXPECT(!buf, 0))
@@ -240,11 +319,16 @@ ssize_t sys_write(int fd, const char* buf, size_t len)
 	}
 
 	if (is_uhyve()) {
+
+		return uhyve_noncontiguous_write(fd, buf, len);
+
+#if 0
 		uhyve_write_t uhyve_args = {fd, (const char*) virt_to_phys((size_t) buf), len};
 
 		uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&uhyve_args));
 
 		return uhyve_args.len;
+#endif
 	}
 
 	spinlock_irqsave_lock(&lwip_lock);

olivierpierre avatar Apr 05 '18 14:04 olivierpierre

Hello Pierre,

you are right. I will create patch to solve this issue.

Best

Stefan

stlankes avatar Apr 05 '18 21:04 stlankes

@olivierpierre: I got to know about your problem, however not about your proposed solution. Anyway, I've committed a fix for HermitCore-rs (the Rust implementation of HermitCore), which lets uhyve perform the virtual-to-physical address translation for the buffers of read/write operations. This way, the physical memory addresses of the buffer pages can be arbitrary.

The fix commit is https://github.com/hermitcore/hermit-caves/commit/6b9e4be0dc4ad65411e87c674e38138cd215c47e and seems to have also been merged to the HermitCore C version in the meantime.

ColinFinck avatar Jul 23 '18 12:07 ColinFinck

@olivierpierre : Since yesterday I added a patch to the C version (see pull request 97). Could you test it on your system? By the way, I add also gdb support for aarch64...

stlankes avatar Jul 23 '18 12:07 stlankes

@ColinFinck: I believe my initial post includes a description of the problem as well as two sample programs that trigger the issue. Please let me know if you need something specific.

@stlankes: thank! I will try the patch on our version and let you know

olivierpierre avatar Jul 23 '18 14:07 olivierpierre