diff --git a/src/elf.c b/src/elf.c
index a1781724d..c158f628c 100644
--- a/src/elf.c
+++ b/src/elf.c
@@ -8,6 +8,7 @@
 
 #include "elf.h"
 #include "io.h"
+#include "utils.h"
 
 #if defined(_WIN32)
 /* fallback to standard I/O text stream */
@@ -290,16 +291,22 @@ bool elf_load(elf_t *e, riscv_t *rv, memory_t *mem)
     return true;
 }
 
-bool elf_open(elf_t *e, const char *path)
+bool elf_open(elf_t *e, const char *_path)
 {
     /* free previous memory */
     if (e->raw_data)
         release(e);
 
+    char *path = sanitize_path(_path);
+    if (!path)
+        return false;
+
 #if defined(USE_MMAP)
     int fd = open(path, O_RDONLY);
-    if (fd < 0)
+    if (fd < 0) {
+        free(path);
         return false;
+    }
 
     /* get file size */
     struct stat st;
@@ -312,14 +319,17 @@ bool elf_open(elf_t *e, const char *path)
     e->raw_data = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
     if (e->raw_data == MAP_FAILED) {
         release(e);
+        free(path);
         return false;
     }
     close(fd);
 
 #else /* fallback to standard I/O text stream */
     FILE *f = fopen(path, "rb");
-    if (!f)
+    if (!f) {
+        free(path);
         return false;
+    }
 
     /* get file size */
     fseek(f, 0, SEEK_END);
@@ -327,6 +337,7 @@ bool elf_open(elf_t *e, const char *path)
     fseek(f, 0, SEEK_SET);
     if (e->raw_size == 0) {
         fclose(f);
+        free(path);
         return false;
     }
 
@@ -339,6 +350,7 @@ bool elf_open(elf_t *e, const char *path)
     fclose(f);
     if (r != e->raw_size) {
         release(e);
+        free(path);
         return false;
     }
 #endif /* USE_MMAP */
@@ -349,9 +361,11 @@ bool elf_open(elf_t *e, const char *path)
     /* check it is a valid ELF file */
     if (!is_valid(e)) {
         release(e);
+        free(path);
         return false;
     }
 
+    free(path);
     return true;
 }
 
diff --git a/src/utils.c b/src/utils.c
index b4384ac4c..c1018e5d9 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -4,6 +4,8 @@
  */
 
 #include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
 #include <sys/time.h>
 #include <time.h>
 
@@ -67,3 +69,100 @@ void rv_clock_gettime(struct timespec *tp)
     tp->tv_sec = tv_sec;
     tp->tv_nsec = tv_usec / 1000; /* Transfer to microseconds */
 }
+
+char *sanitize_path(const char *orig_path)
+{
+    if (!orig_path)
+        return NULL;
+
+    const size_t n = strlen(orig_path);
+
+    /* After sanitization, the path is never longer than the original one,
+     * except that an empty path becomes ".". Reserve room for that case and
+     * for the terminating NUL; calloc() keeps the buffer zero-filled.
+     */
+    char *ret = calloc((n ? n : 1) + 1, 1);
+    if (!ret)
+        return NULL;
+
+    if (n == 0) {
+        ret[0] = '.';
+        return ret;
+    }
+
+    int rooted = (orig_path[0] == '/');
+
+    /*
+     * Invariants:
+     * reading from orig_path; r is index of next byte to process
+     * writing to ret; w is index of next byte to write
+     * dotdot is index in ret where .. must stop, either because
+     * a) it is the leading slash
+     * b) it is a leading ../../.. prefix.
+     */
+    size_t w = 0;
+    size_t r = 0;
+    size_t dotdot = 0;
+    if (rooted) {
+        ret[w] = '/';
+        w++;
+        r = 1;
+        dotdot = 1;
+    }
+
+    while (r < n) {
+        if (orig_path[r] == '/') {
+            /* empty path element */
+            r++;
+        } else if (orig_path[r] == '.' &&
+                   (r + 1 == n || orig_path[r + 1] == '/')) {
+            /* . element */
+            r++;
+        } else if (orig_path[r] == '.' && orig_path[r + 1] == '.' &&
+                   (r + 2 == n || orig_path[r + 2] == '/')) {
+            /* .. element: remove to last / */
+            r += 2;
+
+            if (w > dotdot) {
+                /* can backtrack */
+                w--;
+                while (w > dotdot && ret[w] != '/') {
+                    w--;
+                }
+            } else if (!rooted) {
+                /* cannot backtrack, but not rooted, so append .. element. */
+                if (w > 0) {
+                    ret[w] = '/';
+                    w++;
+                }
+                ret[w] = '.';
+                w++;
+                ret[w] = '.';
+                w++;
+                dotdot = w;
+            }
+        } else {
+            /* real path element: add slash if needed */
+            if ((rooted && w != 1) || (!rooted && w != 0)) {
+                ret[w] = '/';
+                w++;
+            }
+
+            /* copy element */
+            for (; r < n && orig_path[r] != '/'; r++) {
+                ret[w] = orig_path[r];
+                w++;
+            }
+        }
+    }
+
+    /* Turn empty string into "." */
+    if (w == 0) {
+        ret[w] = '.';
+        w++;
+    }
+
+    /* Backtracking over ".." may leave stale bytes past w; terminate here. */
+    ret[w] = '\0';
+    return ret;
+}
diff --git a/src/utils.h b/src/utils.h
index 83d2a259c..fa11e66c1 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -21,3 +21,32 @@ void rv_clock_gettime(struct timespec *tp);
         /* 0x61C88647 is 32-bit golden ratio */                         \
         return (val * 0x61C88647 >> (32 - size_bits)) & ((size) - (1)); \
     }
+
+/*
+ * Reference:
+ * https://cs.opensource.google/go/go/+/refs/tags/go1.21.4:src/path/path.go;l=51
+ *
+ * sanitize_path returns the shortest path name equivalent to orig_path
+ * by purely lexical processing. It applies the following rules
+ * iteratively until no further processing can be done:
+ *
+ *  1. Replace multiple slashes with a single slash.
+ *  2. Eliminate each . path name element (the current directory).
+ *  3. Eliminate each inner .. path name element (the parent directory)
+ *     along with the non-.. element that precedes it.
+ *  4. Eliminate .. elements that begin a rooted path:
+ *     that is, replace "/.." by "/" at the beginning of a path.
+ *
+ * The returned path ends in a slash only if it is the root "/".
+ *
+ * If the result of this process is an empty string, sanitize_path
+ * returns the string ".".
+ *
+ * The result is heap-allocated and must be released by the caller with
+ * free(). NULL is returned if orig_path is NULL or the allocation fails.
+ *
+ * See also Rob Pike, “Lexical File Names in Plan 9 or
+ * Getting Dot-Dot Right,”
+ * https://9p.io/sys/doc/lexnames.html
+ */
+char *sanitize_path(const char *orig_path);