Skip to content

Commit

Permalink
Implement file path sanitation (Issue #137)
Browse files Browse the repository at this point in the history
The logic is ported from Golang to C.

Reference code: https://cs.opensource.google/go/go/+/refs/tags/go1.21.4:src/path/path.go;l=70
  • Loading branch information
henrybear327 committed Nov 22, 2023
1 parent dd7ce2e commit c501e2e
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 3 deletions.
18 changes: 15 additions & 3 deletions src/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "elf.h"
#include "io.h"
#include "utils.h"

#if defined(_WIN32)
/* fallback to standard I/O text stream */
Expand Down Expand Up @@ -290,16 +291,20 @@ bool elf_load(elf_t *e, riscv_t *rv, memory_t *mem)
return true;
}

bool elf_open(elf_t *e, const char *path)
bool elf_open(elf_t *e, const char *_path)
{
/* free previous memory */
if (e->raw_data)
release(e);

char *path = sanitize_path(_path);

#if defined(USE_MMAP)
int fd = open(path, O_RDONLY);
if (fd < 0)
if (fd < 0) {
free(path);

Check notice

Code scanning / Cppcheck (reported by Codacy)

MISRA 21.3 rule Note

MISRA 21.3 rule
return false;
}

/* get file size */
struct stat st;
Expand All @@ -312,21 +317,25 @@ bool elf_open(elf_t *e, const char *path)
e->raw_data = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (e->raw_data == MAP_FAILED) {
release(e);
free(path);

Check notice

Code scanning / Cppcheck (reported by Codacy)

MISRA 21.3 rule Note

MISRA 21.3 rule
return false;
}
close(fd);

#else /* fallback to standard I/O text stream */
FILE *f = fopen(path, "rb");
if (!f)
if (!f) {
free(path);

Check notice

Code scanning / Cppcheck (reported by Codacy)

MISRA 21.3 rule Note

MISRA 21.3 rule
return false;
}

/* get file size */
fseek(f, 0, SEEK_END);
e->raw_size = ftell(f);
fseek(f, 0, SEEK_SET);
if (e->raw_size == 0) {
fclose(f);
free(path);

Check notice

Code scanning / Cppcheck (reported by Codacy)

MISRA 21.3 rule Note

MISRA 21.3 rule
return false;
}

Expand All @@ -339,6 +348,7 @@ bool elf_open(elf_t *e, const char *path)
fclose(f);
if (r != e->raw_size) {
release(e);
free(path);

Check notice

Code scanning / Cppcheck (reported by Codacy)

MISRA 21.3 rule Note

MISRA 21.3 rule
return false;
}
#endif /* USE_MMAP */
Expand All @@ -349,9 +359,11 @@ bool elf_open(elf_t *e, const char *path)
/* check it is a valid ELF file */
if (!is_valid(e)) {
release(e);
free(path);

Check notice

Code scanning / Cppcheck (reported by Codacy)

MISRA 21.3 rule Note

MISRA 21.3 rule
return false;
}

free(path);

Check notice

Code scanning / Cppcheck (reported by Codacy)

MISRA 21.3 rule Note

MISRA 21.3 rule
return true;
}

Expand Down
97 changes: 97 additions & 0 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
*/

#include <stdint.h>
#include <stdlib.h>

Check warning

Code scanning / Cppcheck (reported by Codacy)

Include file: <stdlib.h> not found. Please note: Cppcheck does not need standard library headers to get proper results. Warning

Include file: <stdlib.h> not found. Please note: Cppcheck does not need standard library headers to get proper results.
#include <string.h>

Check warning

Code scanning / Cppcheck (reported by Codacy)

Include file: <string.h> not found. Please note: Cppcheck does not need standard library headers to get proper results. Warning

Include file: <string.h> not found. Please note: Cppcheck does not need standard library headers to get proper results.
#include <sys/time.h>
#include <time.h>

Expand Down Expand Up @@ -67,3 +69,98 @@ void rv_clock_gettime(struct timespec *tp)
tp->tv_sec = tv_sec;
tp->tv_nsec = tv_usec / 1000; /* Transfer to microseconds */
}

/*
 * sanitize_path - lexically clean a slash-separated path.
 *
 * Port of Go's path.Clean: collapses repeated slashes, drops "." elements,
 * resolves inner ".." elements against the element that precedes them, and
 * discards ".." elements that would climb above the root of a rooted path.
 * An empty result becomes ".".
 *
 * @orig_path: NUL-terminated path to clean; it is never modified.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * release with free(), or NULL if @orig_path is NULL or the allocation
 * fails.
 */
char *sanitize_path(const char *orig_path)
{
    if (!orig_path)
        return NULL;

    const size_t n = strlen(orig_path);

    /* The cleaned path is never longer than the input, except that an empty
     * input becomes ".". Reserving n + 2 bytes covers both cases including
     * the terminator; calloc() hands the buffer back zero-filled.
     */
    char *ret = calloc(n + 2, 1);
    if (!ret)
        return NULL;

    if (n == 0) {
        ret[0] = '.';
        return ret;
    }

    const int rooted = (orig_path[0] == '/');

    /*
     * Invariants:
     *   r      - index of the next input byte to process (orig_path[r])
     *   w      - index of the next output byte to write (ret[w])
     *   dotdot - index in ret below which ".." must not backtrack, either
     *            because it is the leading slash or because it is a leading
     *            "../../.." prefix
     */
    size_t w = 0;
    size_t r = 0;
    size_t dotdot = 0;
    if (rooted) {
        ret[w++] = '/';
        r = 1;
        dotdot = 1;
    }

    while (r < n) {
        if (orig_path[r] == '/') {
            /* empty path element */
            r++;
        } else if (orig_path[r] == '.' &&
                   (r + 1 == n || orig_path[r + 1] == '/')) {
            /* "." element */
            r++;
        } else if (orig_path[r] == '.' && orig_path[r + 1] == '.' &&
                   (r + 2 == n || orig_path[r + 2] == '/')) {
            /* ".." element: remove back to the previous '/' */
            r += 2;

            if (w > dotdot) {
                /* can backtrack */
                w--;
                while (w > dotdot && ret[w] != '/')
                    w--;
            } else if (!rooted) {
                /* cannot backtrack, but not rooted: keep the ".." */
                if (w > 0)
                    ret[w++] = '/';
                ret[w++] = '.';
                ret[w++] = '.';
                dotdot = w;
            }
            /* rooted and already at the root: "/.." collapses to "/" */
        } else {
            /* real path element; add a separator unless it is the first */
            if ((rooted && w != 1) || (!rooted && w != 0))
                ret[w++] = '/';

            /* copy the element */
            while (r < n && orig_path[r] != '/')
                ret[w++] = orig_path[r++];
        }
    }

    /* Turn an empty result into "." */
    if (w == 0)
        ret[w++] = '.';

    /* Backtracking may have left stale bytes past w; cut the string here. */
    ret[w] = '\0';
    return ret;
}
26 changes: 26 additions & 0 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,29 @@ void rv_clock_gettime(struct timespec *tp);
/* 0x61C88647 is 32-bit golden ratio */ \
return (val * 0x61C88647 >> (32 - size_bits)) & ((size) - (1)); \
}

/*
 * Reference:
 * https://cs.opensource.google/go/go/+/refs/tags/go1.21.4:src/path/path.go;l=51
 *
 * sanitize_path returns the shortest path name equivalent to orig_path
 * by purely lexical processing. It is a C port of Go's path.Clean and
 * applies the following rules iteratively until no further processing
 * can be done:
 *
 * 1. Replace multiple slashes with a single slash.
 * 2. Eliminate each . path name element (the current directory).
 * 3. Eliminate each inner .. path name element (the parent directory)
 * along with the non-.. element that precedes it.
 * 4. Eliminate .. elements that begin a rooted path:
 * that is, replace "/.." by "/" at the beginning of a path.
 *
 * The returned path ends in a slash only if it is the root "/".
 *
 * If the result of this process is an empty string, sanitize_path
 * returns the string ".".
 *
 * orig_path itself is not modified. The result is stored in a
 * heap-allocated buffer that the caller owns and must release with
 * free().
 *
 * See also Rob Pike, “Lexical File Names in Plan 9 or
 * Getting Dot-Dot Right,”
 * https://9p.io/sys/doc/lexnames.html
 */
char *sanitize_path(const char *orig_path);

0 comments on commit c501e2e

Please sign in to comment.