From 6657edba7dd34356c76d2233532c4aabbeec0b86 Mon Sep 17 00:00:00 2001 From: Chun-Hung Tseng Date: Wed, 15 Nov 2023 23:46:54 +0100 Subject: [PATCH] Introduce fuzzer By leveraging the LLVM's LibFuzzer, we will be able to test our emulator code more thoroughly. The seed corpus being added automatically are all the elf files in the build folder. Major changes: - Conditional compilation for the main function, as the fuzzer will generate its own main function - Need to use the clang toolchain - Introduce loading buffer as elf file, as the input from fuzzer will be passed into the emulator directly instead of going through a file - Fixed ELF verification logic as the fuzzer already breaks the code Other minor changes are: - Fix Codacy issue "rejecting SARIF, as there are more runs than allowed" --- .ci/fuzz.sh | 37 +++++++++++++++++++ .codacy.yml | 5 +++ .github/workflows/fuzz.yml | 17 +++++++++ .gitignore | 10 +++++ Dockerfile | 2 +- Makefile | 3 +- docs/fuzzer.md | 23 ++++++++++++ src/elf.c | 31 ++++++++++++++-- src/elf.h | 4 ++ src/fuzz-target.cc | 76 ++++++++++++++++++++++++++++++++++++++ src/main.c | 73 +++++++++++++++++++++++++++++++++++- src/riscv.h | 2 + 12 files changed, 277 insertions(+), 6 deletions(-) create mode 100755 .ci/fuzz.sh create mode 100644 .codacy.yml create mode 100644 .github/workflows/fuzz.yml create mode 100644 docs/fuzzer.md create mode 100644 src/fuzz-target.cc diff --git a/.ci/fuzz.sh b/.ci/fuzz.sh new file mode 100755 index 00000000..c59786d4 --- /dev/null +++ b/.ci/fuzz.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -e -u -o pipefail + +# check the existence of the clang toolchain +command -v clang &> /dev/null + +# compile +make clean +clang \ + -g -O1 \ + -fsanitize=fuzzer,address,undefined \ + -include src/common.h \ + -D RV32_FEATURE_EXT_F=0 \ + -D RV32_FEATURE_SDL=0 \ + -D DEFAULT_STACK_ADDR=0xFFFFE000 \ + -D DEFAULT_ARGS_ADDR=0xFFFFF000 \ + -D FUZZER \ + -o build/rv32emu_fuzz \ + src/fuzz-target.cc \ + src/map.c \ + src/utils.c \ + 
src/decode.c \ + src/io.c \ + src/syscall.c \ + src/emulate.c \ + src/riscv.c \ + src/elf.c \ + src/cache.c \ + src/mpool.c \ + src/main.c + +# populate the initial CORPUS for the fuzzer using valid elf +mkdir -p build/fuzz/CORPUS_DIR +cp build/*.elf build/fuzz/CORPUS_DIR + +# execute +./build/rv32emu_fuzz build/fuzz/CORPUS_DIR -timeout=3 -max_total_time=1200 diff --git a/.codacy.yml b/.codacy.yml new file mode 100644 index 00000000..5e91213f --- /dev/null +++ b/.codacy.yml @@ -0,0 +1,5 @@ +exclude_paths: + - ".github/**" + - "build/**" + - "docs/**" + - "tests/**" \ No newline at end of file diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml new file mode 100644 index 00000000..f8d1e4b8 --- /dev/null +++ b/.github/workflows/fuzz.yml @@ -0,0 +1,17 @@ +name: Fuzz Test + +on: [push, pull_request] + +jobs: + rv32emu: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: install-dependencies + run: | + sudo apt-get update + sudo apt-get install clang + shell: bash + - name: Run fuzzer + run: .ci/fuzz.sh + shell: bash diff --git a/.gitignore b/.gitignore index 212a5324..069bf9a5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,18 +6,28 @@ build/id1/ build/gfx.wad build/doomrc toolchain/ +.vscode # built objects build/.config build/rv32emu +build/rv32emu_fuzz build/arch-test build/mini-gdbstub build/softfloat build/cache/ build/map/ build/path/ +build/fuzz/ *.o *.o.d tests/**/*.elf tests/arch-test-target/config.ini __pycache__/ + +# fuzzer +crash-* +leak-* +timeout-* +fuzz.elf +*.log diff --git a/Dockerfile b/Dockerfile index e3aa92ba..59385e8a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ FROM ubuntu:22.04 as final # Install extra packages for the emulator to compile and execute with full capabilities correctly RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ - libsdl2-dev libsdl2-mixer-dev python3-pip git && \ + libsdl2-dev libsdl2-mixer-dev python3-pip git clang && \ rm -rf /var/lib/apt/lists/* 
RUN python3 -m pip install git+https://github.com/riscv/riscof diff --git a/Makefile b/Makefile index dc0ea912..6b59a81e 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ include mk/toolchain.mk OUT ?= build BIN := $(OUT)/rv32emu +FUZZ_BIN := $(OUT)/rv32emu_fuzz CONFIG_FILE := $(OUT)/.config -include $(CONFIG_FILE) @@ -214,7 +215,7 @@ endif endif clean: - $(RM) $(BIN) $(OBJS) $(HIST_BIN) $(HIST_OBJS) $(deps) $(CACHE_OUT) + $(RM) $(BIN) $(FUZZ_BIN) $(OBJS) $(HIST_BIN) $(HIST_OBJS) $(deps) $(CACHE_OUT) distclean: clean -$(RM) $(DOOM_DATA) $(QUAKE_DATA) $(RM) -r $(OUT)/id1 diff --git a/docs/fuzzer.md b/docs/fuzzer.md new file mode 100644 index 00000000..823fc236 --- /dev/null +++ b/docs/fuzzer.md @@ -0,0 +1,23 @@ +# Fuzzing + +We are using the [LLVM Fuzzer](https://llvm.org/docs/LibFuzzer.html). + +The fuzzer used here is without structured input generation. Instead, we rely +on the fuzzer to mutate the input. + +The initial seeds are all the ELF files in the `build` directory. + +## Execution + +The script compiles the emulator and links it with the LibFuzzer, prepares the seed corpus, and executes the fuzzing tests. + +- `.ci/fuzz.sh` + +## References + +> Inspired by the fuzzer from [libriscv](https://github.com/fwsGonzo/libriscv/tree/master/fuzz). 
+ +- [LLVM official LibFuzzer documentation](https://llvm.org/docs/LibFuzzer.html#corpus) +- [Chromium - Getting started with LibFuzzer](https://chromium.googlesource.com/chromium/src/+/refs/heads/main/testing/libfuzzer/getting_started_with_libfuzzer.md) +- [Fuzzing tutorial](https://github.com/google/fuzzing/blob/master/tutorial/libFuzzerTutorial.md) +- [UBSAN](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html) diff --git a/src/elf.c b/src/elf.c index 0d579c5e..a7b43b5d 100644 --- a/src/elf.c +++ b/src/elf.c @@ -97,7 +97,7 @@ void elf_delete(elf_t *e) /* release a loaded ELF file */ static void release(elf_t *e) { -#if !defined(USE_MMAP) +#if !defined(USE_MMAP) && !defined(FUZZER) free(e->raw_data); #endif @@ -291,18 +291,36 @@ bool elf_load(elf_t *e, riscv_t *rv, memory_t *mem) return true; } +#ifdef FUZZER +bool elf_open(elf_t *e, uint8_t *data, size_t len) +#else bool elf_open(elf_t *e, const char *input) +#endif { /* free previous memory */ if (e->raw_data) release(e); +#ifndef FUZZER char *path = sanitize_path(input); if (!path) { return false; } +#endif -#if defined(USE_MMAP) +#if defined(FUZZER) + if (!data || !len) { + /* if the fuzzer sent in an empty buffer, we don't proceed further */ + return false; + } + + /* get file size */ + e->raw_size = len; + + /* allocate memory */ + free(e->raw_data); + e->raw_data = (uint8_t *) data; +#elif defined(USE_MMAP) int fd = open(path, O_RDONLY); if (fd < 0) { free(path); @@ -324,7 +342,6 @@ bool elf_open(elf_t *e, const char *input) return false; } close(fd); - #else /* fallback to standard I/O text stream */ FILE *f = fopen(path, "rb"); if (!f) { @@ -357,16 +374,24 @@ bool elf_open(elf_t *e, const char *input) #endif /* USE_MMAP */ /* point to the header */ + if (sizeof(struct Elf32_Ehdr) > e->raw_size) { + release(e); + return false; + } e->hdr = (const struct Elf32_Ehdr *) e->raw_data; /* check it is a valid ELF file */ if (!is_valid(e)) { release(e); +#ifndef FUZZER free(path); +#endif return false; 
} +#ifndef FUZZER free(path); +#endif return true; } diff --git a/src/elf.h b/src/elf.h index d3de2617..fa949d79 100644 --- a/src/elf.h +++ b/src/elf.h @@ -133,7 +133,11 @@ elf_t *elf_new(); void elf_delete(elf_t *e); /* Open an ELF file from specified path */ +#ifdef FUZZER +bool elf_open(elf_t *e, uint8_t *data, size_t len); +#else bool elf_open(elf_t *e, const char *path); +#endif /* Find a symbol entry */ const struct Elf32_Sym *elf_get_symbol(elf_t *e, const char *name); diff --git a/src/fuzz-target.cc b/src/fuzz-target.cc new file mode 100644 index 00000000..c8a1d7ae --- /dev/null +++ b/src/fuzz-target.cc @@ -0,0 +1,76 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "riscv.h" + +const int max_cycles = 5000; +const char *fake_rv32emu_name = "./fake_rv32emu"; +const char *fake_elf_name = "fake_elf"; + +/* In order to be able to inspect a coredump we want to crash on every ASAN + * error. + */ +extern "C" void __asan_on_error() +{ + abort(); +} +extern "C" void __msan_on_error() +{ + abort(); +} + +static void fuzz_elf_loader(const uint8_t *data, size_t len) +{ + int argc = 1 + 2 * 3 + 1; + char **args = (char **) malloc(sizeof(char *) * argc); + + char *arg0 = (char *) malloc(strlen(fake_rv32emu_name) + 1); + strncpy(arg0, fake_rv32emu_name, strlen(fake_rv32emu_name) + 1); + args[0] = arg0; + + char *arg1 = (char *) malloc(3); + strncpy(arg1, "-s", 3); + args[1] = arg1; + args[2] = (char *) data; + + char *arg3 = (char *) malloc(3); + strncpy(arg3, "-l", 3); + args[3] = arg3; + char *len_str = + (char *) malloc(20 + 1); /* LLONG_MIN in base 10 has 20 chars */ + sprintf(len_str, "%zu", len); + args[4] = len_str; + + char *arg5 = (char *) malloc(3); + strncpy(arg5, "-k", 3); + args[5] = arg5; + char *max_cycles_str = + (char *) malloc(11 + 1); /* INT_MIN in base 10 has 11 chars */ + sprintf(max_cycles_str, "%d", max_cycles); + args[6] = max_cycles_str; + + char *arg7 = (char *) malloc(strlen(fake_elf_name) + 1); + strncpy(arg7, fake_elf_name, strlen(fake_elf_name) + 
1); + args[7] = arg7; + + int ret = rv_init_and_execute_elf(argc, args); + if (ret == 0) { + fprintf(stderr, "Executed successfully\n"); + } else { + fprintf(stderr, "Executed with failure\n"); + } + + free(arg0); + free(arg1); + free(arg3); + free(len_str); + free(arg5); + free(max_cycles_str); + free(arg7); + free(args); +} + +extern "C" void LLVMFuzzerTestOneInput(const uint8_t *data, size_t len) +{ + fuzz_elf_loader(data, len); +} diff --git a/src/main.c b/src/main.c index 7f35034c..24040008 100644 --- a/src/main.c +++ b/src/main.c @@ -34,6 +34,18 @@ static bool opt_quiet_outputs = false; /* target executable */ static const char *opt_prog_name = "a.out"; +#ifdef FUZZER +/* ELF input as string (for fuzzing) */ +static bool opt_elf_string = false; +static uint8_t *elf_string = NULL; + +static bool opt_elf_strlen = NULL; +static int elf_strlen = 0; + +static bool opt_max_execution_cycles = NULL; +static int max_execution_cycles = 0; +#endif + /* target argc and argv */ static int prog_argc; static char **prog_args; @@ -83,6 +95,13 @@ static void run_and_trace(riscv_t *rv, elf_t *elf) } } +#ifdef FUZZER +static void run(riscv_t *rv, int max_cycles) +{ + /* step instructions */ + rv_step(rv, max_cycles); +} +#else static void run(riscv_t *rv) { const uint32_t cycles_per_step = 100; @@ -91,6 +110,7 @@ static void run(riscv_t *rv) rv_step(rv, cycles_per_step); } } +#endif static void print_usage(const char *filename) { @@ -117,6 +137,40 @@ static bool parse_args(int argc, char **args) int opt; int emu_argc = 0; +#ifdef FUZZER + /* + * getopt() won't work with binary data as control characters will screw the + * string parsing + */ + int idx = 1; + while (idx + 1 < argc) { + emu_argc++; + char opt = args[idx][1]; + char *optarg = args[idx + 1]; + + switch (opt) { + case 's': /* binary string */ + opt_elf_string = true; + elf_string = (uint8_t *) optarg; + emu_argc++; + break; + case 'l': /* binary string len */ + opt_elf_strlen = true; + elf_strlen = atoi(optarg); + 
emu_argc++; + break; + case 'k': /* max execution cycle (since some program won't terminate, e.g. while(1) {} */ + opt_max_execution_cycles = true; + max_execution_cycles = atoi(optarg); + emu_argc++; + break; + default: + return false; + } + + idx += 2; + } +#else while ((opt = getopt(argc, args, optstr)) != -1) { emu_argc++; @@ -151,6 +205,7 @@ static bool parse_args(int argc, char **args) return false; } } +#endif prog_argc = argc - emu_argc - 1; /* optind points to the first non-option string, so it should indicate the @@ -187,7 +242,7 @@ static void dump_test_signature(elf_t *elf) fclose(f); } -int main(int argc, char **args) +int rv_init_and_execute_elf(int argc, char **args) { if (argc == 1 || !parse_args(argc, args)) { print_usage(args[0]); @@ -196,8 +251,13 @@ int main(int argc, char **args) /* open the ELF file from the file system */ elf_t *elf = elf_new(); +#ifdef FUZZER + if (!elf_open(elf, (uint8_t *) elf_string, elf_strlen)) { +#else if (!elf_open(elf, opt_prog_name)) { +#endif fprintf(stderr, "Unable to open ELF file '%s'\n", opt_prog_name); + elf_delete(elf); return 1; } @@ -251,7 +311,11 @@ int main(int argc, char **args) } #endif else { +#ifdef FUZZER + run(rv, max_execution_cycles); +#else run(rv); +#endif } /* dump registers as JSON */ @@ -269,3 +333,10 @@ int main(int argc, char **args) return 0; } + +#ifndef FUZZER +int main(int argc, char **args) +{ + return rv_init_and_execute_elf(argc, args); +} +#endif diff --git a/src/riscv.h b/src/riscv.h index 1d5e45a3..c8f2add5 100644 --- a/src/riscv.h +++ b/src/riscv.h @@ -197,6 +197,8 @@ bool rv_has_halted(riscv_t *rv); /* return the flag of outputting exit code */ bool rv_enables_to_output_exit_code(riscv_t *rv); +/* the init and execute logic shared by main and fuzzer */ +int rv_init_and_execute_elf(int argc, char **args); #ifdef __cplusplus }; #endif