Skip to content

Commit

Permalink
i#5437,i#5431 glibc2.34-35: Workarounds for SIGFPE and rseq (#5695)
Browse files Browse the repository at this point in the history
Adds a workaround for the SIGFPE in glibc 2.34+ __libc_early_init() by
setting two ld.so globals that are located via hardcoded offsets.  The
hardcoded offsets make this workaround fragile, so it is considered
temporary.  (Possible improvements include decoding __libc_early_init or
other functions to find the offsets, which is also fragile; adding runtime
options to set the offsets so that a fix does not require a rebuild; or
disabling the call to __libc_early_init, which doesn't seem to be needed
for 2.34.)

Tested on glibc 2.34 where every libc-using client crashes with SIGFPE
but they work with this fix.

Adds an Ubuntu22 GA CI run but it has many failures due to the rseq
issue #5431.  Adds a workaround for this by having drrun set -disable_rseq
if it detects glibc 2.35+.  Even with this we have a number of test failures
so for now we use a label to just run 4 sanity-check tests.  This should
be enough to detect glibc changes that break the offsets here.

Issue: #5437, #5431
  • Loading branch information
derekbruening authored Oct 22, 2022
1 parent 13ff46c commit cacb542
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 6 deletions.
65 changes: 64 additions & 1 deletion .github/workflows/ci-x86.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,73 @@ jobs:
to: dynamorio-devs@googlegroups.com
from: Github Action CI

# 64-bit Linux build with gcc and run tests:
# 64-bit Linux build with gcc and run tests.
# Job "x86-64": runs on ubuntu-20.04, installs the build prerequisites and a
# pinned cmake, then runs the suite through suite/runsuite_wrapper.pl with the
# "automated_ci 64_only" arguments.
x86-64:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
with:
submodules: true

# Cancel any prior runs for a PR (but do not cancel master branch runs).
- uses: n1hility/cancel-previous-runs@v2
with:
token: ${{ secrets.GITHUB_TOKEN }}
if: ${{ github.event_name == 'pull_request' }}

# Fetch only the tip of origin/master (depth 1, no tags).
- run: git fetch --no-tags --depth=1 origin master

# Install multilib for non-cross-compiling Linux build.
- name: Create Build Environment
run: |
sudo apt update
sudo apt-get -y install doxygen vera++ zlib1g-dev libsnappy-dev \
liblz4-dev g++-multilib libunwind-dev
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
# Use a newer cmake to avoid 32-bit toolchain problems (i#4830).
- name: Setup newer cmake
uses: jwlawson/actions-setup-cmake@v1.8
with:
cmake-version: '3.19.7'

# Run the suite; the triggering event and ref are passed to the script
# through the CI_TRIGGER and CI_BRANCH environment variables.
- name: Run Suite
working-directory: ${{ github.workspace }}
run: ./suite/runsuite_wrapper.pl automated_ci 64_only
env:
DYNAMORIO_CROSS_AARCHXX_LINUX_ONLY: no
CI_TRIGGER: ${{ github.event_name }}
CI_BRANCH: ${{ github.ref }}

# Only when a step failed on the master branch: mail the run details to the
# dynamorio-devs list.
- name: Send failure mail to dynamorio-devs
if: failure() && github.ref == 'refs/heads/master'
uses: dawidd6/action-send-mail@v2
with:
server_address: smtp.gmail.com
server_port: 465
username: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_USERNAME}}
password: ${{secrets.DYNAMORIO_NOTIFICATION_EMAIL_PASSWORD}}
subject: |
[${{github.repository}}] ${{github.workflow}} FAILED
on ${{github.event_name}} at ${{github.ref}}
body: |
Github Actions CI workflow run FAILED!
Workflow: ${{github.workflow}}/x86-64
Repository: ${{github.repository}}
Branch ref: ${{github.ref}}
SHA: ${{github.sha}}
Triggering actor: ${{github.actor}}
Triggering event: ${{github.event_name}}
Run Id: ${{github.run_id}}
See more details on github.com/DynamoRIO/dynamorio/actions/runs/${{github.run_id}}
to: dynamorio-devs@googlegroups.com
from: Github Action CI

# Ubuntu22 64-bit Linux build with gcc and run tests.
# XXX: A matrix could combine this with the 20.04 but our auto-cancel
# step cancels the 2nd job so we need to solve that first.
x86-64-ubuntu22:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
with:
Expand Down
62 changes: 57 additions & 5 deletions core/unix/loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,16 @@ privload_os_finalize(privmod_t *privmod)
#ifndef LINUX
return; /* Nothing to do. */
#else
static privmod_t *privmod_ld_linux;
if (strstr(privmod->name, "ld-linux") == privmod->name) {
/* We need to first get the libc version before we clobber ld vars.
* (We could instead look for versioned symbols with "@GLIBC_2.34" in ld
* but we do not have version parsing code in place.)
* We assume ld will not be unloaded.
*/
privmod_ld_linux = privmod;
return;
}
if (strstr(privmod->name, "libc.so") != privmod->name)
return;
os_privmod_data_t *opd = (os_privmod_data_t *)privmod->os_privmod_data;
Expand All @@ -690,12 +700,54 @@ privload_os_finalize(privmod_t *privmod)
*/
void (*libc_early_init)(bool) = (void (*)(bool))get_proc_address_from_os_data(
&opd->os_data, opd->load_delta, LIBC_EARLY_INIT_NAME, NULL);
if (libc_early_init != NULL) {
LOG(GLOBAL, LOG_LOADER, 2, "%s: calling %s\n", __FUNCTION__,
LIBC_EARLY_INIT_NAME);
(*libc_early_init)(true);
if (libc_early_init == NULL) {
return;
}
#endif
/* XXX i#5437: Temporary workaround to avoid a SIGFPE in glibc 2.34+
* __libc_early_init(). As we cannot let ld/libc initialize their own TLS with the
* current design, we must explicitly initialize a few variables. Unfortunately
* we have to hardcode their offsets, making this fragile. Long-term we should try
* to find a better solution.
*/
/* Do not try to clobber vars unless we have to: get the libc version. */
# define LIBC_GET_VERSION_NAME "gnu_get_libc_version"
const char *(*libc_ver)(void) = (const char *(*)(void))get_proc_address_from_os_data(
&opd->os_data, opd->load_delta, LIBC_GET_VERSION_NAME, NULL);
if (libc_ver == NULL)
return;
LOG(GLOBAL, LOG_LOADER, 2, "%s: calling %s\n", __FUNCTION__, LIBC_GET_VERSION_NAME);
const char *ver = (*libc_ver)();
LOG(GLOBAL, LOG_LOADER, 2, "%s: libc version is |%s|\n", __FUNCTION__, ver);
if ((ver[0] == '\0' || ver[0] < '2') || ver[1] != '.' || ver[2] < '3' || ver[3] < '4')
return;
if (privmod_ld_linux == NULL) {
SYSLOG_INTERNAL_WARNING("glibc 2.34+ i#5437 workaround failed: missed ld");
return;
}
os_privmod_data_t *ld_opd = (os_privmod_data_t *)privmod_ld_linux->os_privmod_data;
byte *glro = get_proc_address_from_os_data(&ld_opd->os_data, ld_opd->load_delta,
"_rtld_global_ro", NULL);
if (glro == NULL) {
SYSLOG_INTERNAL_WARNING("glibc 2.34+ i#5437 workaround failed: missed glro");
return;
}
# define GLRO_dl_tls_static_size_OFFS 0x2a8
# define GLRO_dl_tls_static_align_OFFS 0x2b0
size_t val = 4096, written;
if (!safe_write_ex(glro + GLRO_dl_tls_static_size_OFFS, sizeof(val), &val,
&written) ||
written != sizeof(val) ||
!safe_write_ex(glro + GLRO_dl_tls_static_align_OFFS, sizeof(val), &val,
&written) ||
written != sizeof(val)) {
SYSLOG_INTERNAL_WARNING("glibc 2.34+ i#5437 workaround failed: missed write");
} else {
LOG(GLOBAL, LOG_LOADER, 2, "%s: glibc 2.34+ workaround succeeded\n",
__FUNCTION__);
}
LOG(GLOBAL, LOG_LOADER, 2, "%s: calling %s\n", __FUNCTION__, LIBC_EARLY_INIT_NAME);
(*libc_early_init)(true);
#endif /* LINUX */
}

static void
Expand Down
13 changes: 13 additions & 0 deletions suite/runsuite.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ foreach (arg ${CTEST_SCRIPT_ARG})
endif ()
endforeach (arg)

# Inspect the output of "ldd --version" and, when it reports GLIBC 2.35-2.39,
# restrict the run to the tests labeled UBUNTU_22 in debug-only mode.
if (UNIX AND NOT APPLE AND NOT ANDROID)
  execute_process(COMMAND ldd --version
    RESULT_VARIABLE ldd_result ERROR_VARIABLE ldd_err OUTPUT_VARIABLE ldd_out)
  # If ldd failed or wrote anything to stderr we just move on without
  # restricting the test set.  The dot is escaped so that it only matches a
  # literal '.' in the version string.
  # NOTE(review): the pattern does not match glibc 2.40 or later; confirm
  # whether those releases should be restricted as well.
  if (NOT ldd_result AND NOT ldd_err AND ldd_out MATCHES "GLIBC 2\\.3[5-9]")
    # XXX i#5437, i#5431: While we work through Ubuntu22 issues we run
    # just a few tests.
    set(extra_ctest_args INCLUDE_LABEL UBUNTU_22)
    set(arg_debug_only ON)
  endif ()
endif ()

set(build_tests "BUILD_TESTS:BOOL=ON")

if (arg_automated_ci)
Expand Down
18 changes: 18 additions & 0 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5349,3 +5349,21 @@ if (NOT ANDROID AND AARCHXX)
PROPERTIES LABELS RUNS_ON_QEMU)
endif ()
endif (NOT ANDROID AND AARCHXX)

# XXX i#5437, i#5431: While we work through Ubuntu22 issues we want our new GA CI job to
# just run sanity tests that do pass now as a glibc change detector to see if our
# workarounds so far break over time.
# The tests below are given the UBUNTU_22 label so that a run restricted to
# that label executes only them.
# NOTE(review): set_tests_properties assigns the LABELS property rather than
# appending to it; confirm that none of these tests carries another label that
# must be preserved.
set_tests_properties(
code_api|common.broadfun
PROPERTIES LABELS UBUNTU_22)
# The sample test is only labeled when samples are built (and not on Android).
if (BUILD_SAMPLES AND NOT ANDROID)
set_tests_properties(
code_api|sample.bbsize
PROPERTIES LABELS UBUNTU_22)
endif ()
# The drcachesim tests are only labeled when clients are built (and not on Android).
if (BUILD_CLIENTS AND NOT ANDROID)
set_tests_properties(
code_api|tool.drcachesim.simple
code_api|tool.drcacheoff.simple
PROPERTIES LABELS UBUNTU_22)
endif ()
26 changes: 26 additions & 0 deletions tools/drdeploy.c
Original file line number Diff line number Diff line change
Expand Up @@ -1248,6 +1248,18 @@ _tmain(int argc, TCHAR *targv[])
native_tool[0] = '\0';
#endif

/* Quick pass to set verbose for info() logs before main parsing. */
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "-verbose") == 0 || strcmp(argv[i], "-v") == 0) {
verbose = true;
break;
}
if (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "-t") == 0 ||
strcmp(argv[i], "-c32") == 0 || strcmp(argv[i], "-c64") == 0 ||
strcmp(argv[i], "--") == 0)
break;
}

/* default root: we assume this tool is in <root>/bin{32,64}/dr*.exe */
get_absolute_path(argv[0], buf, BUFFER_SIZE_ELEMENTS(buf));
NULL_TERMINATE_BUFFER(buf);
Expand All @@ -1270,6 +1282,20 @@ _tmain(int argc, TCHAR *targv[])
/* we re-read the tool list if the root, platform or toolconfig dir change */
read_tool_list(dr_toolconfig_dir, dr_platform);

#if defined(LINUX) && !defined(ANDROID) && (defined(DRCONFIG) || defined(DRRUN))
    /* XXX i#5431: Workaround for an rseq issue with glibc 2.35 which makes many
     * apps fail up front.  We expect to remove this once the real fix is in place.
     * When the running glibc is 2.35 or newer, "-disable_rseq" is added to the
     * extra options before the command line is parsed.
     */
# include <gnu/libc-version.h>
    const char *libc_ver = gnu_get_libc_version();
    /* Parse the leading "<major>.<minor>" as numbers.  Comparing individual
     * characters (libc_ver[2] and libc_ver[3] separately) fails to detect
     * releases such as 2.40 or 3.0 as being 2.35 or newer.  An empty or
     * non-numeric string parses as 0.0 and does not trigger the workaround.
     */
    const char *libc_ver_cur = libc_ver;
    unsigned int libc_ver_major = 0, libc_ver_minor = 0;
    while (*libc_ver_cur >= '0' && *libc_ver_cur <= '9')
        libc_ver_major = libc_ver_major * 10 + (unsigned int)(*libc_ver_cur++ - '0');
    if (*libc_ver_cur == '.') {
        libc_ver_cur++;
        while (*libc_ver_cur >= '0' && *libc_ver_cur <= '9') {
            libc_ver_minor =
                libc_ver_minor * 10 + (unsigned int)(*libc_ver_cur++ - '0');
        }
    }
    if (libc_ver_major > 2 || (libc_ver_major == 2 && libc_ver_minor >= 35)) {
        info("glibc2.35 detected: setting -disable_rseq");
        add_extra_option(extra_ops, BUFFER_SIZE_ELEMENTS(extra_ops), &extra_ops_sofar,
                         "-disable_rseq");
    }
#endif

/* parse command line */
for (i = 1; i < argc; i++) {

Expand Down

0 comments on commit cacb542

Please sign in to comment.