emscripten-core · kripken · Jun 5, 2020 · May 20, 2020 · May 20, 2020 · May 20, 2020
diff --git a/emcc.py b/emcc.py
@@ -47,6 +47,8 @@
 from tools.minimal_runtime_shell import generate_minimal_runtime_html
 import tools.line_endings
 from tools.toolchain_profiler import ToolchainProfiler
+from tools import wasm2c
+
 if __name__ == '__main__':
   ToolchainProfiler.record_process_start()
 
@@ -3356,6 +3358,9 @@ def run_closure_compiler(final):
       dwarf_target = wasm_binary_target + '.debug.wasm'
     building.emit_debug_on_side(wasm_binary_target, dwarf_target)
 
+  if shared.Settings.WASM2C:
+    wasm2c.do_wasm2c(wasm_binary_target)
+
   # replace placeholder strings with correct subresource locations
   if shared.Settings.SINGLE_FILE:
     js = open(final).read()

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -6,6 +6,7 @@
   "dependencies": {
     "google-closure-compiler": "20200224.0.0",
     "html-minifier-terser": "5.0.2",
-    "source-map": "0.5.6"
+    "source-map": "0.5.6",
+    "wasm2c": "1.0.0"
   }
 }
diff --git a/src/settings.js b/src/settings.js
@@ -1752,6 +1752,14 @@ var DEFAULT_TO_CXX = 1;
 // long double printing precision.
 var PRINTF_LONG_DOUBLE = 0;
 
+// Run wabt's wasm2c tool on the final wasm, and combine that with a C runtime,
+// resulting in a .c file that you can compile with a C compiler to get a
+// native executable that works the same as the normal js+wasm. This will also
+// emit the wasm2c .h file. The output filenames will be X.wasm.c, X.wasm.h
+// if your output is X.js or X.wasm (note the added ".wasm", which we always
+// emit in order to avoid trampling an existing C file).
+var WASM2C = 0;
+
 //===========================================
 // Internal, used for testing only, from here
 //===========================================

diff --git a/tests/other/wasm2c/my-code.c b/tests/other/wasm2c/my-code.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+
+// We could
+//
+//   #include <lib.wasm.h>
+//
+// instead of declaring the externs below manually, but including that header
+// currently requires having wasm-rt.h in the include path, which may be
+// annoying for users - this needs to be thought about.
+
+extern void wasmbox_init(void);
+
+extern int (*Z_do_bad_thingZ_ii)(int);
+
+extern int (*Z_twiceZ_ii)(int);
+
+int main() {
+  puts("Initializing sandboxed unsafe library");
+  wasmbox_init();
+  printf("Calling twice on 21 returns %d\n", Z_twiceZ_ii(21));
+  puts("Calling something bad now...");
+  int num = Z_do_bad_thingZ_ii(1);
+  printf("The sandbox should not have been able to print anything.\n"
+         "It claims it printed %d chars but the test proves it didn't!\n", num);
+}
diff --git a/tests/other/wasm2c/output.txt b/tests/other/wasm2c/output.txt
@@ -0,0 +1,5 @@
+Initializing sandboxed unsafe library
+Calling twice on 21 returns 42
+Calling something bad now...
+The sandbox should not have been able to print anything.
+It claims it printed 55 chars but the test proves it didn't!
diff --git a/tests/other/wasm2c/unsafe-library.c b/tests/other/wasm2c/unsafe-library.c
@@ -0,0 +1,12 @@
+#include <emscripten.h>
+#include <stdio.h>
+
+EMSCRIPTEN_KEEPALIVE
+int twice(int x) {
+  return x + x;
+}
+
+EMSCRIPTEN_KEEPALIVE
+int do_bad_thing(int size) {
+  return printf("I am in a sandbox and should not be able to print this!");
+}
diff --git a/tests/runner.py b/tests/runner.py
@@ -58,8 +58,12 @@
 sys.path.append(__rootpath__)
 
 import parallel_testsuite
-from tools.shared import EM_CONFIG, TEMP_DIR, EMCC, EMXX, DEBUG, LLVM_TARGET, ASM_JS_TARGET, EMSCRIPTEN_TEMP_DIR, WASM_TARGET, SPIDERMONKEY_ENGINE, WINDOWS, EM_BUILD_VERBOSE
-from tools.shared import asstr, get_canonical_temp_dir, run_process, try_delete, asbytes, safe_copy, Settings
+from tools.shared import EM_CONFIG, TEMP_DIR, EMCC, EMXX, DEBUG
+from tools.shared import LLVM_TARGET, ASM_JS_TARGET, EMSCRIPTEN_TEMP_DIR
+from tools.shared import WASM_TARGET, SPIDERMONKEY_ENGINE, WINDOWS
+from tools.shared import EM_BUILD_VERBOSE, CLANG_CC
+from tools.shared import asstr, get_canonical_temp_dir, run_process, try_delete
+from tools.shared import asbytes, safe_copy, Settings
 from tools import jsrun, shared, line_endings, building
 
 
@@ -1231,6 +1235,9 @@ def do_run(self, src, expected_output, args=[], output_nicerizer=None,
       if len(wasm_engines) == 0:
         logger.warning('no wasm engine was found to run the standalone part of this test')
       js_engines += wasm_engines
+      if self.get_setting('WASM2C'):
+        # the "engine" for wasm2c builds is clang, which compiles the C output
+        js_engines += [[CLANG_CC]]
     if len(js_engines) == 0:
       self.skipTest('No JS engine present to run this test with. Check %s and the paths therein.' % EM_CONFIG)
     for engine in js_engines:

diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
@@ -239,6 +239,48 @@ def cleanup(self):
     building.clear()
 
 
+class EmscriptenWasm2CBenchmarker(EmscriptenBenchmarker):
+  def __init__(self, name):
+    super(EmscriptenWasm2CBenchmarker, self).__init__(name, 'no engine needed')
+
+  def build(self, parent, filename, args, shared_args, emcc_args, native_args, native_exec, lib_builder, has_output_parser):
+    # wasm2c doesn't want minimal runtime which the normal emscripten
+    # benchmarker defaults to, as we don't have any JS anyhow
+    emcc_args = emcc_args + [
+      '-s', 'STANDALONE_WASM',
+      '-s', 'MINIMAL_RUNTIME=0',
+      '-s', 'WASM2C'
+    ]
+
+    global LLVM_FEATURE_FLAGS
+    old_flags = LLVM_FEATURE_FLAGS
+    try:
+      # wasm2c does not support anything beyond MVP
+      LLVM_FEATURE_FLAGS = []
+      super(EmscriptenWasm2CBenchmarker, self).build(parent, filename, args, shared_args, emcc_args, native_args, native_exec, lib_builder, has_output_parser)
+    finally:
+      LLVM_FEATURE_FLAGS = old_flags
+
+    # move the JS away so there is no chance we run it by mistake
+    shutil.move(self.filename, self.filename + '.old.js')
+
+    base = self.filename[:-3]
+    c = base + '.wasm.c'
+    native = base + '.exe'
+
+    run_process(['clang', c, '-o', native, OPTIMIZATIONS, '-lm',
+                 '-DWASM_RT_MAX_CALL_STACK_DEPTH=8000'])  # for havlak
+
+    self.filename = native
+
+  def run(self, args):
+    return run_process([self.filename] + args, stdout=PIPE, stderr=PIPE, check=False).stdout
+
+  def get_output_files(self):
+    # return the native code. c size may also be interesting.
+    return [self.filename]
+
+
 CHEERP_BIN = '/opt/cheerp/bin/'
 
 
@@ -323,6 +365,7 @@ def cleanup(self):
   benchmarkers += [
     EmscriptenBenchmarker(default_v8_name, aot_v8),
     EmscriptenBenchmarker(default_v8_name + '-lto', aot_v8, ['-flto']),
+    # EmscriptenWasm2CBenchmarker('wasm2c')
   ]
   if os.path.exists(CHEERP_BIN):
     benchmarkers += [

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -165,6 +165,28 @@ def decorated(self):
   return decorated
 
 
+def also_with_standalone_wasm_and_wasm2c(func):
+  def decorated(self):
+    func(self)
+    # Standalone mode is only supported in the wasm backend, and not in all
+    # modes there.
+    if can_do_standalone(self):
+      print('standalone')
+      self.set_setting('STANDALONE_WASM', 1)
+      # we will not legalize the JS ffi interface, so we must use BigInt
+      # support in order for JS to have a chance to run this without trapping
+      # when it sees an i64 on the ffi.
+      self.set_setting('WASM_BIGINT', 1)
+      with js_engines_modify([NODE_JS + ['--experimental-wasm-bigint']]):
+        func(self)
+        print('wasm2c')
+        self.set_setting('WASM2C', 1)
+        with wasm_engines_modify([]):
+          func(self)
+
+  return decorated
+
+
 # Similar to also_with_standalone_wasm, but suitable for tests that cannot
 # run in a wasm VM yet, as they are not 100% standalone. We can still
 # run them with the JS code though.
@@ -187,6 +209,31 @@ def decorated(self):
   return decorated
 
 
+def also_with_impure_standalone_wasm_and_wasm2c(func):
+  def decorated(self):
+    func(self)
+    # Standalone mode is only supported in the wasm backend, and not in all
+    # modes there.
+    if can_do_standalone(self):
+      print('standalone (impure; no wasm runtimes)')
+      with wasm_engines_modify([]):
+        self.set_setting('STANDALONE_WASM', 1)
+        # we will not legalize the JS ffi interface, so we must use BigInt
+        # support in order for JS to have a chance to run this without trapping
+        # when it sees an i64 on the ffi.
+        self.set_setting('WASM_BIGINT', 1)
+        with js_engines_modify([NODE_JS + ['--experimental-wasm-bigint']]):
+          func(self)
+        print('wasm2c')
+        self.set_setting('STANDALONE_WASM', 1)
+        self.set_setting('WASM2C', 1)
+        # disable js engines too, so we only run the c output
+        with js_engines_modify([]):
+          func(self)
+
+  return decorated
+
+
 # Similar to also_with_standalone_wasm, but suitable for tests that can *only*
 # run in a wasm VM, or in non-standalone mode, but not in standalone mode with
 # our JS.
@@ -521,7 +568,7 @@ def test_cube2md5(self):
     shutil.copyfile(path_from_root('tests', 'cube2md5.txt'), 'cube2md5.txt')
     self.do_run(open(path_from_root('tests', 'cube2md5.cpp')).read(), open(path_from_root('tests', 'cube2md5.ok')).read(), assert_returncode=None)
 
-  @also_with_standalone_wasm
+  @also_with_standalone_wasm_and_wasm2c
   @needs_make('make')
   def test_cube2hash(self):
     # A good test of i64 math
@@ -1076,6 +1123,7 @@ def test_wcslen(self):
   def test_regex(self):
     self.do_run_in_out_file_test('tests', 'core', 'test_regex')
 
+  @also_with_impure_standalone_wasm_and_wasm2c
   def test_longjmp(self):
     self.do_run_in_out_file_test('tests', 'core', 'test_longjmp')
 
@@ -5580,7 +5628,7 @@ def test_unistd_misc(self):
 
   # i64s in the API, which we'd need to legalize for JS, so in standalone mode
   # all we can test is wasm VMs
-  @also_with_standalone_wasm
+  @also_with_standalone_wasm_and_wasm2c
   def test_posixtime(self):
     test_path = path_from_root('tests', 'core', 'test_posixtime')
     src, output = (test_path + s for s in ('.c', '.out'))
@@ -6608,7 +6656,7 @@ def do_autodebug(filename):
   @no_asan('autodebug logging interferes with asan')
   @no_fastcomp('autodebugging wasm is only supported in the wasm backend')
   @with_env_modify({'EMCC_AUTODEBUG': '1'})
-  @also_with_impure_standalone_wasm
+  @also_with_impure_standalone_wasm_and_wasm2c
   def test_autodebug_wasm(self):
     # Autodebug does not work with too much shadow memory.
     # Memory consumed by autodebug depends on the size of the WASM linear memory.

diff --git a/tests/test_other.py b/tests/test_other.py
@@ -10248,6 +10248,27 @@ def test_standalone_syscalls(self):
       for engine in WASM_ENGINES:
         self.assertContained(expected, run_js('test.wasm', engine))
 
+  @no_fastcomp("uses standalone mode")
+  def test_wasm2c_reactor(self):
+    # test compiling an unsafe library using wasm2c, then using it from a
+    # main program. this shows it is easy to use wasm2c as a sandboxing
+    # mechanism.
+
+    # first compile the library with emcc, getting a .c and .h
+    run_process([EMCC,
+                path_from_root('tests', 'other', 'wasm2c', 'unsafe-library.c'),
+                '-O3', '-o', 'lib.wasm', '-s', 'WASM2C', '--no-entry'])
+    # compile that .c to a native object
+    run_process([CLANG_CC, 'lib.wasm.c', '-c', '-O3', '-o', 'lib.o'])
+    # compile the main program natively normally, and link with the
+    # unsafe library
+    run_process([CLANG_CC,
+                path_from_root('tests', 'other', 'wasm2c', 'my-code.c'),
+                '-O3', 'lib.o', '-o', 'program.exe'])
+    output = run_process([os.path.abspath('program.exe')], stdout=PIPE).stdout
+    with open(path_from_root('tests', 'other', 'wasm2c', 'output.txt')) as f:
+      self.assertEqual(output, f.read())
+
   @parameterized({
     'wasm2js': (['-s', 'WASM=0'], ''),
     'modularize': (['-s', 'MODULARIZE'], 'Module()'),

diff --git a/tools/jsrun.py b/tools/jsrun.py
@@ -47,6 +47,7 @@ def make_command(filename, engine=None, args=[]):
   is_jsc = 'jsc' in jsengine
   is_wasmer = 'wasmer' in jsengine
   is_wasmtime = 'wasmtime' in jsengine
+  is_clang = engine[0] == shared.CLANG_CC
   # Disable true async compilation (async apis will in fact be synchronous) for now
   # due to https://bugs.chromium.org/p/v8/issues/detail?id=6263
   shell_option_flags = ['--no-wasm-async-compilation'] if is_d8 else []
@@ -56,6 +57,14 @@ def make_command(filename, engine=None, args=[]):
   if is_wasmer or is_wasmtime:
     # in a wasm runtime, run the wasm, not the js
     filename = shared.unsuffixed(filename) + '.wasm'
+  elif is_clang:
+    # with wasm2c, the input is a c file, which we must compile first
+    c = shared.unsuffixed(filename) + '.wasm.c'
+    executable = shared.unsuffixed(filename) + '.exe'
+    shared.run_process(engine + [c, '-o', executable])
+    # we can now run the executable directly, without an engine
+    engine = []
+    filename = os.path.abspath(executable)
   # Separates engine flags from script flags
   flag_separator = ['--'] if is_d8 or is_jsc else []
   return engine + command_flags + [filename] + shell_option_flags + flag_separator + args
@@ -83,10 +92,14 @@ def check_engine(engine):
 
 def require_engine(engine):
   engine_path = engine[0]
+  # if clang is the "engine", it means we compiled to a native executable;
+  # there is nothing to check here
+  if engine_path == shared.CLANG_CC:
+    return
   if engine_path not in WORKING_ENGINES:
     check_engine(engine)
   if not WORKING_ENGINES[engine_path]:
-    logging.critical('The JavaScript shell (%s) does not seem to work, check the paths in the config file' % engine)
+    logging.critical('The engine (%s) does not seem to work, check the paths in the config file' % engine)
     sys.exit(1)