Skip to content
This repository has been archived by the owner on May 9, 2024. It is now read-only.

Refactor GPU shared memory tests and add L0-specific ones [1/N] #618

Merged
merged 12 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 0 additions & 77 deletions omniscidb/QueryEngine/Compiler/Backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
#include <llvm/IRReader/IRReader.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Transforms/Utils/Cloning.h>

#ifdef HAVE_L0
#include "LLVMSPIRVLib/LLVMSPIRVLib.h"
Expand Down Expand Up @@ -873,81 +871,6 @@ std::shared_ptr<CompilationContext> L0Backend::generateNativeCode(
return generateNativeGPUCode(exts_, func, wrapper_func, live_funcs, co, gpu_target_);
}

// Adds to module "to" an externally linked function that has the same function
// type and the same name as the function called "fname" in module "from".
// CHECK-fails when "from" contains no function with that name.
void insert_declaration(llvm::Module* from, llvm::Module* to, const std::string& fname) {
  llvm::Function* const source_fn = from->getFunction(fname);
  CHECK(source_fn);

  // Only the type and the name are taken from the source function; no body is
  // created here.
  llvm::FunctionType* const signature = source_fn->getFunctionType();
  llvm::Function::Create(
      signature, llvm::GlobalValue::ExternalLinkage, source_fn->getName(), *to);
}

// Replaces the body of function "fname" in module "to" with a clone of the body
// of the same-named function in module "from".
//
// Preconditions (all enforced with CHECK): both modules contain a function named
// "fname", the one in "from" is a definition, and both functions take the same
// number of arguments.
//
// After cloning, every direct call and every instruction operand that is a global
// variable is redirected to the same-named function/global of "to"; each of those
// must already exist in "to" (CHECK-enforced). Indirect calls have no named
// callee and are left exactly as cloned.
void replace_function(llvm::Module* from, llvm::Module* to, const std::string& fname) {
  auto target_fn = to->getFunction(fname);
  auto from_fn = from->getFunction(fname);
  CHECK(target_fn);
  CHECK(from_fn);
  CHECK(!from_fn->isDeclaration());
  // The argument mapping below walks both argument lists in lockstep, so a
  // mismatch would step past the end of the target's arguments.
  CHECK(target_fn->arg_size() == from_fn->arg_size());

  target_fn->deleteBody();

  // Map each source argument to the matching target argument (and carry the
  // argument names over) so that the cloned body refers to the target's arguments.
  llvm::ValueToValueMapTy vmap;
  auto target_arg_it = target_fn->arg_begin();
  for (const llvm::Argument& from_arg : from_fn->args()) {
    target_arg_it->setName(from_arg.getName());
    vmap[&from_arg] = &*target_arg_it++;
  }
  llvm::SmallVector<llvm::ReturnInst*, 8> returns;
#if LLVM_VERSION_MAJOR > 12
  llvm::CloneFunctionInto(
      target_fn, from_fn, vmap, llvm::CloneFunctionChangeType::DifferentModule, returns);
#else
  llvm::CloneFunctionInto(target_fn, from_fn, vmap, true, returns);
#endif

  for (auto& BB : *target_fn) {
    for (llvm::BasicBlock::iterator bbi = BB.begin(); bbi != BB.end();) {
      // Advance first: the current instruction may be replaced below.
      llvm::Instruction* inst = &*bbi++;
      if (auto* call = llvm::dyn_cast<llvm::CallInst>(inst)) {
        // getCalledFunction() is null for indirect calls; there is no callee name
        // to look up in "to", so such calls are not rebuilt.
        if (auto* callee = call->getCalledFunction()) {
          auto local_callee = to->getFunction(callee->getName());
          CHECK(local_callee);
          std::vector<llvm::Value*> args(call->arg_begin(), call->arg_end());

          // NOTE(review): the replacement call is built from the callee and the
          // arguments only; anything else attached to the original call does not
          // appear to be copied - confirm this is intended.
          auto new_call = llvm::CallInst::Create(local_callee, args, call->getName());

          llvm::ReplaceInstWithInst(call, new_call);
          inst = new_call;
        }
      }
      // Redirect operands that are global variables to the same-named globals
      // of "to" (private/internal ones included).
      for (unsigned op_idx = 0; op_idx < inst->getNumOperands(); ++op_idx) {
        auto op = inst->getOperand(op_idx);
        if (auto* global = llvm::dyn_cast<llvm::GlobalVariable>(op)) {
          auto local_global = to->getGlobalVariable(global->getName(), true);
          CHECK(local_global);
          inst->setOperand(op_idx, local_global);
        }
      }
    }
  }
}

// Mirrors every global variable of module "from" into module "to".
//
// Each new global takes the value type, constness, linkage, name, thread-local
// mode and address space of its source and then receives the source's attributes
// through copyAttributesFrom(). No initializer is passed, so initial values are
// not carried over.
void insert_globals(llvm::Module* from, llvm::Module* to) {
  llvm::Constant* const no_initializer = nullptr;
  llvm::GlobalVariable* const no_insert_before = nullptr;

  for (const llvm::GlobalVariable& src_gv : from->globals()) {
    const unsigned addr_space = src_gv.getType()->getAddressSpace();
    auto* dst_gv = new llvm::GlobalVariable(*to,
                                            src_gv.getValueType(),
                                            src_gv.isConstant(),
                                            src_gv.getLinkage(),
                                            no_initializer,
                                            src_gv.getName(),
                                            no_insert_before,
                                            src_gv.getThreadLocalMode(),
                                            addr_space);
    dst_gv->copyAttributesFrom(&src_gv);
  }
}

std::shared_ptr<L0CompilationContext> L0Backend::generateNativeGPUCode(
const std::map<ExtModuleKinds, std::unique_ptr<llvm::Module>>& exts,
llvm::Function* func,
Expand Down
77 changes: 77 additions & 0 deletions omniscidb/QueryEngine/Compiler/HelperFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include <llvm/Transforms/Scalar/SROA.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include <llvm/Transforms/Utils/Mem2Reg.h>
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/StandardInstrumentations.h"
Expand Down Expand Up @@ -249,4 +251,79 @@ void optimize_ir(llvm::Function* query_func,
DUMP_MODULE(llvm_module, ir_dump_dir + "IR_OPT");
}
}

// Adds to module "to" an externally linked function that has the same function
// type and the same name as the function called "fname" in module "from".
// CHECK-fails when "from" contains no function with that name.
void insert_declaration(llvm::Module* from, llvm::Module* to, const std::string& fname) {
  llvm::Function* const source_fn = from->getFunction(fname);
  CHECK(source_fn);

  // Only the type and the name are taken from the source function; no body is
  // created here.
  llvm::FunctionType* const signature = source_fn->getFunctionType();
  llvm::Function::Create(
      signature, llvm::GlobalValue::ExternalLinkage, source_fn->getName(), *to);
}

// Replaces the body of function "fname" in module "to" with a clone of the body
// of the same-named function in module "from".
//
// Preconditions (all enforced with CHECK): both modules contain a function named
// "fname", the one in "from" is a definition, and both functions take the same
// number of arguments.
//
// After cloning, every direct call and every instruction operand that is a global
// variable is redirected to the same-named function/global of "to"; each of those
// must already exist in "to" (CHECK-enforced). Indirect calls have no named
// callee and are left exactly as cloned.
void replace_function(llvm::Module* from, llvm::Module* to, const std::string& fname) {
  auto target_fn = to->getFunction(fname);
  auto from_fn = from->getFunction(fname);
  CHECK(target_fn);
  CHECK(from_fn);
  CHECK(!from_fn->isDeclaration());
  // The argument mapping below walks both argument lists in lockstep, so a
  // mismatch would step past the end of the target's arguments.
  CHECK(target_fn->arg_size() == from_fn->arg_size());

  target_fn->deleteBody();

  // Map each source argument to the matching target argument (and carry the
  // argument names over) so that the cloned body refers to the target's arguments.
  llvm::ValueToValueMapTy vmap;
  auto target_arg_it = target_fn->arg_begin();
  for (const llvm::Argument& from_arg : from_fn->args()) {
    target_arg_it->setName(from_arg.getName());
    vmap[&from_arg] = &*target_arg_it++;
  }
  llvm::SmallVector<llvm::ReturnInst*, 8> returns;
#if LLVM_VERSION_MAJOR > 12
  llvm::CloneFunctionInto(
      target_fn, from_fn, vmap, llvm::CloneFunctionChangeType::DifferentModule, returns);
#else
  llvm::CloneFunctionInto(target_fn, from_fn, vmap, true, returns);
#endif

  for (auto& BB : *target_fn) {
    for (llvm::BasicBlock::iterator bbi = BB.begin(); bbi != BB.end();) {
      // Advance first: the current instruction may be replaced below.
      llvm::Instruction* inst = &*bbi++;
      if (auto* call = llvm::dyn_cast<llvm::CallInst>(inst)) {
        // getCalledFunction() is null for indirect calls; there is no callee name
        // to look up in "to", so such calls are not rebuilt.
        if (auto* callee = call->getCalledFunction()) {
          auto local_callee = to->getFunction(callee->getName());
          CHECK(local_callee);
          std::vector<llvm::Value*> args(call->arg_begin(), call->arg_end());

          // NOTE(review): the replacement call is built from the callee and the
          // arguments only; anything else attached to the original call does not
          // appear to be copied - confirm this is intended.
          auto new_call = llvm::CallInst::Create(local_callee, args, call->getName());

          llvm::ReplaceInstWithInst(call, new_call);
          inst = new_call;
        }
      }
      // Redirect operands that are global variables to the same-named globals
      // of "to" (private/internal ones included).
      for (unsigned op_idx = 0; op_idx < inst->getNumOperands(); ++op_idx) {
        auto op = inst->getOperand(op_idx);
        if (auto* global = llvm::dyn_cast<llvm::GlobalVariable>(op)) {
          auto local_global = to->getGlobalVariable(global->getName(), true);
          CHECK(local_global);
          inst->setOperand(op_idx, local_global);
        }
      }
    }
  }
}

// Mirrors every global variable of module "from" into module "to".
//
// Each new global takes the value type, constness, linkage, name, thread-local
// mode and address space of its source and then receives the source's attributes
// through copyAttributesFrom(). No initializer is passed, so initial values are
// not carried over.
void insert_globals(llvm::Module* from, llvm::Module* to) {
  llvm::Constant* const no_initializer = nullptr;
  llvm::GlobalVariable* const no_insert_before = nullptr;

  for (const llvm::GlobalVariable& src_gv : from->globals()) {
    const unsigned addr_space = src_gv.getType()->getAddressSpace();
    auto* dst_gv = new llvm::GlobalVariable(*to,
                                            src_gv.getValueType(),
                                            src_gv.isConstant(),
                                            src_gv.getLinkage(),
                                            no_initializer,
                                            src_gv.getName(),
                                            no_insert_before,
                                            src_gv.getThreadLocalMode(),
                                            addr_space);
    dst_gv->copyAttributesFrom(&src_gv);
  }
}
} // namespace compiler
12 changes: 12 additions & 0 deletions omniscidb/QueryEngine/Compiler/HelperFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,16 @@ void optimize_ir(llvm::Function* query_func,
const bool is_gpu_smem_used,
const CompilationOptions& co);

// Creates in the "to" module an externally linked function with the same name and
// function type as the function named "fname" in the "from" module. Fails a CHECK
// if "from" has no function with that name.
void insert_declaration(llvm::Module* from, llvm::Module* to, const std::string& fname);

// Creates in the "to" module a counterpart of every global variable of the "from"
// module (same value type, constness, linkage, name, thread-local mode, address
// space and attributes). Initializers are not copied.
void insert_globals(llvm::Module* from, llvm::Module* to);

// Clones a function body in "from" module to the same function in "to" module. The "from"
// module must contain a definition, the "to" module must contain at least a declaration.
// All the calls and global variable accesses in the copied function are retargeted to
// point to those in the "to" module, looked up by name; every called function and
// referenced global must therefore already exist in "to" (see insert_declaration and
// insert_globals), otherwise a CHECK fails.
void replace_function(llvm::Module* from, llvm::Module* to, const std::string& fname);

} // namespace compiler
20 changes: 15 additions & 5 deletions omniscidb/Tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,10 @@ if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
add_executable(UdfTest UdfTest.cpp)
endif()
if(ENABLE_CUDA)
add_executable(GpuSharedMemoryTest GpuSharedMemoryTest.cpp ResultSetTestUtils.cpp)
add_executable(CudaSharedMemoryTest CudaSharedMemoryTest.cpp GpuSharedMemoryTestHelpers.cpp ResultSetTestUtils.cpp)
endif()
if(ENABLE_L0)
add_executable(L0SharedMemoryTest L0SharedMemoryTest.cpp GpuSharedMemoryTestHelpers.cpp ResultSetTestUtils.cpp)
endif()

# Tests + Microbenchmarks
Expand Down Expand Up @@ -140,7 +143,10 @@ else()
endif()

if(ENABLE_CUDA)
target_link_libraries(GpuSharedMemoryTest gtest Logger QueryEngine)
target_link_libraries(CudaSharedMemoryTest gtest Logger QueryEngine)
endif()
if(ENABLE_L0)
target_link_libraries(L0SharedMemoryTest gtest Logger QueryEngine)
endif()

set(TEST_ARGS "--gtest_output=xml:../")
Expand Down Expand Up @@ -182,7 +188,10 @@ add_test(NAME StringDictionaryHashTestCpuOnly COMMAND StringDictionaryTest ${TES
set_tests_properties(StringDictionaryHashTestCpuOnly PROPERTIES LABELS "cpu_only")

if(ENABLE_CUDA)
add_test(GpuSharedMemoryTest GpuSharedMemoryTest ${TEST_ARGS})
add_test(CudaSharedMemoryTest CudaSharedMemoryTest ${TEST_ARGS})
endif()
if(ENABLE_L0)
add_test(L0SharedMemoryTest L0SharedMemoryTest ${TEST_ARGS})
endif()
if(ENABLE_L0)
set(arrow_based_execute_skip_tests
Expand Down Expand Up @@ -247,7 +256,7 @@ set(TEST_PROGRAMS
)

if(ENABLE_CUDA)
list(APPEND TEST_PROGRAMS GpuSharedMemoryTest)
list(APPEND TEST_PROGRAMS CudaSharedMemoryTest)
endif()

#if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
Expand All @@ -261,7 +270,7 @@ function(add_default_cpu_only_tests TEST_LIST)
endforeach()
endfunction()
add_default_cpu_only_tests("${TEST_PROGRAMS}")
set(EXCLUDE_PLATFORM_SPECIFIC_TESTS CodeGeneratorTest)
set(EXCLUDE_PLATFORM_SPECIFIC_TESTS CodeGeneratorTest L0SharedMemoryTest)


set(SANITY_TESTS ${TEST_PROGRAMS})
Expand Down Expand Up @@ -340,6 +349,7 @@ if(ENABLE_L0)
ArrowStorageSqlTest # taxi queries
SpirvBuildTest
L0MgrExecuteTest
# L0SharedMemoryTest
IntelGPUEnablingTest
)
set_tests_properties(${ENABLING_TESTS} PROPERTIES LABELS "enabling")
Expand Down
Loading