Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Add BLAS flavour and CPU SSE and AVX flags
Browse files Browse the repository at this point in the history
  • Loading branch information
larroy committed Jan 8, 2019
1 parent 13edf65 commit 07d19a5
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 35 deletions.
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,16 @@ ifeq ($(USE_CUDNN), 1)
LDFLAGS += -lcudnn
endif

# Export the selected BLAS flavour to the C++ sources as an MXNET_USE_BLAS_* define.
# Make variable names are case-sensitive and the other build switches here
# (USE_CUDNN, USE_F16C) are upper-case, so test USE_BLAS; a lower-case
# `use_blas` expands to an empty string unless something defines it separately.
# NOTE(review): confirm the value spellings below match the ones accepted for USE_BLAS.
ifeq ($(USE_BLAS), open)
CFLAGS += -DMXNET_USE_BLAS_OPEN=1
else ifeq ($(USE_BLAS), atlas)
CFLAGS += -DMXNET_USE_BLAS_ATLAS=1
else ifeq ($(USE_BLAS), mkl)
CFLAGS += -DMXNET_USE_BLAS_MKL=1
else ifeq ($(USE_BLAS), apple)
CFLAGS += -DMXNET_USE_BLAS_APPLE=1
endif

# Whether to use the F16C instruction set extension for fast fp16 compute on the CPU.
# When cross-compiling, you may want to turn it off explicitly if the target system does not support it.
ifndef USE_F16C
Expand Down
4 changes: 4 additions & 0 deletions cmake/ChooseBlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,26 @@ if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas")
list(APPEND mshadow_LINKER_LIBS ${Atlas_LIBRARIES})
add_definitions(-DMSHADOW_USE_CBLAS=1)
add_definitions(-DMSHADOW_USE_MKL=0)
add_definitions(-DMXNET_USE_BLAS_ATLAS=1)
elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open")
find_package(OpenBLAS REQUIRED)
include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR})
list(APPEND mshadow_LINKER_LIBS ${OpenBLAS_LIB})
add_definitions(-DMSHADOW_USE_CBLAS=1)
add_definitions(-DMSHADOW_USE_MKL=0)
add_definitions(-DMXNET_USE_BLAS_OPEN=1)
elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl")
find_package(MKL REQUIRED)
include_directories(SYSTEM ${MKL_INCLUDE_DIR})
list(APPEND mshadow_LINKER_LIBS ${MKL_LIBRARIES})
add_definitions(-DMSHADOW_USE_CBLAS=0)
add_definitions(-DMSHADOW_USE_MKL=1)
add_definitions(-DMXNET_USE_BLAS_MKL=1)
elseif(BLAS STREQUAL "Apple" OR BLAS STREQUAL "apple")
# Accept the capitalised spelling as well, as the Atlas/Open/MKL branches do;
# otherwise -DBLAS=Apple matches no branch of this chain.
find_package(Accelerate REQUIRED)
include_directories(SYSTEM ${Accelerate_INCLUDE_DIR})
list(APPEND mshadow_LINKER_LIBS ${Accelerate_LIBRARIES})
# Accelerate is used through the CBLAS interface, not the MKL one.
add_definitions(-DMSHADOW_USE_MKL=0)
add_definitions(-DMSHADOW_USE_CBLAS=1)
# Record the BLAS flavour for the MXNet feature flags.
add_definitions(-DMXNET_USE_BLAS_APPLE=1)
endif()
26 changes: 23 additions & 3 deletions include/mxnet/mxfeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@
#define MXNET_USE_CUDNN MSHADOW_USE_CUDNN
#endif

/*!
 *\brief whether to use NCCL; falls back to 0 when the build does not define it
 */
#if !defined(MXNET_USE_NCCL)
#define MXNET_USE_NCCL 0
#endif  // !defined(MXNET_USE_NCCL)

/*!
*\brief whether to use cusolver library
*/
Expand All @@ -68,6 +72,23 @@
#define MXNET_USE_TENSORRT 0
#endif


/*!
 *\brief BLAS flavour flags; each one falls back to 0 when the build
 * system did not pass a definition for it
 */
#if !defined(MXNET_USE_BLAS_ATLAS)
#define MXNET_USE_BLAS_ATLAS 0
#endif  // !defined(MXNET_USE_BLAS_ATLAS)

#if !defined(MXNET_USE_BLAS_OPEN)
#define MXNET_USE_BLAS_OPEN 0
#endif  // !defined(MXNET_USE_BLAS_OPEN)

#if !defined(MXNET_USE_BLAS_MKL)
#define MXNET_USE_BLAS_MKL 0
#endif  // !defined(MXNET_USE_BLAS_MKL)

#if !defined(MXNET_USE_BLAS_APPLE)
#define MXNET_USE_BLAS_APPLE 0
#endif  // !defined(MXNET_USE_BLAS_APPLE)

/*!
 *\brief whether to use MKLDNN; falls back to 0 when the build does not define it
 */
#if !defined(MXNET_USE_MKLDNN)
#define MXNET_USE_MKLDNN 0
#endif  // !defined(MXNET_USE_MKLDNN)
Expand All @@ -92,9 +113,7 @@
#define MXNET_USE_SIGNAL_HANDLER 0
#endif

// Fallback: MXNET_USE_NCCL is 0 unless the build defined it.
// NOTE(review): the same guard already appears earlier in this header view, which
// would make this one a no-op -- confirm whether both are meant to be kept.
#ifndef MXNET_USE_NCCL
#define MXNET_USE_NCCL 0
#endif




Expand Down Expand Up @@ -134,6 +153,7 @@ enum : uint32_t {
BLAS_ATLAS,
// Intel(R) Math Kernel Library
BLAS_MKL,
BLAS_APPLE,
// Other math libraries:
// Linear Algebra PACKage
LAPACK,
Expand Down
87 changes: 55 additions & 32 deletions src/mxfeatures.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/*
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -37,44 +37,67 @@ class Storage {
Storage():
feature_bits()
{
if (MXNET_USE_CUDA)
feature_bits.set(CUDA);
if (MXNET_USE_CUDNN)
feature_bits.set(CUDNN);
if (MXNET_USE_NCCL)
feature_bits.set(NCCL);
if (MXNET_USE_OPENCV)
feature_bits.set(OPENCV);
if (MXNET_ENABLE_CUDA_RTC)
feature_bits.set(CUDA_RTC);
if (MXNET_USE_TENSORRT)
feature_bits.set(TENSORRT);
if (MXNET_USE_OPENMP)
feature_bits.set(OPENMP);
if (MXNET_USE_F16C)
feature_bits.set(F16C);
if (MXNET_USE_LAPACK)
feature_bits.set(LAPACK);
if (MXNET_USE_MKLDNN)
feature_bits.set(MKLDNN);
if (MXNET_USE_OPENCV)
feature_bits.set(OPENCV);
if (MXNET_USE_CAFFE)
feature_bits.set(CAFFE);
if (MXNET_USE_DIST_KVSTORE)
feature_bits.set(DIST_KVSTORE);
if (MXNET_USE_SIGNAL_HANDLER)
feature_bits.set(SIGNAL_HANDLER);
// GPU
feature_bits.set(CUDA, MXNET_USE_CUDA);
feature_bits.set(CUDNN, MXNET_USE_CUDNN);
feature_bits.set(NCCL, MXNET_USE_NCCL);
feature_bits.set(CUDA_RTC, MXNET_ENABLE_CUDA_RTC);
feature_bits.set(TENSORRT, MXNET_USE_TENSORRT);

// Check flags for example with gcc -msse3 -mavx2 -dM -E - < /dev/null | egrep "SSE|AVX"
#if __SSE__
feature_bits.set(CPU_SSE);
#endif
#if __SSE2__
feature_bits.set(CPU_SSE2);
#endif
#if __SSE3__
feature_bits.set(CPU_SSE3);
#endif
#if __SSE4_1__
feature_bits.set(CPU_SSE4_1);
#endif
#if __SSE4_2__
feature_bits.set(CPU_SSE4_2);
#endif
#if __SSE4A__
feature_bits.set(CPU_SSE4A);
#endif
#if __AVX__
feature_bits.set(CPU_AVX);
#endif
#if __AVX2__
feature_bits.set(CPU_AVX2);
#endif

// CPU
feature_bits.set(OPENMP, MXNET_USE_OPENMP);
feature_bits.set(F16C, MXNET_USE_F16C);

// Math
feature_bits.set(BLAS_OPEN, MXNET_USE_BLAS_OPEN);
feature_bits.set(BLAS_ATLAS, MXNET_USE_BLAS_ATLAS);
feature_bits.set(BLAS_MKL, MXNET_USE_BLAS_MKL);
feature_bits.set(BLAS_APPLE, MXNET_USE_BLAS_APPLE);
feature_bits.set(LAPACK, MXNET_USE_LAPACK);
feature_bits.set(MKLDNN, MXNET_USE_MKLDNN);

// Image
feature_bits.set(OPENCV, MXNET_USE_OPENCV);

// Misc
feature_bits.set(CAFFE, MXNET_USE_CAFFE);
feature_bits.set(DIST_KVSTORE, MXNET_USE_DIST_KVSTORE);
feature_bits.set(SIGNAL_HANDLER, MXNET_USE_SIGNAL_HANDLER);
#ifndef NDEBUG
feature_bits.set(DEBUG);
#endif


#if USE_JEMALLOC == 1
feature_bits.set(JEMALLOC);
#endif
}
bool is_enabled(unsigned feat) {
bool is_enabled(const unsigned feat) const {
CHECK_LT(feat, MAX_FEATURES);
return feature_bits.test(feat);
}
Expand All @@ -84,7 +107,7 @@ class Storage {

// Single Storage instance that the free is_enabled() function queries.
static Storage storage;

bool is_enabled(unsigned feat) {
bool is_enabled(const unsigned feat) {
return storage.is_enabled(feat);
}

Expand Down

0 comments on commit 07d19a5

Please sign in to comment.