Skip to content

Commit

Permalink
add vector_add_vector kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
Critsium-xy committed Jan 14, 2025
1 parent 91e8dc2 commit 31d4dff
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 0 deletions.
72 changes: 72 additions & 0 deletions source/module_base/blas_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,4 +691,76 @@ void vector_div_vector(const int& dim, T* result, const T* vector1, const T* vec
hsolver::vector_div_vector_op<T, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, vector2);
#endif
}
}

// Scaled element-wise sum of two float vectors:
//   result[i] = vector1[i] * constant1 + vector2[i] * constant2   for 0 <= i < dim.
// device_type selects the backend:
//   - CpuDevice: plain loop, parallelised with OpenMP when _OPENMP is defined;
//   - GpuDevice: forwarded to hsolver::constantvector_addORsub_constantVector_op,
//     but only when the build defines __CUDA.
// A GpuDevice request in a build without __CUDA, or any other device_type,
// returns without touching result.
// NOTE(review): the header declares vector_add_vector as a static member of
// BlasConnector, while this definition is unqualified -- confirm this is intended.
void vector_add_vector(const int& dim, float *result, const float *vector1, const float constant1, const float *vector2, const float constant2, base_device::AbacusDevice_t device_type)
{
    if (device_type == base_device::GpuDevice)
    {
#ifdef __CUDA
        hsolver::constantvector_addORsub_constantVector_op<float, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, constant1, vector2, constant2);
#endif
        return;
    }
    if (device_type != base_device::CpuDevice)
    {
        return;
    }

    // Each static chunk spans 8192 / sizeof(float) consecutive elements.
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 8192 / sizeof(float))
#endif
    for (int idx = 0; idx < dim; ++idx)
    {
        result[idx] = vector1[idx] * constant1 + vector2[idx] * constant2;
    }
}

// Scaled element-wise sum of two double vectors:
//   result[i] = vector1[i] * constant1 + vector2[i] * constant2   for 0 <= i < dim.
// device_type selects the backend:
//   - CpuDevice: plain loop, parallelised with OpenMP when _OPENMP is defined;
//   - GpuDevice: forwarded to hsolver::constantvector_addORsub_constantVector_op,
//     but only when the build defines __CUDA.
// A GpuDevice request in a build without __CUDA, or any other device_type,
// returns without touching result.
// NOTE(review): the header declares vector_add_vector as a static member of
// BlasConnector, while this definition is unqualified -- confirm this is intended.
void vector_add_vector(const int& dim, double *result, const double *vector1, const double constant1, const double *vector2, const double constant2, base_device::AbacusDevice_t device_type)
{
    if (device_type == base_device::GpuDevice)
    {
#ifdef __CUDA
        hsolver::constantvector_addORsub_constantVector_op<double, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, constant1, vector2, constant2);
#endif
        return;
    }
    if (device_type != base_device::CpuDevice)
    {
        return;
    }

    // Each static chunk spans 8192 / sizeof(double) consecutive elements.
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 8192 / sizeof(double))
#endif
    for (int idx = 0; idx < dim; ++idx)
    {
        result[idx] = vector1[idx] * constant1 + vector2[idx] * constant2;
    }
}

// Scaled element-wise sum of two std::complex<float> vectors with real (float) weights:
//   result[i] = vector1[i] * constant1 + vector2[i] * constant2   for 0 <= i < dim.
// device_type selects the backend:
//   - CpuDevice: plain loop, parallelised with OpenMP when _OPENMP is defined;
//   - GpuDevice: forwarded to hsolver::constantvector_addORsub_constantVector_op,
//     but only when the build defines __CUDA.
// A GpuDevice request in a build without __CUDA, or any other device_type,
// returns without touching result.
// NOTE(review): the header declares vector_add_vector as a static member of
// BlasConnector, while this definition is unqualified -- confirm this is intended.
void vector_add_vector(const int& dim, std::complex<float> *result, const std::complex<float> *vector1, const float constant1, const std::complex<float> *vector2, const float constant2, base_device::AbacusDevice_t device_type)
{
    if (device_type == base_device::GpuDevice)
    {
#ifdef __CUDA
        hsolver::constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, constant1, vector2, constant2);
#endif
        return;
    }
    if (device_type != base_device::CpuDevice)
    {
        return;
    }

    // Each static chunk spans 8192 / sizeof(std::complex<float>) consecutive elements.
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 8192 / sizeof(std::complex<float>))
#endif
    for (int idx = 0; idx < dim; ++idx)
    {
        result[idx] = vector1[idx] * constant1 + vector2[idx] * constant2;
    }
}

// Scaled element-wise sum of two std::complex<double> vectors with real (double) weights:
//   result[i] = vector1[i] * constant1 + vector2[i] * constant2   for 0 <= i < dim.
// device_type selects the backend:
//   - CpuDevice: plain loop, parallelised with OpenMP when _OPENMP is defined;
//   - GpuDevice: forwarded to hsolver::constantvector_addORsub_constantVector_op,
//     but only when the build defines __CUDA.
// A GpuDevice request in a build without __CUDA, or any other device_type,
// returns without touching result.
// NOTE(review): the header declares vector_add_vector as a static member of
// BlasConnector, while this definition is unqualified -- confirm this is intended.
void vector_add_vector(const int& dim, std::complex<double> *result, const std::complex<double> *vector1, const double constant1, const std::complex<double> *vector2, const double constant2, base_device::AbacusDevice_t device_type)
{
    if (device_type == base_device::GpuDevice)
    {
#ifdef __CUDA
        hsolver::constantvector_addORsub_constantVector_op<std::complex<double>, base_device::DEVICE_GPU>()(gpu_ctx, dim, result, vector1, constant1, vector2, constant2);
#endif
        return;
    }
    if (device_type != base_device::CpuDevice)
    {
        return;
    }

    // Each static chunk spans 8192 / sizeof(std::complex<double>) consecutive elements.
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 8192 / sizeof(std::complex<double>))
#endif
    for (int idx = 0; idx < dim; ++idx)
    {
        result[idx] = vector1[idx] * constant1 + vector2[idx] * constant2;
    }
}
13 changes: 13 additions & 0 deletions source/module_base/blas_connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,19 @@ class BlasConnector
template <typename T>
static
void vector_div_vector(const int& dim, T* result, const T* vector1, const T* vector2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);

// Element-wise scaled sum (float):
//   result[i] = constant1 * vector1[i] + constant2 * vector2[i],  0 <= i < dim.
// This reduces to y = alpha * x + beta * y when result aliases vector2
// (NOTE(review): confirm in-place use is supported on the GPU path).
// device_type selects the CPU or GPU implementation and defaults to CpuDevice.
static
void vector_add_vector(const int& dim, float *result, const float *vector1, const float constant1, const float *vector2, const float constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);

// Element-wise scaled sum (double):
//   result[i] = constant1 * vector1[i] + constant2 * vector2[i],  0 <= i < dim.
// device_type selects the CPU or GPU implementation and defaults to CpuDevice.
static
void vector_add_vector(const int& dim, double *result, const double *vector1, const double constant1, const double *vector2, const double constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);

// Element-wise scaled sum (std::complex<float> vectors, real float weights):
//   result[i] = constant1 * vector1[i] + constant2 * vector2[i],  0 <= i < dim.
// device_type selects the CPU or GPU implementation and defaults to CpuDevice.
static
void vector_add_vector(const int& dim, std::complex<float> *result, const std::complex<float> *vector1, const float constant1, const std::complex<float> *vector2, const float constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);

// Element-wise scaled sum (std::complex<double> vectors, real double weights):
//   result[i] = constant1 * vector1[i] + constant2 * vector2[i],  0 <= i < dim.
// device_type selects the CPU or GPU implementation and defaults to CpuDevice.
static
void vector_add_vector(const int& dim, std::complex<double> *result, const std::complex<double> *vector1, const double constant1, const std::complex<double> *vector2, const double constant2, base_device::AbacusDevice_t device_type = base_device::AbacusDevice_t::CpuDevice);
};

// If GATHER_INFO is defined, the original function is replaced with a "i" suffix,
Expand Down
1 change: 1 addition & 0 deletions source/module_hsolver/kernels/cuda/math_kernel_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,7 @@ template struct line_minimize_with_block_op<std::complex<float>, base_device::DE
template struct vector_div_constant_op<std::complex<float>, base_device::DEVICE_GPU>;
template struct vector_mul_vector_op<std::complex<float>, base_device::DEVICE_GPU>;
template struct vector_div_vector_op<std::complex<float>, base_device::DEVICE_GPU>;
// Real-valued float instantiation; the float overload of vector_add_vector in
// blas_connector.cpp invokes this functor on its GPU path.
template struct constantvector_addORsub_constantVector_op<float, base_device::DEVICE_GPU>;
template struct constantvector_addORsub_constantVector_op<std::complex<float>, base_device::DEVICE_GPU>;
template struct matrixSetToAnother<std::complex<float>, base_device::DEVICE_GPU>;

Expand Down

0 comments on commit 31d4dff

Please sign in to comment.