Skip to content

Commit

Permalink
dRPA (with no TDA) on GPU: V0
Browse files Browse the repository at this point in the history
  • Loading branch information
AbdAmmar committed Nov 29, 2024
1 parent fd4dc5b commit 542cce2
Show file tree
Hide file tree
Showing 10 changed files with 339 additions and 58 deletions.
16 changes: 16 additions & 0 deletions src/cuda/include/my_linalg.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
 * Declarations of the host-callable linear-algebra helpers implemented in the
 * CUDA sources. The wrappers whose definitions are visible in this commit pass
 * their pointer arguments straight to a kernel launch, so those pointers must
 * be dereferenceable on the device.
 */
#ifndef MY_LINALG

#define MY_LINALG

/*
 * The visible kernel line accumulates R[ij] += D[k] * A[i + k*n] * A[j + k*n];
 * presumably R = A * diag(D) * A^T -- TODO confirm (how ij is formed is not
 * visible here).
 */
extern void A_D_At(int n, double *A, double *D, double *R);
/*
 * Presumably R = A * diag(D)^-1 * A^T -- TODO confirm.
 * NOTE(review): the visible kernel line multiplies by D[k] and divides by
 * (D[k] + 1e-12), which nearly cancels D[k]; verify this is intended.
 */
extern void A_Dinv_At(int n, double *A, double *D, double *R);

/* In place: A[i*n + j] *= D[i] for 0 <= i, j < n. */
extern void A_D_inplace(int n, double *A, double *D);
/* In place: A[i*n + j] *= 1.0 / (1e-12 + D[i]) for 0 <= i, j < n. */
extern void A_Dinv_inplace(int n, double *A, double *D);

/*
 * A_Sq[i] = sqrt(A[i]) when A[i] > 0, otherwise sqrt(-A[i]), for 0 <= i < nS.
 * Negative entries are therefore not reported; the square root of their
 * magnitude is stored instead.
 */
extern void elementwise_dsqrt(int nS, double *A, double *A_Sq);
/*
 * Presumably the in-place counterpart of elementwise_dsqrt (result written
 * back into A) -- TODO confirm, the kernel body is not visible here.
 */
extern void elementwise_dsqrt_inplace(int nS, double *A);

/*
 * NOTE(review): the name suggests a dense symmetric eigen-solver (dsyevd) with
 * eigenvalues in W and a status code in info -- confirm against the
 * implementation, which is not part of this view.
 */
extern void diag_dn_dsyevd(int n, int *info, double *W, double *A);

#endif
3 changes: 2 additions & 1 deletion src/cuda/include/ph_rpa.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
extern void ph_dRPA_A_sing(int nO, int nV, int nBas, int nS, double *eps, double *ERI, double *A);
extern void ph_dRPA_B_sing(int nO, int nV, int nBas, int nS, double *ERI, double *B);

extern void diag_dn_dsyevd(int n, int *info, double *W, double *A);
extern void ph_dRPA_ApB_sing(int nO, int nV, int nBas, int nS, double *eps, double *ERI, double *ApB);
extern void ph_dRPA_AmB_sing(int nO, int nV, int nBas, int nS, double *eps, double *ERI, double *AmB);

#endif
2 changes: 1 addition & 1 deletion src/cuda/src/a_d_at.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ __global__ void A_D_At_kernel(int n, double *A, double *D, double *R) {
while(k < n) {

kn = k * n;
R[ij] += D[k] * U[i + kn] * U[j + kn];
R[ij] += D[k] * A[i + kn] * A[j + kn];

k ++;
} // k
Expand Down
57 changes: 57 additions & 0 deletions src/cuda/src/a_d_inplace.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include <stdio.h>


/*
 * Scales A in place: A[i*n + j] *= D[i] for every 0 <= i, j < n.
 *
 *   n : extent of both index ranges (A holds n*n doubles, D holds n doubles)
 *   A : matrix, overwritten with the scaled values
 *   D : scaling factors, read only
 *
 * Launch layout: 2D grid of 2D blocks; the x dimension walks i and the y
 * dimension walks j. Both loops advance by the total number of launched
 * threads in their dimension, so any grid/block shape produces the full
 * result (including a single thread).
 *
 * NOTE(review): threads adjacent in threadIdx.x touch addresses n elements
 * apart (i is multiplied by n); swapping the roles of x and y would make the
 * accesses of neighbouring threads contiguous. Left unchanged here.
 */
__global__ void A_D_inplace_kernel(int n, double *A, double *D) {

    const int i_first = blockIdx.x * blockDim.x + threadIdx.x;
    const int j_first = blockIdx.y * blockDim.y + threadIdx.y;

    const int i_step = blockDim.x * gridDim.x;
    const int j_step = blockDim.y * gridDim.y;

    for (int i = i_first; i < n; i += i_step) {

        // 64-bit offset: i * n does not fit in an int once n exceeds ~46340.
        const size_t in = (size_t) i * (size_t) n;

        const double tmp = D[i];

        // j must restart from this thread's own offset for every i; carrying
        // the exhausted j over from the previous i would skip the whole
        // inner loop and leave those entries unscaled.
        for (int j = j_first; j < n; j += j_step) {

            A[in + j] *= tmp;

        } // j

    } // i

}





/*
 * Host wrapper: launches A_D_inplace_kernel on an n x n problem using
 * 32x32-thread blocks and a ceil(n/32) x ceil(n/32) grid.
 *
 *   n : extent passed to the kernel; n <= 0 is a no-op
 *   A : matrix pointer handed to the kernel (must be device-accessible)
 *   D : scaling-factor pointer handed to the kernel (must be device-accessible)
 *
 * The launch is asynchronous; this function does not synchronize. A failed
 * launch is reported on stderr.
 */
extern "C" void A_D_inplace(int n, double *A, double *D) {

    // Nothing to do, and a zero-sized grid is an invalid launch configuration.
    if (n <= 0) {
        return;
    }

    int sBlocks = 32;
    int nBlocks = (n + sBlocks - 1) / sBlocks;

    dim3 dimGrid(nBlocks, nBlocks, 1);
    dim3 dimBlock(sBlocks, sBlocks, 1);

    printf("lunching A_D_inplace_kernel with %dx%d blocks and %dx%d threads/block\n",
           nBlocks, nBlocks, sBlocks, sBlocks);


    A_D_inplace_kernel<<<dimGrid, dimBlock>>>(n, A, D);

    // Kernel launches return nothing; configuration errors are only visible
    // through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "A_D_inplace: kernel launch failed: %s\n", cudaGetErrorString(err));
    }

}



2 changes: 1 addition & 1 deletion src/cuda/src/a_dinv_at.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ __global__ void A_Dinv_At_kernel(int n, double *A, double *D, double *R) {
while(k < n) {

kn = k * n;
R[ij] += D[k] * U[i + kn] * U[j + kn] / (D[k] + 1e-12);
R[ij] += D[k] * A[i + kn] * A[j + kn] / (D[k] + 1e-12);

k ++;
} // k
Expand Down
57 changes: 57 additions & 0 deletions src/cuda/src/a_dinv_inplace.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include <stdio.h>


/*
 * Scales A in place by shifted reciprocals of D:
 *   A[i*n + j] *= 1.0 / (1e-12 + D[i])   for every 0 <= i, j < n.
 *
 *   n : extent of both index ranges (A holds n*n doubles, D holds n doubles)
 *   A : matrix, overwritten with the scaled values
 *   D : divisors, read only
 *
 * The 1e-12 shift is added to D[i] before taking the reciprocal; presumably
 * it guards against division by zero -- note it does not help when D[i] is
 * close to -1e-12.
 *
 * Launch layout: 2D grid of 2D blocks; the x dimension walks i and the y
 * dimension walks j. Both loops advance by the total number of launched
 * threads in their dimension, so any grid/block shape produces the full
 * result (including a single thread).
 */
__global__ void A_Dinv_inplace_kernel(int n, double *A, double *D) {

    const int i_first = blockIdx.x * blockDim.x + threadIdx.x;
    const int j_first = blockIdx.y * blockDim.y + threadIdx.y;

    const int i_step = blockDim.x * gridDim.x;
    const int j_step = blockDim.y * gridDim.y;

    for (int i = i_first; i < n; i += i_step) {

        // 64-bit offset: i * n does not fit in an int once n exceeds ~46340.
        const size_t in = (size_t) i * (size_t) n;

        // One reciprocal per i, reused for every j.
        const double tmp = 1.0 / (1e-12 + D[i]);

        // j must restart from this thread's own offset for every i; carrying
        // the exhausted j over from the previous i would skip the whole
        // inner loop and leave those entries unscaled.
        for (int j = j_first; j < n; j += j_step) {

            A[in + j] *= tmp;

        } // j

    } // i

}





/*
 * Host wrapper: launches A_Dinv_inplace_kernel on an n x n problem using
 * 32x32-thread blocks and a ceil(n/32) x ceil(n/32) grid.
 *
 *   n : extent passed to the kernel; n <= 0 is a no-op
 *   A : matrix pointer handed to the kernel (must be device-accessible)
 *   D : divisor pointer handed to the kernel (must be device-accessible)
 *
 * The launch is asynchronous; this function does not synchronize. A failed
 * launch is reported on stderr.
 */
extern "C" void A_Dinv_inplace(int n, double *A, double *D) {

    // Nothing to do, and a zero-sized grid is an invalid launch configuration.
    if (n <= 0) {
        return;
    }

    int sBlocks = 32;
    int nBlocks = (n + sBlocks - 1) / sBlocks;

    dim3 dimGrid(nBlocks, nBlocks, 1);
    dim3 dimBlock(sBlocks, sBlocks, 1);

    printf("lunching A_Dinv_inplace_kernel with %dx%d blocks and %dx%d threads/block\n",
           nBlocks, nBlocks, sBlocks, sBlocks);


    A_Dinv_inplace_kernel<<<dimGrid, dimBlock>>>(n, A, D);

    // Kernel launches return nothing; configuration errors are only visible
    // through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "A_Dinv_inplace: kernel launch failed: %s\n", cudaGetErrorString(err));
    }

}



51 changes: 51 additions & 0 deletions src/cuda/src/elementwise_dsqrt.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#include <stdio.h>
#include <math.h>


/*
 * Writes the square root of the magnitude of each input entry:
 *   A_Sq[idx] = sqrt(A[idx])    when A[idx] > 0
 *   A_Sq[idx] = sqrt(-A[idx])   otherwise
 * for every 0 <= idx < nS.
 *
 *   nS   : number of entries to process
 *   A    : input values, read only
 *   A_Sq : output values
 *
 * Launch layout: 1D grid of 1D blocks. Each thread starts at its global x
 * index and advances by the total number of launched threads, so any launch
 * size covers all nS entries.
 */
__global__ void elementwise_dsqrt_kernel(int nS, double *A, double *A_Sq) {

    const unsigned int step = blockDim.x * gridDim.x;

    for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < nS; idx += step) {

        const double value = A[idx];

        // Non-positive entries are negated before the square root rather
        // than being rejected.
        A_Sq[idx] = (value > 0.0) ? sqrt(value) : sqrt(-value);

    } // idx

}





/*
 * Host wrapper: launches elementwise_dsqrt_kernel over nS entries using
 * 32-thread blocks and a ceil(nS/32)-block grid.
 *
 *   nS   : number of entries; nS <= 0 is a no-op
 *   A    : input pointer handed to the kernel (must be device-accessible)
 *   A_Sq : output pointer handed to the kernel (must be device-accessible)
 *
 * The launch is asynchronous; this function does not synchronize. A failed
 * launch is reported on stderr.
 */
extern "C" void elementwise_dsqrt(int nS, double *A, double *A_Sq) {

    // Nothing to do, and a zero-sized grid is an invalid launch configuration.
    if (nS <= 0) {
        return;
    }

    int sBlocks = 32;
    int nBlocks = (nS + sBlocks - 1) / sBlocks;

    dim3 dimGrid(nBlocks, 1, 1);
    dim3 dimBlock(sBlocks, 1, 1);

    printf("lunching elementwise_dsqrt_kernel with %d blocks and %d threads/block\n",
           nBlocks, sBlocks);


    elementwise_dsqrt_kernel<<<dimGrid, dimBlock>>>(nS, A, A_Sq);

    // Kernel launches return nothing; configuration errors are only visible
    // through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "elementwise_dsqrt: kernel launch failed: %s\n", cudaGetErrorString(err));
    }

}




7 changes: 3 additions & 4 deletions src/cuda/src/elementwise_dsqrt_inplace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@
#include <math.h>


__global__ void elementwise_dsqrt_inplace_kernel(int nS, double *A, int *nb_neg_sqrt) {
__global__ void elementwise_dsqrt_inplace_kernel(int nS, double *A) {


int i;

i = blockIdx.x * blockDim.x + threadIdx.x;
nb_neg_sqrt = 0;

while(i < nS) {

Expand All @@ -31,7 +30,7 @@ __global__ void elementwise_dsqrt_inplace_kernel(int nS, double *A, int *nb_neg_



extern "C" void elementwise_dsqrt_inplace(int nS, double *A, int *nb_neg_sqrt) {
extern "C" void elementwise_dsqrt_inplace(int nS, double *A) {

int sBlocks = 32;
int nBlocks = (nS + sBlocks - 1) / sBlocks;
Expand All @@ -43,7 +42,7 @@ extern "C" void elementwise_dsqrt_inplace(int nS, double *A, int *nb_neg_sqrt) {
nBlocks, sBlocks);


elementwise_dsqrt_inplace_kernel<<<dimGrid, dimBlock>>>(nS, A, nb_neg_sqrt);
elementwise_dsqrt_inplace_kernel<<<dimGrid, dimBlock>>>(nS, A);

}

Expand Down
Loading

0 comments on commit 542cce2

Please sign in to comment.