-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathSolveImpEqnUpdate_YZ.F90
101 lines (85 loc) · 2.88 KB
/
SolveImpEqnUpdate_YZ.F90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! !
! FILE: SolveImpEqnUpdate_YZ.F90 !
! CONTAINS: subroutine SolveImpEqnUpdate_YZ !
! !
! PURPOSE: Inverts the implicit equation for velocity !
! in any of the horizontal directions, and updates !
! it to time t+dt !
! !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#ifdef USE_GPU
#define MY_ROUTINE(x) x##_GPU
#else
#define MY_ROUTINE(x) x##_CPU
#endif
subroutine MY_ROUTINE(SolveImpEqnUpdate_YZ)(q,rhs,am3sk,ac3sk,ap3sk)
#ifdef USE_GPU
use param, only: fp_kind, al, beta, nxm, nx, lvlhalo, amkl=>amkl_d, apkl=>apkl_d, ackl=>ackl_d, fkl=>fkl_d
use decomp_2d, only: xstart, xend, istart=>xstart_gpu, iend=>xend_gpu
use ep_solve
#else
use param, only: fp_kind, al, beta, nxm, nx, lvlhalo, amkl, apkl, ackl, fkl
use decomp_2d, only: xstart, xend, istart=>xstart_cpu, iend=>xend_cpu
#endif
use nvtx
implicit none
real(fp_kind), dimension(1:nx,xstart(2)-lvlhalo:xend(2)+lvlhalo,xstart(3)-lvlhalo:xend(3)+lvlhalo) :: q
real(fp_kind), dimension(1:nx,xstart(2):xend(2),xstart(3):xend(3)) :: rhs
real(fp_kind), dimension(1:nx), intent(IN) :: am3sk,ac3sk,ap3sk
#ifdef USE_GPU
attributes(device) :: am3sk,ac3sk,ap3sk,q,rhs
#endif
integer :: jc,kc,ic,nrhs,info,ipkv(nxm)
real(fp_kind) :: betadx,ackl_b
#ifndef USE_GPU
real(fp_kind) :: amkT(nxm-1),ackT(nxm),apkT(nxm-1),appk(nx-3)
#endif
betadx=beta*al
nrhs=(iend(3)-istart(3)+1)*(iend(2)-istart(2)+1)
#ifdef USE_GPU
!$cuf kernel do(1) <<<*,*>>>
#endif
do kc=1,nxm
ackl_b=real(1.0,fp_kind)/(real(1.0,fp_kind)-ac3sk(kc)*betadx)
amkl(kc)=-am3sk(kc)*betadx*ackl_b
ackl(kc)=real(1.0,fp_kind)
apkl(kc)=-ap3sk(kc)*betadx*ackl_b
enddo
#ifdef USE_GPU
call tepDgtsv_nopivot( nxm, nrhs, amkl, ackl, apkl, rhs, nx)
! Really slow way until we get a proper solver
!do ic=xstart(3),xend(3)
! do jc=xstart(2),xend(2)
! istat = cusparseDgtsv_nopivot(cusparseH, nxm, 1, amkl, ackl, apkl, rhs(1,jc,ic), nx)
! end do
!end do
#else
call nvtxStartRangeAsync("Solve CPU")
amkT=amkl(2:nxm)
apkT=apkl(1:(nxm-1))
ackT=ackl(1:nxm)
call dgttrf(nxm,amkT,ackT,apkT,appk,ipkv,info)
call dgttrs('N',nxm,nrhs,amkT,ackT,apkT,appk,ipkv,rhs(1, istart(2), istart(3)),nx,info)
call nvtxEndRangeAsync
#endif
#ifdef USE_GPU
!$cuf kernel do(3) <<<*,*>>>
#else
!$OMP PARALLEL DO &
!$OMP DEFAULT(none) &
!$OMP SHARED(rhs, q, istart, iend, nxm) &
!$OMP PRIVATE(ic, jc, kc)
#endif
do ic=istart(3),iend(3)
do jc=istart(2),iend(2)
do kc=1,nxm
q(kc,jc,ic)=q(kc,jc,ic) + rhs(kc,jc,ic)
end do
end do
end do
#ifndef USE_GPU
!$OMP END PARALLEL DO
#endif
return
end subroutine