Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
!-----------------------------------------------------------------------
!TEST: fa solo grouping locale
!
SUBROUTINE acc_ex(option)
!
!
!-----------------------------------------------------------------------
!
!
! Subroutine to compute the gravitational acceleration for all of
! the bodies. Vectorization is achieved by processing all of the
! cells at a given level in the tree simultaneously. The local
! variable option indicates whether the code is to compute the
! potential and/or acceleration.
!
! local_wg_bod is the number of clock cycle needed for a PE resident
! body having nterms=1
!=======================================================================
USE fly_h
implicit none
INCLUDE 'mpif.h'
! Declaration of local variables.
! -------------------------------
INTEGER :: n, m, q, ix_gr,ix_rec,istatus(MPI_STATUS_SIZE)
INTEGER :: tag1, tag2,tag3, req1, req2,req3,i
INTEGER(KIND=4) :: nterms, j, p
REAL(KIND=8) ::c0a, c1a, c2a, c2b, c2c
REAL(KIND=8) ::cpar_a, cpar_b,cpar_c,cpar_d
CHARACTER(LEN=4) :: option
INTEGER(KIND=4), DIMENSION (:), ALLOCATABLE :: iterms,iterms_gr
REAL(KIND=8), DIMENSION (:), ALLOCATABLE :: pmass,pmass_gr
REAL(KIND=8), DIMENSION (:), ALLOCATABLE :: drdotdr,dx,dy,dz
REAL(KIND=8), DIMENSION (:), ALLOCATABLE:: drdotdr_gr,dx_gr,dy_gr,dz_gr
REAL(KIND=8), DIMENSION (:,:), ALLOCATABLE ::pquad,pquad_gr
REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: acc_g
REAL(KIND=8), DIMENSION (:), ALLOCATABLE::pos_comm
!=======================================================================
!=======================================================================
! Initialize the interaction list diagnostics.
! --------------------------------------------
c0a=MPI_WTIME()
iterms=0
numbod=0
group_access=1
ix_gr=0
nterms=0
ctot_TW_rmt=0
ctot_GS_nt_rmt=0
rmt_acc=.TRUE.
numbod_300=0
tag1=1
tag2=2
tag3=3
!===========================
! data bodies communication
!===========================
cpar_d=MPI_WTIME()
DO i=1,NPES-1
ix_gr=me+i
ix_rec=me-i
IF(ix_gr.GE.NPES) ix_gr=ix_gr-NPES
IF(ix_rec.LT.0) ix_rec=ix_rec+NPES
NLONG=nb_res_loc(me+1)*3
CALL MPI_ISEND(pos(1,1), NLONG,MPI_REAL8, ix_gr, tag1, MPI_COMM_WORLD, req1, ierror)
CALL MPI_ISEND(acc(1,1), NLONG,MPI_REAL8, ix_gr, tag2, MPI_COMM_WORLD, req2, ierror)
NLONG=nb_res_loc(ix_rec+1)*3
CALL MPI_RECV(pos_rmt(1,1),NLONG,MPI_REAL8,ix_rec,tag1,MPI_COMM_WORLD, istatus,ierror)
CALL MPI_RECV(acc_rmt(1,1),NLONG,MPI_REAL8,ix_rec,tag2,MPI_COMM_WORLD, istatus,ierror)
CALL MPI_WAIT(req1, istatus, ierror)
CALL MPI_WAIT(req2, istatus, ierror)
CALL MPI_BARRIER(MPI_COMM_WORLD, ierror)
group_access=1 ! ungrouped flag
cpar_a=MPI_WTIME()
!!! QUI VERIFICA SE FUNZIONA la &!!
!$OMP PARALLEL PRIVATE(p, acc_g,nterms) &
!$OMP PRIVATE(N_LOC_ELE,iterms,pmass,) &
!$OMP PRIVATE(drdotdr,dx,dy,dz) &
!$OMP PRIVATE(pquad,acc_g,pos_comm)
ALLOCATE(iterms(maxnterm), STAT=status)
ALLOCATE(pmass(maxnterm), STAT=status)
ALLOCATE(drdotdr(maxnterm), STAT=status)
ALLOCATE(dx(maxnterm), STAT=status)
ALLOCATE(dy(maxnterm), STAT=status)
ALLOCATE(dz(maxnterm), STAT=status)
ALLOCATE(pquad(2*ndim-1,maxnterm), STAT=status)
ALLOCATE(acc_g(ndim), STAT=status)
ALLOCATE(pos_comm(ndim), STAT=status)
!$OMP PARALLEL DO
DO p=1,nb_res_loc(ix_rec+1)
!-----------------------------------------------------------------------
! Forming the interaction lists.
! p is the logical number of body
!-----------------------------------------------------------------------
110 CONTINUE
CALL ilist(p,nterms,iterms,pos_comm,pmass, drdotdr,dx,dy,dz,pquad)
!-----------------------------------------------------------------------
! Compute potential and the Force.
!-----------------------------------------------------------------------
CALL force(p,nterms,iterms,pos_com,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option)
ENDDO !p=1,nb_res_loc(ix_rec+1)
DEALLOCATE(iterms)
DEALLOCATE(pmass)
DEALLOCATE(drdotdr)
DEALLOCATE(dx)
DEALLOCATE(dy)
DEALLOCATE(dz)
DEALLOCATE(pquad)
DEALLOCATE(acc_g)
DEALLOCATE(pos_comm)
!$OMP END PARALLEL
cpar_c=MPI_WTIME()-cpar_a
rmt_time(ix_rec+1,me+1)=cpar_c
write(uterm,1000) 'Remote_local_computation: PE',me,' FROM PE ',ix_rec,' acc_ex sec =',cpar_c
call flush(uterm)
1000 format(x,a,i3,2x,a,i3,2x,a,g18.8)
CALL MPI_BARRIER(MPI_COMM_WORLD, ierror)
NLONG=nb_res_loc(ix_rec+1)*3
CALL MPI_ISEND(acc_rmt(1,1), NLONG,MPI_REAL8, ix_rec, tag3, MPI_COMM_WORLD, req3, ierror)
NLONG=nb_res_loc(me+1)*3
CALL MPI_RECV(acc(1,1),NLONG,MPI_REAL8,ix_gr,tag3,MPI_COMM_WORLD, istatus, ierror)
CALL MPI_WAIT(req3, istatus, ierror)
numbod_300=numbod_300+nb_res_loc(ix_rec+1)
CALL MPI_BARRIER(MPI_COMM_WORLD, ierror)
ENDDO !i=1,NPES-1
c1a=MPI_WTIME()
ctotc=(c1a-c0a)
cpar_b=MPI_WTIME()-cpar_d
write(uterm,*)"PE=",me," TOTAL analyzed remote body in sec ",cpar_b
call flush(uterm)
rmt_acc=.FALSE.
RETURN
END