Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
!-----------------------------------------------------------------------
!TEST: fa solo grouping locale
!
SUBROUTINE acc_comp(option)
!
!
!-----------------------------------------------------------------------
!
!
! Subroutine to compute the gravitational acceleration for all of
! the bodies. Vectorization is achieved by processing all of the
! cells at a given level in the tree simultaneously. The local
! variable option indicates whether the code is to compute the
! potential and/or acceleration.
!
! local_wg_bod is the number of clock cycle needed for a PE resident
! body having nterms=1
!=======================================================================
USE fly_h
implicit none
INCLUDE 'mpif.h'
! Declaration of local variables.
! -------------------------------
INTEGER :: TID, NTID,N_LOC_ELE,status
INTEGER :: n, q
INTEGER(KIND=4) :: ele, nterms, nterms_gr, bcount_ele, j, p, uno
INTEGER(KIND=4) :: mio_ele
INTEGER(KIND=4), DIMENSION (:), ALLOCATABLE :: iterms,iterms_gr
REAL(KIND=8), DIMENSION (:), ALLOCATABLE :: pmass,pmass_gr
REAL(KIND=8), DIMENSION (:), ALLOCATABLE :: drdotdr,dx,dy,dz
REAL(KIND=8), DIMENSION (:), ALLOCATABLE:: drdotdr_gr,dx_gr,dy_gr,dz_gr
REAL(KIND=8), DIMENSION (:,:), ALLOCATABLE ::pquad,pquad_gr
REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: acc_g
REAL(KIND=8), DIMENSION (:), ALLOCATABLE::pos_comm
INTEGER :: count_par
REAL(KIND=8) :: c1a, c2a, c2b, c2c, ctwg, cgs_g, cgs_b, cg1,cg2
REAL(KIND=8) ::cpar_a, cpar_b
CHARACTER(LEN=4) :: option
!=======================================================================
!=======================================================================
! Initialize the interaction list diagnostics.
! --------------------------------------------
!***************************************************
!
! m_sh, max_sh, max_pr_bal
! are computed by load_balance once before the iterations
!
!**************************************************
uno=1
group_access=0
nterms=0
nterms_gr=0
bcount_ele=0
mark_bod_gr=0
ctot_TW=0
ctot_GS_nt=0
mio_ele = 0
!=======================================================================
! GROUPING SECTION
!=======================================================================
! We find the force for the bodies of a cell of the grouping as the
! sum of two components. The first component is equal for all the
! bodies of the cell-grouping and it is due at the cells and at the
! bodies outside at the cell-grouping. The second component is
! different by body at body of the cell-grouping and it is due at
! the interactions between the bodies of the cell-grouping.
!=======================================================================
ctwg=0
cgs_g=0
cgs_b=0
!-------------------------------------------------------
! count_group_arr(#PE)=cg_loc of the #PE
! ilocal_work_gr the number of local grouping cell resolved by the PE
! ilocal_save maximum number of local grouping cell resolved by each PE
! grouping_rmt receive the grouping cell non locally resolved by the remote PE
! iblk: used for atomic update for non locally resolved grouping cell
! no_dlb_space if set to 1 there is no free space to execute dlb section of grouping
!-------------------------------------------------------
IF(me .EQ. 0) write(uterm,*)'Grouping section started'
cg1=MPI_WTIME()
!-----------------------------------------------------------------------
! Analysis of grouping cell in the PE=ix_gr
! ix_gr start from the local PE and cycle for all the PEs
! load grouping_rmt with grouping cells of a remote PE
!-----------------------------------------------------------------------
count_par=0
cpar_a=MPI_WTIME()
NTID = 1
!$ NTID = OMP_GET_MAX_THREADS();
!$OMP PARALLEL PRIVATE(mio_ele, ele,count_par,acc_g,nterms) &
!$OMP PRIVATE(nterms_gr, bcount_ele,j,q,NTID,TID) &
!$OMP PRIVATE(p,N_LOC_ELE,iterms,iterms_gr,pmass,pmass_gr) &
!$OMP PRIVATE(drdotdr,dx,dy,dz) &
!$OMP PRIVATE(drdotdr_gr,dx_gr,dy_gr,dz_gr,pquad,pquad_gr,pos_comm)
ALLOCATE(iterms(maxnterm), STAT=status)
ALLOCATE(iterms_gr(maxnterm), STAT=status)
ALLOCATE(pmass(maxnterm), STAT=status)
ALLOCATE(pmass_gr(maxnterm), STAT=status)
ALLOCATE(drdotdr(maxnterm), STAT=status)
ALLOCATE(dx(maxnterm), STAT=status)
ALLOCATE(dy(maxnterm), STAT=status)
ALLOCATE(dz(maxnterm), STAT=status)
ALLOCATE(drdotdr_gr(maxnterm), STAT=status)
ALLOCATE(dx_gr(maxnterm), STAT=status)
ALLOCATE(dy_gr(maxnterm), STAT=status)
ALLOCATE(dz_gr(maxnterm), STAT=status)
ALLOCATE(pquad(2*ndim-1,maxnterm), STAT=status)
ALLOCATE(pquad_gr(2*ndim-1,maxnterm), STAT=status)
ALLOCATE(acc_g(ndim), STAT=status)
ALLOCATE(pos_comm(ndim), STAT=status)
TID = 0
NTID = 1
!$ TID = OMP_GET_THREAD_NUM();
!$ NTID = OMP_GET_MAX_THREADS();
N_LOC_ELE=cg_loc/NTID
IF(MOD(cg_loc,NTID) .GT. TID) N_LOC_ELE=N_LOC_ELE+1
mio_ele=N_LOC_ELE*TID ! staring element of this thread
IF(TID.GT.0) THEN
IF(MOD(cg_loc,NTID) .LT. TID) THEN
mio_ele=mio_ele+MOD(cg_loc,NTID)
ENDIF
ENDIF
50 CONTINUE ! This is a cycle on the grouping cell on the PE ix_gr
!-------------------------------------------------------------------------
! iblk2 is an array atomically updated that cointain the number of cells already
! computed on the PE.
! mio_ele contains the number of elemnt to be processed: locally from 1 to
! ilocal_work_gr, and remotely (or locally for shared gr-cells), as computed
! from iblk2 array that is atomically updated
!-------------------------------------------------------------------------
mio_ele=mio_ele+1
IF(mio_ele.GT.N_LOC_ELE) THEN! there are no more grouping cell in this thread/ processor
GOTO 60
ENDIF
!-------------------------------------------------------------------------
! ele contain the number of gr-cell to be elaborated: local or remote
! cell stored in grouping_rmt array
!-------------------------------------------------------------------------
ele=grouping(mio_ele)
if(me.eq.1) write(uterm,*)"TP00 PE-TID=",me,TID," mio_ele=",mio_ele," ele=",ele, " N_LOC_ELE=",N_LOC_ELE
count_par=count_par+1
acc_g=0
! Forming the interaction lists.
! ----------------------------
CALL ilist_group (ele,nterms,nterms_gr,bcount_ele,iterms,iterms_gr, pmass, pmass_gr, pquad,pquad_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr)
!
! Compute potential and/or acceleration.
! --------------------------------------
!
!
! Compute of the pot. and F_far of a grouping cell
! --------------------------------------------------
!
CALL force_group(ele,nterms_gr,iterms_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr,pmass_gr,acc_g,pquad_gr,option,TID)
!-----------------------------------------------------------------------
! Computation of the pot. F_near and F_tot of each body in tghe grouping cell
! Mark to 1 (local or remomtely wuth a PUT operation) the flag (mark_bod_gr)
! of each body that is computed in this section
!-----------------------------------------------------------------------
! if(mio_ele.eq.1.and.me.eq.0) write(uterm,*)"PE=",me,"ele=",ele," acc_g=",acc_g, " nterms:",nterms," iterms ",iterms(1:nterms), " nterms_gr:",nterms_gr," iterms_gr ",iterms_gr(1:nterms_gr)
DO q=nterms-bcount_ele+1,nterms
j=iterms(q)
mark_bod_gr(j)=uno
CALL force(j,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option)
if(j.eq.459.and.me.eq.1) write(uterm,*)"TP0 PE=",me," ele=",ele," j=",j, " nterms:",nterms," iterms ",iterms(1:nterms), " nterms_gr:",nterms_gr," iterms_gr ",iterms_gr(1:nterms_gr)," acc_g=",acc_g(1),acc_g(2),acc_g(3)," acc(1:ndim,p)=", acc(1:ndim,j)," mark_bod_gr(j)=",mark_bod_gr(j)
! numbod=numbod+1
ENDDO !q=nterms-bcount_ele+1,nterms
GOTO 50 ! Cycle on the grouping cell on the PE ix_gr
60 CONTINUE
1100 FORMAT(a,i3,3(a,i9))
1200 FORMAT(a,f5.2,a,f5.2)
!$OMP BARRIER
if(me.eq.1) write(uterm,*)"TP0.1 PE=",me," acc(1:3,459)=",acc(1:ndim,459)," mark_bod_gr(j)=",mark_bod_gr(j)
IF(me.eq.0 .and. TID.eq.0) THEN
cg2=MPI_WTIME()
ctwg=ctwg+(cg2-cg1)
write(uterm,1000)'GROUPING: PE=',me,' TIME=',ctwg,' Tot gr-cells=',cg_loc
call flush(uterm)
ENDIF
1000 FORMAT(a,i3,1(a,g15.4))
!-----------------------------------------------------------------------
! LOCAL FORCE COMPUTATION
!-----------------------------------------------------------------------
! In this section each PE compute the force for a subset of the local
! bodies, that were not computed in the grouping part
!-----------------------------------------------------------------------
group_access=1 ! ungrouped flag
IF(TID.eq.0) c2a=MPI_WTIME()
count_par=0
!$OMP DO
DO 100 p=1,nb_res_loc(me+1)
!-----------------------------------------------------------------------
! Forming the interaction lists.
! p is the logical number of body
!-----------------------------------------------------------------------
IF(mark_bod_gr(p).ge.1) CYCLE ! skip this particle. It was already computed in the grouping section
count_par=count_par+1
! numbod_100=numbod_100+1
CALL ilist(p,nterms,iterms,pos_comm,pmass, drdotdr,dx,dy,dz,pquad)
!-----------------------------------------------------------------------
! Compute potential and the Force.
!-----------------------------------------------------------------------
CALL force(p,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option)
100 CONTINUE
!$OMP END DO
DEALLOCATE(drdotdr)
DEALLOCATE(dx)
DEALLOCATE(dy)
DEALLOCATE(dz)
DEALLOCATE(drdotdr_gr)
DEALLOCATE(dx_gr)
DEALLOCATE(dy_gr)
DEALLOCATE(dz_gr)
DEALLOCATE(iterms)
DEALLOCATE(iterms_gr)
DEALLOCATE(pmass)
DEALLOCATE(pmass_gr)
DEALLOCATE(pquad)
DEALLOCATE(pquad_gr)
DEALLOCATE(acc_g)
DEALLOCATE(pos_comm)
!$OMP END PARALLEL
c2b=MPI_WTIME()
ctota=ctot_TW+(c2b-c2a)
CALL MPI_BARRIER(MPI_COMM_WORLD,ierror)
if(me.eq.1) write(uterm,*)"TP1 PE=",me," j=",459," acc=",acc(1:ndim,459)
RETURN
END