!----------------------------------------------------------------------- !TEST: fa solo grouping locale ! SUBROUTINE acc_comp(option) ! ! !----------------------------------------------------------------------- ! ! ! Subroutine to compute the gravitational acceleration for all of ! the bodies. Vectorization is achieved by processing all of the ! cells at a given level in the tree simultaneously. The local ! variable option indicates whether the code is to compute the ! potential and/or acceleration. ! ! local_wg_bod is the number of clock cycle needed for a PE resident ! body having nterms=1 !======================================================================= USE fly_h implicit none INCLUDE 'mpif.h' ! Declaration of local variables. ! ------------------------------- INTEGER :: TID, NTID,N_LOC_ELE,status INTEGER :: n, q INTEGER(KIND=4) :: ele, nterms, nterms_gr, bcount_ele, j, p, uno INTEGER(KIND=4) :: mio_ele INTEGER(KIND=4), DIMENSION (:), ALLOCATABLE :: iterms,iterms_gr REAL(KIND=8), DIMENSION (:), ALLOCATABLE :: pmass,pmass_gr REAL(KIND=8), DIMENSION (:), ALLOCATABLE :: drdotdr,dx,dy,dz REAL(KIND=8), DIMENSION (:), ALLOCATABLE:: drdotdr_gr,dx_gr,dy_gr,dz_gr REAL(KIND=8), DIMENSION (:,:), ALLOCATABLE ::pquad,pquad_gr REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: acc_g REAL(KIND=8), DIMENSION (:), ALLOCATABLE::pos_comm INTEGER :: count_par REAL(KIND=8) :: c1a, c2a, c2b, c2c, ctwg, cgs_g, cgs_b, cg1,cg2 REAL(KIND=8) ::cpar_a, cpar_b CHARACTER(LEN=4) :: option !======================================================================= !======================================================================= ! Initialize the interaction list diagnostics. ! -------------------------------------------- !*************************************************** ! ! m_sh, max_sh, max_pr_bal ! are computed by load_balance once before the iterations ! !************************************************** uno=1 group_access=0 nterms=0 nterms_gr=0 bcount_ele=0 mark_bod_gr=0 ctot_TW=0 ctot_GS_nt=0 mio_ele = 0 !======================================================================= ! GROUPING SECTION !======================================================================= ! We find the force for the bodies of a cell of the grouping as the ! sum of two components. The first component is equal for all the ! bodies of the cell-grouping and it is due at the cells and at the ! bodies outside at the cell-grouping. The second component is ! different by body at body of the cell-grouping and it is due at ! the interactions between the bodies of the cell-grouping. !======================================================================= ctwg=0 cgs_g=0 cgs_b=0 !------------------------------------------------------- ! count_group_arr(#PE)=cg_loc of the #PE ! ilocal_work_gr the number of local grouping cell resolved by the PE ! ilocal_save maximum number of local grouping cell resolved by each PE ! grouping_rmt receive the grouping cell non locally resolved by the remote PE ! iblk: used for atomic update for non locally resolved grouping cell ! no_dlb_space if set to 1 there is no free space to execute dlb section of grouping !------------------------------------------------------- IF(me .EQ. 0) write(uterm,*)'Grouping section started' cg1=MPI_WTIME() !----------------------------------------------------------------------- ! Analysis of grouping cell in the PE=ix_gr ! ix_gr start from the local PE and cycle for all the PEs ! load grouping_rmt with grouping cells of a remote PE !----------------------------------------------------------------------- count_par=0 cpar_a=MPI_WTIME() NTID = 1 !$ NTID = OMP_GET_MAX_THREADS(); !$OMP PARALLEL PRIVATE(mio_ele, ele,count_par,acc_g,nterms) & !$OMP PRIVATE(nterms_gr, bcount_ele,j,q,NTID,TID) & !$OMP PRIVATE(p,N_LOC_ELE,iterms,iterms_gr,pmass,pmass_gr) & !$OMP PRIVATE(drdotdr,dx,dy,dz) & !$OMP PRIVATE(drdotdr_gr,dx_gr,dy_gr,dz_gr,pquad,pquad_gr,pos_comm) ALLOCATE(iterms(maxnterm), STAT=status) ALLOCATE(iterms_gr(maxnterm), STAT=status) ALLOCATE(pmass(maxnterm), STAT=status) ALLOCATE(pmass_gr(maxnterm), STAT=status) ALLOCATE(drdotdr(maxnterm), STAT=status) ALLOCATE(dx(maxnterm), STAT=status) ALLOCATE(dy(maxnterm), STAT=status) ALLOCATE(dz(maxnterm), STAT=status) ALLOCATE(drdotdr_gr(maxnterm), STAT=status) ALLOCATE(dx_gr(maxnterm), STAT=status) ALLOCATE(dy_gr(maxnterm), STAT=status) ALLOCATE(dz_gr(maxnterm), STAT=status) ALLOCATE(pquad(2*ndim-1,maxnterm), STAT=status) ALLOCATE(pquad_gr(2*ndim-1,maxnterm), STAT=status) ALLOCATE(acc_g(ndim), STAT=status) ALLOCATE(pos_comm(ndim), STAT=status) TID = 0 NTID = 1 !$ TID = OMP_GET_THREAD_NUM(); !$ NTID = OMP_GET_MAX_THREADS(); N_LOC_ELE=cg_loc/NTID IF(MOD(cg_loc,NTID) .GT. TID) N_LOC_ELE=N_LOC_ELE+1 mio_ele=N_LOC_ELE*TID ! staring element of this thread IF(TID.GT.0) THEN IF(MOD(cg_loc,NTID) .LT. TID) THEN mio_ele=mio_ele+MOD(cg_loc,NTID) ENDIF ENDIF 50 CONTINUE ! This is a cycle on the grouping cell on the PE ix_gr !------------------------------------------------------------------------- ! iblk2 is an array atomically updated that cointain the number of cells already ! computed on the PE. ! mio_ele contains the number of elemnt to be processed: locally from 1 to ! ilocal_work_gr, and remotely (or locally for shared gr-cells), as computed ! from iblk2 array that is atomically updated !------------------------------------------------------------------------- mio_ele=mio_ele+1 IF(mio_ele.GT.N_LOC_ELE) THEN! there are no more grouping cell in this thread/ processor GOTO 60 ENDIF !------------------------------------------------------------------------- ! ele contain the number of gr-cell to be elaborated: local or remote ! cell stored in grouping_rmt array !------------------------------------------------------------------------- ele=grouping(mio_ele) if(me.eq.1) write(uterm,*)"TP00 PE-TID=",me,TID," mio_ele=",mio_ele," ele=",ele, " N_LOC_ELE=",N_LOC_ELE count_par=count_par+1 acc_g=0 ! Forming the interaction lists. ! ---------------------------- CALL ilist_group (ele,nterms,nterms_gr,bcount_ele,iterms,iterms_gr, pmass, pmass_gr, pquad,pquad_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr) ! ! Compute potential and/or acceleration. ! -------------------------------------- ! ! ! Compute of the pot. and F_far of a grouping cell ! -------------------------------------------------- ! CALL force_group(ele,nterms_gr,iterms_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr,pmass_gr,acc_g,pquad_gr,option,TID) !----------------------------------------------------------------------- ! Computation of the pot. F_near and F_tot of each body in tghe grouping cell ! Mark to 1 (local or remomtely wuth a PUT operation) the flag (mark_bod_gr) ! of each body that is computed in this section !----------------------------------------------------------------------- ! if(mio_ele.eq.1.and.me.eq.0) write(uterm,*)"PE=",me,"ele=",ele," acc_g=",acc_g, " nterms:",nterms," iterms ",iterms(1:nterms), " nterms_gr:",nterms_gr," iterms_gr ",iterms_gr(1:nterms_gr) DO q=nterms-bcount_ele+1,nterms j=iterms(q) mark_bod_gr(j)=uno CALL force(j,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option) if(j.eq.459.and.me.eq.1) write(uterm,*)"TP0 PE=",me," ele=",ele," j=",j, " nterms:",nterms," iterms ",iterms(1:nterms), " nterms_gr:",nterms_gr," iterms_gr ",iterms_gr(1:nterms_gr)," acc_g=",acc_g(1),acc_g(2),acc_g(3)," acc(1:ndim,p)=", acc(1:ndim,j)," mark_bod_gr(j)=",mark_bod_gr(j) ! numbod=numbod+1 ENDDO !q=nterms-bcount_ele+1,nterms GOTO 50 ! Cycle on the grouping cell on the PE ix_gr 60 CONTINUE 1100 FORMAT(a,i3,3(a,i9)) 1200 FORMAT(a,f5.2,a,f5.2) !$OMP BARRIER if(me.eq.1) write(uterm,*)"TP0.1 PE=",me," acc(1:3,459)=",acc(1:ndim,459)," mark_bod_gr(j)=",mark_bod_gr(j) IF(me.eq.0 .and. TID.eq.0) THEN cg2=MPI_WTIME() ctwg=ctwg+(cg2-cg1) write(uterm,1000)'GROUPING: PE=',me,' TIME=',ctwg,' Tot gr-cells=',cg_loc call flush(uterm) ENDIF 1000 FORMAT(a,i3,1(a,g15.4)) !----------------------------------------------------------------------- ! LOCAL FORCE COMPUTATION !----------------------------------------------------------------------- ! In this section each PE compute the force for a subset of the local ! bodies, that were not computed in the grouping part !----------------------------------------------------------------------- group_access=1 ! ungrouped flag IF(TID.eq.0) c2a=MPI_WTIME() count_par=0 !$OMP DO DO 100 p=1,nb_res_loc(me+1) !----------------------------------------------------------------------- ! Forming the interaction lists. ! p is the logical number of body !----------------------------------------------------------------------- IF(mark_bod_gr(p).ge.1) CYCLE ! skip this particle. It was already computed in the grouping section count_par=count_par+1 ! numbod_100=numbod_100+1 CALL ilist(p,nterms,iterms,pos_comm,pmass, drdotdr,dx,dy,dz,pquad) !----------------------------------------------------------------------- ! Compute potential and the Force. !----------------------------------------------------------------------- CALL force(p,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option) 100 CONTINUE !$OMP END DO DEALLOCATE(drdotdr) DEALLOCATE(dx) DEALLOCATE(dy) DEALLOCATE(dz) DEALLOCATE(drdotdr_gr) DEALLOCATE(dx_gr) DEALLOCATE(dy_gr) DEALLOCATE(dz_gr) DEALLOCATE(iterms) DEALLOCATE(iterms_gr) DEALLOCATE(pmass) DEALLOCATE(pmass_gr) DEALLOCATE(pquad) DEALLOCATE(pquad_gr) DEALLOCATE(acc_g) DEALLOCATE(pos_comm) !$OMP END PARALLEL c2b=MPI_WTIME() ctota=ctot_TW+(c2b-c2a) CALL MPI_BARRIER(MPI_COMM_WORLD,ierror) if(me.eq.1) write(uterm,*)"TP1 PE=",me," j=",459," acc=",acc(1:ndim,459) RETURN END