!----------------------------------------------------------------------- !TEST: fa solo grouping locale ! SUBROUTINE acc_comp(option) ! ! !----------------------------------------------------------------------- ! ! ! Subroutine to compute the gravitational acceleration for all of ! the bodies. Vectorization is achieved by processing all of the ! cells at a given level in the tree simultaneously. The local ! variable option indicates whether the code is to compute the ! potential and/or acceleration. ! ! local_wg_bod is the number of clock cycle needed for a PE resident ! body having nterms=1 !======================================================================= USE fly_h implicit none INCLUDE 'mpif.h' ! Declaration of local variables. ! ------------------------------- INTEGER :: TID, NTID,status INTEGER :: n, q, i INTEGER(KIND=4) :: ele, nterms, nterms_gr, bcount_ele, j, p, uno INTEGER(KIND=4) :: mio_ele INTEGER(KIND=4), DIMENSION (:), ALLOCATABLE :: iterms,iterms_gr REAL(KIND=8), DIMENSION (:), ALLOCATABLE :: pmass,pmass_gr REAL(KIND=8), DIMENSION (:), ALLOCATABLE :: drdotdr,dx,dy,dz REAL(KIND=8), DIMENSION (:), ALLOCATABLE:: drdotdr_gr,dx_gr,dy_gr,dz_gr REAL(KIND=8), DIMENSION (:,:), ALLOCATABLE ::pquad,pquad_gr REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: acc_g REAL(KIND=8), DIMENSION (:), ALLOCATABLE::pos_comm INTEGER :: count_par REAL(KIND=8) :: c1a, c2a, c2b, c2c, ctwg, cgs_g, cgs_b, cg1,cg2 REAL(KIND=8) ::cpar_a, cpar_b CHARACTER(LEN=4) :: option !======================================================================= !======================================================================= ! Initialize the interaction list diagnostics. ! -------------------------------------------- !*************************************************** ! ! m_sh, max_sh, max_pr_bal ! are computed by load_balance once before the iterations ! !************************************************** uno=1 group_access=0 nterms=0 nterms_gr=0 bcount_ele=0 mark_bod_gr=0 ctot_TW=0 ctot_GS_nt=0 !======================================================================= ! GROUPING SECTION !======================================================================= ! We find the force for the bodies of a cell of the grouping as the ! sum of two components. The first component is equal for all the ! bodies of the cell-grouping and it is due at the cells and at the ! bodies outside at the cell-grouping. The second component is ! different by body at body of the cell-grouping and it is due at ! the interactions between the bodies of the cell-grouping. !======================================================================= ctwg=0 cgs_g=0 cgs_b=0 !------------------------------------------------------- ! count_group_arr(#PE)=cg_loc of the #PE ! ilocal_work_gr the number of local grouping cell resolved by the PE ! ilocal_save maximum number of local grouping cell resolved by each PE ! grouping_rmt receive the grouping cell non locally resolved by the remote PE ! iblk: used for atomic update for non locally resolved grouping cell ! no_dlb_space if set to 1 there is no free space to execute dlb section of grouping !------------------------------------------------------- IF(me .EQ. 0) write(uterm,*)'Grouping section started' cg1=MPI_WTIME() !----------------------------------------------------------------------- ! Analysis of grouping cell in the PE=ix_gr ! ix_gr start from the local PE and cycle for all the PEs ! load grouping_rmt with grouping cells of a remote PE !----------------------------------------------------------------------- count_par=0 cpar_a=MPI_WTIME() NTID = 1 !$ NTID = OMP_GET_MAX_THREADS(); !$OMP PARALLEL PRIVATE(mio_ele, ele,count_par,acc_g,nterms) & !$OMP PRIVATE(nterms_gr, bcount_ele,j,q,NTID,TID) & !$OMP PRIVATE(p,iterms,iterms_gr,pmass,pmass_gr) & !$OMP PRIVATE(drdotdr,dx,dy,dz) & !$OMP PRIVATE(drdotdr_gr,dx_gr,dy_gr,dz_gr,pquad,pquad_gr,pos_comm) ALLOCATE(iterms(maxnterm), STAT=status) ALLOCATE(iterms_gr(maxnterm), STAT=status) ALLOCATE(pmass(maxnterm), STAT=status) ALLOCATE(pmass_gr(maxnterm), STAT=status) ALLOCATE(drdotdr(maxnterm), STAT=status) ALLOCATE(dx(maxnterm), STAT=status) ALLOCATE(dy(maxnterm), STAT=status) ALLOCATE(dz(maxnterm), STAT=status) ALLOCATE(drdotdr_gr(maxnterm), STAT=status) ALLOCATE(dx_gr(maxnterm), STAT=status) ALLOCATE(dy_gr(maxnterm), STAT=status) ALLOCATE(dz_gr(maxnterm), STAT=status) ALLOCATE(pquad(2*ndim-1,maxnterm), STAT=status) ALLOCATE(pquad_gr(2*ndim-1,maxnterm), STAT=status) ALLOCATE(acc_g(ndim), STAT=status) ALLOCATE(pos_comm(ndim), STAT=status) TID = 0 !$ TID = OMP_GET_THREAD_NUM(); !$OMP DO DO mio_ele=1,cg_loc !------------------------------------------------------------------------- ! iblk2 is an array atomically updated that cointain the number of cells already ! computed on the PE. ! mio_ele contains the number of elemnt to be processed: locally from 1 to ! ilocal_work_gr, and remotely (or locally for shared gr-cells), as computed ! from iblk2 array that is atomically updated !------------------------------------------------------------------------- !------------------------------------------------------------------------- ! ele contain the number of gr-cell to be elaborated: local or remote ! cell stored in grouping_rmt array !------------------------------------------------------------------------- ele=grouping(mio_ele) count_par=count_par+1 acc_g=0 ! Forming the interaction lists. ! ---------------------------- CALL ilist_group (ele,nterms,nterms_gr,bcount_ele,iterms,iterms_gr,& pmass, pmass_gr, pquad,pquad_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr) ! ! Compute potential and/or acceleration. ! -------------------------------------- ! ! ! Compute of the pot. and F_far of a grouping cell ! -------------------------------------------------- ! CALL force_group(ele,nterms_gr,iterms_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr,pmass_gr,acc_g,pquad_gr,option,TID) !----------------------------------------------------------------------- ! Computation of the pot. F_near and F_tot of each body in tghe grouping cell ! Mark to 1 (local or remomtely wuth a PUT operation) the flag (mark_bod_gr) ! of each body that is computed in this section !----------------------------------------------------------------------- DO q=nterms-bcount_ele+1,nterms j=iterms(q) mark_bod_gr(j)=uno CALL force(j,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option) ENDDO !q=nterms-bcount_ele+1,nterms ENDDO ! mio_ele=q,cg_log !$OMP END DO 1100 FORMAT(a,i3,3(a,i9)) 1200 FORMAT(a,f5.2,a,f5.2) !$OMP BARRIER IF(me.eq.0 .and. TID.eq.0) THEN cg2=MPI_WTIME() ctwg=ctwg+(cg2-cg1) write(uterm,1000)'GROUPING: PE=',me,' TIME=',ctwg,' Tot gr-cells=',cg_loc call flush(uterm) ENDIF 1000 FORMAT(a,i3,1(a,g15.4)) !----------------------------------------------------------------------- ! LOCAL FORCE COMPUTATION !----------------------------------------------------------------------- ! In this section each PE compute the force for a subset of the local ! bodies, that were not computed in the grouping part !----------------------------------------------------------------------- group_access=1 ! ungrouped flag IF(TID.eq.0) c2a=MPI_WTIME() count_par=0 !$OMP DO DO 100 p=1,nb_res_loc(me+1) !----------------------------------------------------------------------- ! Forming the interaction lists. ! p is the logical number of body !----------------------------------------------------------------------- IF(mark_bod_gr(p).ge.1) CYCLE ! skip this particle. It was already computed in the grouping section count_par=count_par+1 ! numbod_100=numbod_100+1 CALL ilist(p,nterms,iterms,pos_comm,pmass, drdotdr,dx,dy,dz,pquad) !----------------------------------------------------------------------- ! Compute potential and the Force. !----------------------------------------------------------------------- CALL force(p,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option) 100 CONTINUE !$OMP END DO DEALLOCATE(drdotdr) DEALLOCATE(dx) DEALLOCATE(dy) DEALLOCATE(dz) DEALLOCATE(drdotdr_gr) DEALLOCATE(dx_gr) DEALLOCATE(dy_gr) DEALLOCATE(dz_gr) DEALLOCATE(iterms) DEALLOCATE(iterms_gr) DEALLOCATE(pmass) DEALLOCATE(pmass_gr) DEALLOCATE(pquad) DEALLOCATE(pquad_gr) DEALLOCATE(acc_g) DEALLOCATE(pos_comm) !$OMP END PARALLEL c2b=MPI_WTIME() ctota=ctot_TW+(c2b-c2a) CALL MPI_BARRIER(MPI_COMM_WORLD,ierror) RETURN END