Skip to content
acc_comp.F90_oKomp1 10.2 KiB
Newer Older
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
!-----------------------------------------------------------------------
!TEST: fa solo grouping locale
!
                        SUBROUTINE acc_comp(option)
!
!
!-----------------------------------------------------------------------
!
!
!     Subroutine to compute the gravitational acceleration for all of
!     the bodies.  Vectorization is achieved by processing all of the
!     cells at a given level in the tree simultaneously.  The local
!     variable option indicates whether the code is to compute the
!     potential and/or acceleration.
!
!     local_wg_bod is the number of clock cycle needed for a PE resident 
!     body having nterms=1
!=======================================================================
	 
	 USE fly_h
 	implicit none 
	INCLUDE 'mpif.h'


!   Declaration of local variables.
!   -------------------------------
		INTEGER :: TID, NTID,N_LOC_ELE,status
    	INTEGER :: n, q 
        INTEGER(KIND=4) :: ele, nterms, nterms_gr, bcount_ele, j, p, uno
        INTEGER(KIND=4) :: mio_ele
	INTEGER(KIND=4),  DIMENSION (:), ALLOCATABLE :: iterms,iterms_gr
	REAL(KIND=8),  DIMENSION (:), ALLOCATABLE :: pmass,pmass_gr
	REAL(KIND=8),   DIMENSION (:), ALLOCATABLE :: drdotdr,dx,dy,dz
	REAL(KIND=8),   DIMENSION (:), ALLOCATABLE:: drdotdr_gr,dx_gr,dy_gr,dz_gr
	REAL(KIND=8),   DIMENSION (:,:), ALLOCATABLE ::pquad,pquad_gr
	REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: acc_g
	REAL(KIND=8), DIMENSION (:), ALLOCATABLE::pos_comm


	INTEGER :: count_par	

	
	REAL(KIND=8) :: c1a,   c2a, c2b, c2c, ctwg, cgs_g, cgs_b, cg1,cg2
 	REAL(KIND=8) ::cpar_a, cpar_b
     	CHARACTER(LEN=4)  :: option
 
	
!=======================================================================
!=======================================================================
!   Initialize the interaction list diagnostics.
!   --------------------------------------------
!***************************************************
!
! m_sh, max_sh, max_pr_bal
! are computed by load_balance once before the iterations
!
!**************************************************


	

	uno=1
	
	
	group_access=0
        
 	nterms=0
	nterms_gr=0
	bcount_ele=0
	mark_bod_gr=0
	ctot_TW=0
	ctot_GS_nt=0

	
	
	mio_ele = 0
	
	
	

!=======================================================================
!       GROUPING  SECTION   
!=======================================================================
!  We find the force for the bodies of a cell of the grouping as the 
!  sum of two components. The first component is equal for all the 
!  bodies of the cell-grouping and it is due at the cells and at the
!  bodies outside at the cell-grouping. The second component is 
!  different by body at body of the cell-grouping and it is due at  
!  the interactions between the bodies of the cell-grouping.
!=======================================================================
	   ctwg=0
	   cgs_g=0
	   cgs_b=0
	   
!-------------------------------------------------------
! count_group_arr(#PE)=cg_loc of the #PE
! ilocal_work_gr the number of local grouping cell  resolved by the PE
! ilocal_save maximum number of local grouping cell resolved by each PE
! grouping_rmt receive the grouping cell non locally resolved by the remote PE
! iblk: used for atomic update for non locally resolved grouping cell
! no_dlb_space if set to 1 there is no free space to execute dlb section of grouping
!-------------------------------------------------------
	
	
	IF(me .EQ. 0) write(uterm,*)'Grouping section started'

	cg1=MPI_WTIME()
	  

!-----------------------------------------------------------------------
! Analysis of grouping cell in the PE=ix_gr	   
! ix_gr start from the local PE and cycle for all the PEs
! load grouping_rmt with grouping cells of a remote PE
!-----------------------------------------------------------------------
	    
	   count_par=0
	   cpar_a=MPI_WTIME()

      NTID = 1
!$    NTID = OMP_GET_MAX_THREADS();


	   
!$OMP PARALLEL PRIVATE(mio_ele, ele,count_par,acc_g,nterms) & 
!$OMP PRIVATE(nterms_gr, bcount_ele,j,q,NTID,TID) &
!$OMP PRIVATE(p,N_LOC_ELE,iterms,iterms_gr,pmass,pmass_gr) &
!$OMP PRIVATE(drdotdr,dx,dy,dz) &
!$OMP PRIVATE(drdotdr_gr,dx_gr,dy_gr,dz_gr,pquad,pquad_gr,pos_comm)
 
	    ALLOCATE(iterms(maxnterm), STAT=status) 
	    ALLOCATE(iterms_gr(maxnterm), STAT=status) 
	    ALLOCATE(pmass(maxnterm), STAT=status) 
	    ALLOCATE(pmass_gr(maxnterm), STAT=status) 
	    ALLOCATE(drdotdr(maxnterm), STAT=status) 
	    ALLOCATE(dx(maxnterm), STAT=status) 
	    ALLOCATE(dy(maxnterm), STAT=status) 
	    ALLOCATE(dz(maxnterm), STAT=status) 
	    ALLOCATE(drdotdr_gr(maxnterm), STAT=status) 
	    ALLOCATE(dx_gr(maxnterm), STAT=status) 
	    ALLOCATE(dy_gr(maxnterm), STAT=status) 
	    ALLOCATE(dz_gr(maxnterm), STAT=status) 
	    ALLOCATE(pquad(2*ndim-1,maxnterm), STAT=status) 
	    ALLOCATE(pquad_gr(2*ndim-1,maxnterm), STAT=status) 
	    ALLOCATE(acc_g(ndim), STAT=status) 
	    ALLOCATE(pos_comm(ndim), STAT=status) 
	
      TID = 0
      NTID = 1
!$    TID = OMP_GET_THREAD_NUM();
!$    NTID = OMP_GET_MAX_THREADS();
	  N_LOC_ELE=cg_loc/NTID
	  IF(MOD(cg_loc,NTID) .GT. TID) N_LOC_ELE=N_LOC_ELE+1
	  
	  mio_ele=N_LOC_ELE*TID  ! staring element of this thread
	  
	  IF(TID.GT.0) THEN
	  	IF(MOD(cg_loc,NTID) .LT. TID) THEN
	  		mio_ele=mio_ele+MOD(cg_loc,NTID)
	  	ENDIF
	  ENDIF
	  
	 

50	CONTINUE  ! This is a cycle on the grouping cell on the PE ix_gr


!-------------------------------------------------------------------------
! iblk2 is an array atomically updated that cointain the number of cells already
! computed on the PE.
! mio_ele contains the number of elemnt to be processed: locally from 1 to 
! ilocal_work_gr, and remotely (or locally for shared gr-cells), as computed
! from iblk2 array that is atomically updated
!-------------------------------------------------------------------------
	
	   mio_ele=mio_ele+1
	      
          IF(mio_ele.GT.N_LOC_ELE) THEN! there are no more grouping cell in this thread/ processor

	   GOTO 60	
	
	  ENDIF
	
	
!-------------------------------------------------------------------------
! ele contain the number of gr-cell to be elaborated: local or remote 
! cell stored in grouping_rmt array
!-------------------------------------------------------------------------
	    
	    
	       ele=grouping(mio_ele)
	       
    if(me.eq.1) write(uterm,*)"TP00 PE-TID=",me,TID," mio_ele=",mio_ele," ele=",ele, " N_LOC_ELE=",N_LOC_ELE

        count_par=count_par+1
 	acc_g=0
	

!   Forming the interaction lists.
!   ----------------------------
         
	   CALL ilist_group (ele,nterms,nterms_gr,bcount_ele,iterms,iterms_gr, pmass, pmass_gr, pquad,pquad_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr) 
	   

!
!   Compute potential and/or acceleration.
!   --------------------------------------
!
!
!     Compute of the pot. and F_far of a  grouping cell
!     --------------------------------------------------
!
          
 		CALL force_group(ele,nterms_gr,iterms_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr,pmass_gr,acc_g,pquad_gr,option,TID)  

!-----------------------------------------------------------------------
! Computation of the pot. F_near and F_tot of each body in tghe grouping cell
! Mark to 1 (local or remomtely wuth a PUT operation) the flag (mark_bod_gr) 
! of each body that is computed in this section
!-----------------------------------------------------------------------
!   if(mio_ele.eq.1.and.me.eq.0) write(uterm,*)"PE=",me,"ele=",ele," acc_g=",acc_g, " nterms:",nterms," iterms ",iterms(1:nterms),  " nterms_gr:",nterms_gr," iterms_gr ",iterms_gr(1:nterms_gr) 
       DO  q=nterms-bcount_ele+1,nterms

	   j=iterms(q)

	    
	   mark_bod_gr(j)=uno
	   
	 

	       
          CALL force(j,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option)  

    if(j.eq.459.and.me.eq.1) write(uterm,*)"TP0 PE=",me," ele=",ele," j=",j, " nterms:",nterms," iterms ",iterms(1:nterms),  " nterms_gr:",nterms_gr," iterms_gr ",iterms_gr(1:nterms_gr)," acc_g=",acc_g(1),acc_g(2),acc_g(3)," acc(1:ndim,p)=", acc(1:ndim,j)," mark_bod_gr(j)=",mark_bod_gr(j)
	    
!	    numbod=numbod+1

	ENDDO  !q=nterms-bcount_ele+1,nterms
	

	

	
	   GOTO 50 ! Cycle on the grouping cell on the PE ix_gr
	
 	
60	CONTINUE 
 
  	

	
		

1100 	FORMAT(a,i3,3(a,i9))
1200 	FORMAT(a,f5.2,a,f5.2)


!$OMP BARRIER
    if(me.eq.1) write(uterm,*)"TP0.1 PE=",me," acc(1:3,459)=",acc(1:ndim,459)," mark_bod_gr(j)=",mark_bod_gr(j)
	
		IF(me.eq.0 .and. TID.eq.0) THEN	
	    	cg2=MPI_WTIME()
	    	ctwg=ctwg+(cg2-cg1)
			write(uterm,1000)'GROUPING: PE=',me,' TIME=',ctwg,' Tot gr-cells=',cg_loc
	 		call flush(uterm)
 		ENDIF
1000 	FORMAT(a,i3,1(a,g15.4))

!-----------------------------------------------------------------------
!        LOCAL FORCE COMPUTATION   
!-----------------------------------------------------------------------
!	In this section each PE compute the force for a subset of the local
!	bodies, that were not computed in the grouping part
!-----------------------------------------------------------------------
	group_access=1 ! ungrouped flag
	

		IF(TID.eq.0) c2a=MPI_WTIME()
        count_par=0
        
	

!$OMP  DO
        DO 100 p=1,nb_res_loc(me+1)

!-----------------------------------------------------------------------
!   Forming the interaction lists.
!   p is the logical number of body
!-----------------------------------------------------------------------
	   
	   IF(mark_bod_gr(p).ge.1) CYCLE ! skip this particle. It was already computed in the grouping section
	

        count_par=count_par+1
           
!	   numbod_100=numbod_100+1

          CALL ilist(p,nterms,iterms,pos_comm,pmass, drdotdr,dx,dy,dz,pquad)  



 	   
!-----------------------------------------------------------------------
!   Compute potential and the Force.
!-----------------------------------------------------------------------
 	   CALL force(p,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option)


100    CONTINUE
!$OMP END DO
	DEALLOCATE(drdotdr) 
	DEALLOCATE(dx) 
	DEALLOCATE(dy) 
	DEALLOCATE(dz)
	DEALLOCATE(drdotdr_gr) 
	DEALLOCATE(dx_gr) 
	DEALLOCATE(dy_gr) 
	DEALLOCATE(dz_gr) 
	DEALLOCATE(iterms)
	DEALLOCATE(iterms_gr)
	DEALLOCATE(pmass)
	DEALLOCATE(pmass_gr)
	DEALLOCATE(pquad)
	DEALLOCATE(pquad_gr)
	DEALLOCATE(acc_g) 
	DEALLOCATE(pos_comm) 
          
!$OMP END PARALLEL           

	   c2b=MPI_WTIME()
	   ctota=ctot_TW+(c2b-c2a)



        
	CALL MPI_BARRIER(MPI_COMM_WORLD,ierror)
	     if(me.eq.1) write(uterm,*)"TP1 PE=",me," j=",459," acc=",acc(1:ndim,459)


	RETURN
        END