Skip to content
acc_ex.F90_new 5.18 KiB
Newer Older
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
!-----------------------------------------------------------------------
!TEST: fa solo grouping locale
!
                        SUBROUTINE acc_ex(option)
!
!
!-----------------------------------------------------------------------
!
!
!     Subroutine to compute the gravitational acceleration for all of
!     the bodies.  Vectorization is achieved by processing all of the
!     cells at a given level in the tree simultaneously.  The local
!     variable option indicates whether the code is to compute the
!     potential and/or acceleration.
!
!     local_wg_bod is the number of clock cycle needed for a PE resident 
!     body having nterms=1
!=======================================================================
	 
	 USE fly_h
	 implicit none

 	 INCLUDE 'mpif.h'

!   Declaration of local variables.
!   -------------------------------

    	INTEGER :: n, m, q, ix_gr,ix_rec,istatus(MPI_STATUS_SIZE)
	INTEGER :: tag1, tag2,tag3, req1, req2,req3,i
        INTEGER(KIND=4) ::  nterms,   j, p
	REAL(KIND=8) ::c0a, c1a,  c2a, c2b, c2c
 	REAL(KIND=8) ::cpar_a, cpar_b,cpar_c,cpar_d
     	CHARACTER(LEN=4)  :: option

	INTEGER(KIND=4),  DIMENSION (:), ALLOCATABLE :: iterms,iterms_gr
	REAL(KIND=8),  DIMENSION (:), ALLOCATABLE :: pmass,pmass_gr
	REAL(KIND=8),   DIMENSION (:), ALLOCATABLE :: drdotdr,dx,dy,dz
	REAL(KIND=8),   DIMENSION (:), ALLOCATABLE:: drdotdr_gr,dx_gr,dy_gr,dz_gr
	REAL(KIND=8),   DIMENSION (:,:), ALLOCATABLE ::pquad,pquad_gr
	REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: acc_g
	REAL(KIND=8), DIMENSION (:), ALLOCATABLE::pos_comm

	
!=======================================================================
!=======================================================================
!   Initialize the interaction list diagnostics.
!   --------------------------------------------
        c0a=MPI_WTIME()
	iterms=0
        numbod=0
	group_access=1
        ix_gr=0
	nterms=0
	ctot_TW_rmt=0
	ctot_GS_nt_rmt=0
	rmt_acc=.TRUE.
	numbod_300=0
	tag1=1
	tag2=2
	tag3=3
!===========================
! data bodies communication
!===========================
	cpar_d=MPI_WTIME()

        DO  i=1,NPES-1

        ix_gr=me+i
        ix_rec=me-i
        IF(ix_gr.GE.NPES) ix_gr=ix_gr-NPES
        IF(ix_rec.LT.0) ix_rec=ix_rec+NPES
        NLONG=nb_res_loc(me+1)*3


        CALL MPI_ISEND(pos(1,1), NLONG,MPI_REAL8, ix_gr, tag1, MPI_COMM_WORLD, req1, ierror)
        CALL MPI_ISEND(acc(1,1), NLONG,MPI_REAL8, ix_gr, tag2, MPI_COMM_WORLD, req2, ierror)

        NLONG=nb_res_loc(ix_rec+1)*3
        CALL MPI_RECV(pos_rmt(1,1),NLONG,MPI_REAL8,ix_rec,tag1,MPI_COMM_WORLD, istatus,ierror)
        CALL MPI_RECV(acc_rmt(1,1),NLONG,MPI_REAL8,ix_rec,tag2,MPI_COMM_WORLD, istatus,ierror)
        CALL MPI_WAIT(req1, istatus, ierror)
        CALL MPI_WAIT(req2, istatus, ierror)

        CALL MPI_BARRIER(MPI_COMM_WORLD, ierror)
 
	group_access=1 ! ungrouped flag

	cpar_a=MPI_WTIME()
!!! QUI VERIFICA SE FUNZIONA la &!!
	   
!$OMP PARALLEL PRIVATE(p, acc_g,nterms) & 
!$OMP PRIVATE(N_LOC_ELE,iterms,pmass,) &
!$OMP PRIVATE(drdotdr,dx,dy,dz) &
!$OMP PRIVATE(pquad,acc_g,pos_comm)
	    ALLOCATE(iterms(maxnterm), STAT=status) 
	    ALLOCATE(pmass(maxnterm), STAT=status) 
	    ALLOCATE(drdotdr(maxnterm), STAT=status) 
	    ALLOCATE(dx(maxnterm), STAT=status) 
	    ALLOCATE(dy(maxnterm), STAT=status) 
	    ALLOCATE(dz(maxnterm), STAT=status) 
	    ALLOCATE(pquad(2*ndim-1,maxnterm), STAT=status) 
	    ALLOCATE(acc_g(ndim), STAT=status) 
	    ALLOCATE(pos_comm(ndim), STAT=status) 
	
!$OMP PARALLEL DO
        DO p=1,nb_res_loc(ix_rec+1)

!-----------------------------------------------------------------------
!   Forming the interaction lists.
!   p is the logical number of body
!-----------------------------------------------------------------------
110	   CONTINUE

           CALL ilist(p,nterms,iterms,pos_comm,pmass, drdotdr,dx,dy,dz,pquad) 



    
!-----------------------------------------------------------------------
!   Compute potential and the Force.
!-----------------------------------------------------------------------
           
	   CALL force(p,nterms,iterms,pos_com,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option)


	ENDDO !p=1,nb_res_loc(ix_rec+1)	   

	    DEALLOCATE(iterms) 
	    DEALLOCATE(pmass) 
	    DEALLOCATE(drdotdr) 
	    DEALLOCATE(dx) 
	    DEALLOCATE(dy) 
	    DEALLOCATE(dz) 
	    DEALLOCATE(pquad) 
	    DEALLOCATE(acc_g) 
	    DEALLOCATE(pos_comm) 
!$OMP END PARALLEL           

	cpar_c=MPI_WTIME()-cpar_a
	rmt_time(ix_rec+1,me+1)=cpar_c
	write(uterm,1000) 'Remote_local_computation: PE',me,' FROM PE ',ix_rec,' acc_ex sec =',cpar_c
	call flush(uterm)

1000	format(x,a,i3,2x,a,i3,2x,a,g18.8)

 	CALL MPI_BARRIER(MPI_COMM_WORLD, ierror)


	NLONG=nb_res_loc(ix_rec+1)*3
	
	CALL MPI_ISEND(acc_rmt(1,1), NLONG,MPI_REAL8, ix_rec, tag3, MPI_COMM_WORLD, req3, ierror)
	
	NLONG=nb_res_loc(me+1)*3
	
	CALL MPI_RECV(acc(1,1),NLONG,MPI_REAL8,ix_gr,tag3,MPI_COMM_WORLD, istatus, ierror)
        CALL MPI_WAIT(req3, istatus, ierror)
 
	numbod_300=numbod_300+nb_res_loc(ix_rec+1)

 	CALL MPI_BARRIER(MPI_COMM_WORLD, ierror)

	ENDDO !i=1,NPES-1

              c1a=MPI_WTIME()
              ctotc=(c1a-c0a)

          cpar_b=MPI_WTIME()-cpar_d
	  write(uterm,*)"PE=",me," TOTAL analyzed  remote body  in sec ",cpar_b
	  call flush(uterm)

	rmt_acc=.FALSE.

	RETURN
        END