Commit 3cc43560 authored by Fabio Roberto Vitello's avatar Fabio Roberto Vitello
Browse files

fly5.0.1

parents
Loading
Loading
Loading
Loading

FLY_mpi

0 → 100755
+6.42 MiB

File added.

No diff preview for this file type.

Makefile

0 → 100644
+128 −0
Original line number Diff line number Diff line
#SHELL=/bin/sh

# Object files for the FLY N-body code.  fly_h.o (the shared module) is
# listed first so its .mod file exists before any dependent source compiles.
OBJS=    fly_h.o fly.o  null.o error.o \
	sys_init.o read_params.o read_redsh.o \
	read_b_asc.o read_b_bin.o  init_pars.o init_ew.o \
	init_pos.o reset_pos.o inpar_dyn.o step.o step_force.o \
	tree_build.o tree_sort.o \
	tree_gen.o find_group.o \
	cell_prop.o acc_comp.o acc_ex.o \
	ilist_group.o force_group.o force.o ilist.o upd_pos.o upd_vel.o \
	wr_native.o write_b_asc.o write_b_bin.o out_32.o leapf_corr.o \
	io.o  dt_comp.o read_s_bin.o

# Tunable parameters
#
# CF		Name of the Fortran compiler (an MPI wrapper script)
# CC		Name of the C compiler
# LDFLAGS	Flags to the loader
# LIBS		List of libraries
# CMD		Name of the executable
# PROFLIB	Library needed for profiling
#
# A long history of site-specific alternatives (ifort, pgf90, mpich2 and
# mvapich2 wrapper paths, per-cluster CMD output paths) lived here as
# commented-out lines; see the version history if an old setting is needed.

# CF = ifort
# CF = pgf90
CF = mpif90
CC = mpicc

# Example site-specific settings (normally empty: the MPI wrappers above
# already know their own include and library paths):
# LDFLAGS = -L/opt/share/mvapich2/lib
# LIBS = -lmpich -lfmpich -lmpichf90 -lmpichcxx

CMD = FLY_mpi

# FFLAGS = -O0 -check all          # debug build
FFLAGS = -O3

# FFLAGS1 is used only for read_b_bin.o and read_redsh.o (see below).
# NOTE(review): it is intentionally left UNDEFINED, so those two files are
# compiled with the compiler's default optimisation.  Historical commented
# settings were all -O0 variants -- presumably a compiler/optimiser
# workaround for those sources; confirm before defining it.
# FFLAGS1 = -O0

CFLAGS = -O3

LD = $(CF)

# Lines from here on down should not need to be changed.  They are the
# actual rules which make uses to build the executable.
#
.PHONY: all clean clobber void

all:		$(CMD)

.SUFFIXES :
.SUFFIXES : .o .F90 .c

# Every object depends on the module source: touching fly_h.F90 forces a
# full recompile.  The recipes come from the suffix rules below.
$(OBJS):   fly_h.F90

# These two sources are compiled with FFLAGS1 instead of the optimised
# FFLAGS (see the FFLAGS1 note above).
read_b_bin.o : read_b_bin.F90
	$(CF) $(FFLAGS1) -c $<

read_redsh.o : read_redsh.F90
	$(CF) $(FFLAGS1) -c $<

.F90.o :
	$(CF) $(FFLAGS) -c $<

.c.o :
	$(CC) $(CFLAGS) -c $<

$(CMD):		$(OBJS)
	$(LD) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)

# Make the profiled version of the command.
#
$(CMD).prof:	$(OBJS)
	$(CF) $(LDFLAGS) -o $@ $(OBJS) $(PROFLIB) $(LIBS)

clean:
	$(RM) $(OBJS)

clobber:	clean
	$(RM) $(CMD) $(CMD).prof

# NOTE(review): SRCS is never defined in this file, so in practice this
# target only removes a file literally named "makefile"; kept verbatim
# for compatibility with the traditional void convention.
void:	clobber
	$(RM) $(SRCS) makefile

README.md

0 → 100644
+0 −0

Empty file added.

TestOMP.F90

0 → 100644
+54 −0
Original line number Diff line number Diff line
!-----------------------------------------------------------------------
!   Hybrid MPI+OpenMP smoke test.  Each MPI rank reports its hostname,
!   then opens an OpenMP parallel region in which each thread fills a
!   PRIVATE allocatable array and prints a few scalar markers.  The
!   partly commented-out PRIVATE clauses below suggest this program is
!   deliberately probing shared-vs-private variable behaviour.
!-----------------------------------------------------------------------
    PROGRAM TestOMP 
	
	 USE omp_lib
	implicit none
 	 INCLUDE 'mpif.h'

!   Declaration of local variables.
!   -------------------------------
          INTEGER(KIND=4),  DIMENSION (:) , ALLOCATABLE ::isub
		INTEGER :: TID, NTID
		INTEGER:: status
		 INTEGER :: ierr,me,npes, ierror,lname
		 INTEGER :: asss,bsss,csss,dsss,esss,fsss,gsss,hsss
		 
	character*(MPI_MAX_PROCESSOR_NAME) hostname_me
	
 

!-----------------------------------------------------------------------
!   MPI startup: rank, size, hostname of this process.
!-----------------------------------------------------------------------
        CALL MPI_INIT(ierror)


	CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierror)
        CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierror)
        CALL MPI_GET_PROCESSOR_NAME(hostname_me, lname, ierror)
	WRITE(6, *) "TestOMP - RUN. PE=",me," HOSTNAME:",hostname_me(1:lname),' npes=',npes


      NTID = 1
    NTID = OMP_GET_MAX_THREADS()
	
! NOTE(review): TID, status, esss and hsss are NOT in the PRIVATE lists
! below (the clauses that would privatise esss/hsss are commented out),
! so they are shared and written concurrently by all threads -- a data
! race.  This looks intentional for the test; confirm before "fixing".
!$OMP PARALLEL PRIVATE(isub) &
!$OMP PRIVATE(asss,bsss,csss)     
!---!$OMP PRIVATE(dsss,esss,fsss) &    
!---!$OMP PRIVATE(gsss,hsss)    

    ALLOCATE(isub(8), STAT=status) 
!-----------------------------------------------------------------------
!	Each thread counts how many subcell indices it generates in pcg
!-----------------------------------------------------------------------
      TID = 0
     TID = OMP_GET_THREAD_NUM()
     asss=TID
     esss=TID
! NOTE(review): hsss is assigned only when tid < 5, so for higher thread
! ids the WRITE below prints an uninitialised (or stale shared) value.
     if(tid.lt.5) hsss=TID
 	isub(1:8)=TID
    write(6,*)'PE=',me,"TID=",TID, asss,esss,hsss 
    DEALLOCATE(isub) 
!$OMP END PARALLEL 
	
	        call MPI_FINALIZE(ierror)
	  
	  STOP
	END

acc_comp.F90

0 → 100644
+308 −0
Original line number Diff line number Diff line
!-----------------------------------------------------------------------
!TEST: performs local grouping only
!
                        SUBROUTINE acc_comp(option)
!
!
!-----------------------------------------------------------------------
!
!
!     Subroutine to compute the gravitational acceleration for all of
!     the bodies.  Vectorization is achieved by processing all of the
!     cells at a given level in the tree simultaneously.  The local
!     variable option indicates whether the code is to compute the
!     potential and/or acceleration.
!
!     local_wg_bod is the number of clock cycle needed for a PE resident 
!     body having nterms=1
!
!     Structure: (1) grouping section -- for each grouping cell, build
!     the interaction lists (ilist_group), compute the far-field force
!     shared by all bodies of the cell (force_group), then the per-body
!     near-field (force), marking each computed body in mark_bod_gr;
!     (2) local section -- compute the force for every local body not
!     already covered by the grouping pass.  Both sections run inside a
!     single OpenMP parallel region; most module state (grouping, cg_loc,
!     mark_bod_gr, nb_res_loc, me, uterm, ...) comes from module fly_h.
!=======================================================================
	 
	 USE fly_h
 	implicit none 
	INCLUDE 'mpif.h'


!   Declaration of local variables.
!   -------------------------------
		INTEGER :: TID, NTID,status
    	INTEGER :: n, q, i 
        INTEGER(KIND=4) :: ele, nterms, nterms_gr, bcount_ele, j, p, uno
        INTEGER(KIND=4) :: mio_ele
	INTEGER(KIND=4),  DIMENSION (:), ALLOCATABLE :: iterms,iterms_gr
	REAL(KIND=8),  DIMENSION (:), ALLOCATABLE :: pmass,pmass_gr
	REAL(KIND=8),   DIMENSION (:), ALLOCATABLE :: drdotdr,dx,dy,dz
	REAL(KIND=8),   DIMENSION (:), ALLOCATABLE:: drdotdr_gr,dx_gr,dy_gr,dz_gr
	REAL(KIND=8),   DIMENSION (:,:), ALLOCATABLE ::pquad,pquad_gr
	REAL(KIND=8), DIMENSION(:), ALLOCATABLE :: acc_g
	REAL(KIND=8), DIMENSION (:), ALLOCATABLE::pos_comm


	INTEGER :: count_par	

	
	REAL(KIND=8) :: c1a,   c2a, c2b, c2c, ctwg, cgs_g, cgs_b, cg1,cg2
 	REAL(KIND=8) ::cpar_a, cpar_b
     	CHARACTER(LEN=4)  :: option
 
	
!=======================================================================
!=======================================================================
!   Initialize the interaction list diagnostics.
!   --------------------------------------------
!***************************************************
!
! m_sh, max_sh, max_pr_bal
! are computed by load_balance once before the iterations
!
!**************************************************


	

	uno=1
	
	
	group_access=0
        
 	nterms=0
	nterms_gr=0
	bcount_ele=0
	mark_bod_gr=0
	ctot_TW=0
	ctot_GS_nt=0

	
	
	

!=======================================================================
!       GROUPING  SECTION   
!=======================================================================
!  We find the force for the bodies of a cell of the grouping as the 
!  sum of two components. The first component is equal for all the 
!  bodies of the cell-grouping and it is due to the cells and the
!  bodies outside the cell-grouping. The second component differs
!  from body to body of the cell-grouping and is due to the
!  interactions between the bodies of the cell-grouping.
!=======================================================================
	   ctwg=0
	   cgs_g=0
	   cgs_b=0
	   
!-------------------------------------------------------
! count_group_arr(#PE)=cg_loc of the #PE
! ilocal_work_gr the number of local grouping cell  resolved by the PE
! ilocal_save maximum number of local grouping cell resolved by each PE
! grouping_rmt receive the grouping cell non locally resolved by the remote PE
! iblk: used for atomic update for non locally resolved grouping cell
! no_dlb_space if set to 1 there is no free space to execute dlb section of grouping
!-------------------------------------------------------
	
	
	IF(me .EQ. 0) write(uterm,*)'Grouping section started'

	cg1=MPI_WTIME()
	  

!-----------------------------------------------------------------------
! Analysis of grouping cell in the PE=ix_gr	   
! ix_gr start from the local PE and cycle for all the PEs
! load grouping_rmt with grouping cells of a remote PE
!-----------------------------------------------------------------------
	    
	   count_par=0
	   cpar_a=MPI_WTIME()

      NTID = 1
!$    NTID = OMP_GET_MAX_THREADS();
	   
! Per-thread work arrays (sized maxnterm) are allocated PRIVATE inside
! the parallel region and freed before leaving it.
!$OMP PARALLEL PRIVATE(mio_ele, ele,count_par,acc_g,nterms) & 
!$OMP PRIVATE(nterms_gr, bcount_ele,j,q,NTID,TID) &
!$OMP PRIVATE(p,iterms,iterms_gr,pmass,pmass_gr) &
!$OMP PRIVATE(drdotdr,dx,dy,dz) &
!$OMP PRIVATE(drdotdr_gr,dx_gr,dy_gr,dz_gr,pquad,pquad_gr,pos_comm)
 
	    ALLOCATE(iterms(maxnterm), STAT=status) 
	    ALLOCATE(iterms_gr(maxnterm), STAT=status) 
	    ALLOCATE(pmass(maxnterm), STAT=status) 
	    ALLOCATE(pmass_gr(maxnterm), STAT=status) 
	    ALLOCATE(drdotdr(maxnterm), STAT=status) 
	    ALLOCATE(dx(maxnterm), STAT=status) 
	    ALLOCATE(dy(maxnterm), STAT=status) 
	    ALLOCATE(dz(maxnterm), STAT=status) 
	    ALLOCATE(drdotdr_gr(maxnterm), STAT=status) 
	    ALLOCATE(dx_gr(maxnterm), STAT=status) 
	    ALLOCATE(dy_gr(maxnterm), STAT=status) 
	    ALLOCATE(dz_gr(maxnterm), STAT=status) 
	    ALLOCATE(pquad(2*ndim-1,maxnterm), STAT=status) 
	    ALLOCATE(pquad_gr(2*ndim-1,maxnterm), STAT=status) 
	    ALLOCATE(acc_g(ndim), STAT=status) 
	    ALLOCATE(pos_comm(ndim), STAT=status) 
	
      TID = 0
!$    TID = OMP_GET_THREAD_NUM();

	  
	  
	 
!$OMP DO
	DO mio_ele=1,cg_loc

!-------------------------------------------------------------------------
! iblk2 is an array atomically updated that contains the number of cells already
! computed on the PE.
! mio_ele contains the number of the element to be processed: locally from 1 to 
! ilocal_work_gr, and remotely (or locally for shared gr-cells), as computed
! from iblk2 array that is atomically updated
!-------------------------------------------------------------------------

	      
	
	
!-------------------------------------------------------------------------
! ele contains the number of the gr-cell to be elaborated: local or remote 
! cell stored in grouping_rmt array
!-------------------------------------------------------------------------
	    
	    
	       ele=grouping(mio_ele)
	       

        count_par=count_par+1
 	acc_g=0
	

!   Forming the interaction lists.
!   ----------------------------
         
	   CALL ilist_group (ele,nterms,nterms_gr,bcount_ele,iterms,iterms_gr,& 
	   pmass, pmass_gr, pquad,pquad_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr) 
	   

!
!   Compute potential and/or acceleration.
!   --------------------------------------
!
!
!     Compute of the pot. and F_far of a  grouping cell
!     --------------------------------------------------
!
          
 		CALL force_group(ele,nterms_gr,iterms_gr,drdotdr_gr,dx_gr,dy_gr,dz_gr,pmass_gr,acc_g,pquad_gr,option,TID)  

!-----------------------------------------------------------------------
! Computation of the pot. F_near and F_tot of each body in the grouping cell.
! Mark to 1 (locally, or remotely with a PUT operation) the flag (mark_bod_gr) 
! of each body that is computed in this section.
! The last bcount_ele entries of iterms are the bodies of this gr-cell.
!-----------------------------------------------------------------------

       DO  q=nterms-bcount_ele+1,nterms

	   j=iterms(q)

	    
	   mark_bod_gr(j)=uno
	   
	 

	       
          CALL force(j,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option)  


	ENDDO  !q=nterms-bcount_ele+1,nterms
	

	
	ENDDO ! mio_ele=q,cg_log 
!$OMP END DO  	

	
		

1100 	FORMAT(a,i3,3(a,i9))
1200 	FORMAT(a,f5.2,a,f5.2)


!$OMP BARRIER
	
		IF(me.eq.0 .and. TID.eq.0) THEN	
	    	cg2=MPI_WTIME()
	    	ctwg=ctwg+(cg2-cg1)
			write(uterm,1000)'GROUPING: PE=',me,' TIME=',ctwg,' Tot gr-cells=',cg_loc
	 		call flush(uterm)
 		ENDIF
1000 	FORMAT(a,i3,1(a,g15.4))

!-----------------------------------------------------------------------
!        LOCAL FORCE COMPUTATION   
!-----------------------------------------------------------------------
!	In this section each PE compute the force for a subset of the local
!	bodies, that were not computed in the grouping part
!-----------------------------------------------------------------------
! NOTE(review): group_access is a shared module flag written here by every
! thread with the same value (benign race); still inside the parallel region.
	group_access=1 ! ungrouped flag
	

! c2a is shared and written by thread 0 only; it is read after the
! parallel region to time this section.
		IF(TID.eq.0) c2a=MPI_WTIME()
        count_par=0
        
	

!$OMP  DO
        DO 100 p=1,nb_res_loc(me+1)

!-----------------------------------------------------------------------
!   Forming the interaction lists.
!   p is the logical number of body
!-----------------------------------------------------------------------
	   
	   IF(mark_bod_gr(p).ge.1) CYCLE ! skip this particle. It was already computed in the grouping section
	

        count_par=count_par+1
           
!	   numbod_100=numbod_100+1

          CALL ilist(p,nterms,iterms,pos_comm,pmass, drdotdr,dx,dy,dz,pquad)  



 	   
!-----------------------------------------------------------------------
!   Compute potential and the Force.
!   NOTE(review): acc_g here still holds this thread's far-field value
!   from its last grouping cell (or 0 if it processed none) -- presumably
!   force ignores/overwrites it for ungrouped bodies; confirm in force().
!-----------------------------------------------------------------------
 	   CALL force(p,nterms,iterms,pos_comm,dx,dy,dz,drdotdr,pmass,pquad,acc_g,option)


100    CONTINUE
!$OMP END DO
	DEALLOCATE(drdotdr) 
	DEALLOCATE(dx) 
	DEALLOCATE(dy) 
	DEALLOCATE(dz)
	DEALLOCATE(drdotdr_gr) 
	DEALLOCATE(dx_gr) 
	DEALLOCATE(dy_gr) 
	DEALLOCATE(dz_gr) 
	DEALLOCATE(iterms)
	DEALLOCATE(iterms_gr)
	DEALLOCATE(pmass)
	DEALLOCATE(pmass_gr)
	DEALLOCATE(pquad)
	DEALLOCATE(pquad_gr)
	DEALLOCATE(acc_g) 
	DEALLOCATE(pos_comm) 
          
!$OMP END PARALLEL           

! NOTE(review): ctota is assigned (not accumulated) from ctot_TW, which
! was zeroed at entry -- verify whether ctota=ctota+... was intended.
	   c2b=MPI_WTIME()
	   ctota=ctot_TW+(c2b-c2a)



        
	CALL MPI_BARRIER(MPI_COMM_WORLD,ierror)


	RETURN
        END