#include<stdio.h>
#include "numa_vars.h"
#include "proto.h"

/*
 * write_result - stop the global timers, report the collected timings and
 * store them in the timing file.
 *
 * Steps, in order:
 *   1. Stops both timers and fills timing.tot_time (from clock()) and
 *      timing.tot_time1 (from CLOCK_MONOTONIC).
 *   2. Sums reduce_mpi_time over COMM[HOSTS] and reduce_shmem_time over
 *      COMM[myHOST] onto root 0 with MPI_Reduce.  The process with
 *      rank == 0 receives in place and prints the timing report to stdout.
 *   3. The process with rank == 0 writes one line of timings to
 *      out.timingfile; the "1"-suffixed fields are used when
 *      param.num_threads != 1.
 *   4. When ONE_SIDE is defined, fences and frees the window slabwin.
 *
 * Takes no parameters and returns nothing; it works on the globals declared
 * in the project headers.
 *
 * NOTE(review): the code presumes that the process with global rank 0 is
 * also rank 0 of COMM[HOSTS] and of COMM[myHOST] (it passes MPI_IN_PLACE with
 * root 0) - confirm against the communicator setup.
 */
void write_result() {

  /* Stop the timers: clock() for tot_time, the monotonic clock for tot_time1. */
  end = clock();
  clock_gettime(CLOCK_MONOTONIC, &finish);
  timing.tot_time = ((double) (end - start0)) / CLOCKS_PER_SEC;
  timing.tot_time1 = (finish.tv_sec - begin0.tv_sec);
  timing.tot_time1 += (finish.tv_nsec - begin0.tv_nsec) / 1000000000.0;

  if (rank == 0)
    {
      /* Divisor for the summed MPI reduce time; only used when Me.Nhosts > 1. */
      const int Ntasksmpi = Me.Nhosts;

      printf("Setup time:    %f sec\n",timing.setup_time);
      printf("Process time:  %f sec\n",timing.process_time);
      printf("Kernel time = %f, Array Composition time %f , Reduce time: %f sec\n",timing.kernel_time,timing.compose_time,timing.reduce_time);

      /* Root side of the reductions: the sums overwrite the local values. */
      if (Me.Nhosts > 1)
        {
          MPI_Reduce( MPI_IN_PLACE, &reduce_mpi_time, sizeof(reduce_mpi_time)/sizeof(double), MPI_DOUBLE, MPI_SUM, 0, COMM[HOSTS]);
        }

      if (Me.Ntasks[Me.SHMEMl] > 1)
        {
          MPI_Reduce( MPI_IN_PLACE, &reduce_shmem_time, sizeof(reduce_shmem_time)/sizeof(double), MPI_DOUBLE, MPI_SUM, 0, COMM[myHOST]);
        }

#ifndef RING
      /* Original author's note: report the correct reduce figure when the
       * custom reduce implementation is not the one in use.
       * NOTE(review): with Me.Nhosts > 1 this value is still divided by the
       * host count below - confirm that is intended. */
      reduce_mpi_time = timing.reduce_time;
#endif

      /* The reduced values are sums, so they are printed divided by a task count. */
      printf("Reduce Shmem time: %f sec\n", reduce_shmem_time/Me.Ntasks[Me.SHMEMl]);
      if (Me.Nhosts > 1)
        {
          printf("Reduce MPI time: %f sec\n", reduce_mpi_time/Ntasksmpi);
        }
      else
        {
          printf("Reduce MPI time: %f sec\n", reduce_mpi_time);
        }
#ifdef USE_FFTW
      printf("FFTW time:     %f sec\n",timing.fftw_time);
      printf("Phase time:    %f sec\n",timing.phase_time);
#endif
      printf("TOT time:      %f sec\n",timing.tot_time);

      if (param.num_threads > 1)
        {
          printf("PSetup time:   %f sec\n",timing.setup_time1);
          printf("PProcess time: %f sec\n",timing.process_time1);
          printf("PKernel time = %f, PArray Composition time %f, PReduce time: %f sec\n",timing.kernel_time1,timing.compose_time1,timing.reduce_time1);
#ifdef USE_FFTW
          printf("PFFTW time:    %f sec\n",timing.fftw_time1);
          printf("PPhase time:   %f sec\n",timing.phase_time1);
#endif
          printf("PTOT time:     %f sec\n",timing.tot_time1);
        }
    }
  else
    {
      /* Contributing side of the same reductions; no receive buffer is
       * needed off the root.  Only processes with a positive rank in HOSTS
       * take part in the reduction over COMM[HOSTS]. */
      if (Me.Nhosts > 1 && Me.Rank[HOSTS] > 0)
        {
          MPI_Reduce( &reduce_mpi_time, NULL, sizeof(reduce_mpi_time)/sizeof(double), MPI_DOUBLE, MPI_SUM, 0, COMM[HOSTS]);
        }

      if (Me.Ntasks[Me.SHMEMl] > 1)
        {
          MPI_Reduce( &reduce_shmem_time, NULL, sizeof(reduce_shmem_time)/sizeof(double), MPI_DOUBLE, MPI_SUM, 0, COMM[myHOST]);
        }
    }

  if (rank == 0)
    {
      file.pFile = fopen (out.timingfile,"w");
      if (file.pFile == NULL)
        {
          /* Do not return here: the ONE_SIDE window calls below must still
           * be reached by this process. */
          fprintf(stderr, "write_result: cannot open timing file '%s' for writing\n", out.timingfile);
        }
      else
        {
          if (param.num_threads == 1)
            {
              fprintf(file.pFile, "%f %f %f %f %f %f %f\n",timing.setup_time,timing.kernel_time,timing.compose_time,timing.reduce_time,timing.fftw_time,timing.phase_time,timing.tot_time);
            }
          else
            {
              fprintf(file.pFile, "%f %f %f %f %f %f %f\n",timing.setup_time1,timing.kernel_time1,timing.compose_time1,timing.reduce_time1,timing.fftw_time1,timing.phase_time1,timing.tot_time1);
            }

          /* fclose flushes the stream; a failure means the line may be lost. */
          if (fclose(file.pFile) != 0)
            {
              fprintf(stderr, "write_result: error while closing timing file '%s'\n", out.timingfile);
            }
        }
    }

#ifdef ONE_SIDE
  MPI_Win_fence(0,slabwin);
  MPI_Win_free(&slabwin);
#endif
}
