Commit e0a23993 authored by David Goz's avatar David Goz 😴
Browse files

jacobi/gpu added

parent 61df9364
Loading
Loading
Loading
Loading
+63 −0
Original line number Diff line number Diff line
#######################################################################
# Author: David Goz (david.goz@inaf.it)                               #
# June  2024                                                          #
#######################################################################
#
# To see all the compilation options
# $ make info
#######################################################################

# make.def defines how the application is compiled
include make.def
#######################################################################

.PHONY: info serial valgrind_memcheck valgrind_callgrind valgrind_cachegrind debug clean 

info:
	@echo ' '
	@echo '-----------------------------------------------------------------------------------------------'
	@echo '$$ make serial                 ---> compile the serial application                             '
	@echo '$$ make debug                  ---> debug the serial application                               '
	@echo '$$ make valgrind_memcheck      ---> run the serial application using Valgrind under Memcheck   '
	@echo '$$ make valgrind_callgrind     ---> run the serial application using Valgrind under Callgrind  '
	@echo '$$ make valgrind_cachegrind    ---> run the serial application using Valgrind under Cachegrind '
	@echo '$$ make clean                  ---> clean up all                                               '
	@echo '$$ make info                   ---> get make info                                              '
	@echo '-----------------------------------------------------------------------------------------------'
	@echo ' '

serial: $(PROG)

debug: $(PROG_DEBUG)
	@echo 'OOOooo... debugging ...oooOOO'
	gdb --args ./$<
	@echo 'OOOooo... debugging ... oooOOO'

valgrind_memcheck: $(PROG_MEMCHECK)
	@echo 'oooOOO... valgrind_memcheck ...OOOooo'
	valgrind --tool=memcheck -s --leak-check=full --show-leak-kinds=all --track-origins=yes --read-var-info=yes --log-file=valgrind_memcheck_log_%p.txt ./$< 10 10
	@echo 'oooOOO... valgrind_memcheck ...OOOooo'

valgrind_callgrind: $(PROG_CALLGRIND)
	@echo 'oooOOO... valgrind_callgrind ...OOOooo'
	valgrind --tool=callgrind --dump-instr=yes --collect-jumps=yes --log-file=valgrind_callgrind_log_.%p.txt ./$< 128 128
	@echo ' '
	@echo 'To generate a function-by-function summary from the profile data file:'
	@echo '$$ callgrind_annotate --auto=yes callgrind.out.<pid> | less'
	@echo '(kcachegrind is required in order to visualize the output using the GUI)'

valgrind_cachegrind: $(PROG_CACHEGRIND)
	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'
	valgrind --tool=cachegrind --cache-sim=yes --log-file=valgrind_cachegrind_log_.%p.txt ./$< 128 128
	@echo '$$ cg_annotate --auto=yes cachegrind.out.<pid> | less'
	@echo '(kcachegrind is required in order to visualize the output using the GUI)'
	@echo 'oooOOO... valgrind_cachegrind ...OOOooo'

clean:
	rm -f *~ .*~ ./src/*~ ./src/*# ./include/*~ ./include/*# *~
	rm -f $(PROG) $(PROG_DEBUG) $(PROG_MEMCHECK) $(PROG_CALLGRIND) $(PROG_CACHEGRIND)
	rm -f valgrind_*.txt
	rm -f cachegrind.out.*
	rm -f callgrind.*
	rm -f *bin
	rm -f jacobi_serial_opt_*
+9 −0
Original line number Diff line number Diff line
#pragma once

#define NGHOST   1
#define NDIM     2 /* 2D */
#define X        0
#define Y        1
#define TOL      1.e-5

typedef double MyData;
+18 −0
Original line number Diff line number Diff line
#pragma once

#include "allvars.h"
#include <assert.h>
#include <sys/time.h>
#include <time.h>
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>

/* function prototypes */
MyData **Allocate_2DdblArray(const int nx, const int ny);
void Show_2DdblArray(const MyData **const A,
                     const int            nx,
                     const int            ny,
                     const char *const    string);

double seconds(void);
+45 −0
Original line number Diff line number Diff line
CC     = gcc
CFLAGS = -Wall -Wextra -march=native
LIBS   = -lm

SYSTYPE = $(strip $(shell uname -n))

PROG           = jacobi_serial_opt_$(SYSTYPE)
PROG_DEBUG      = $(PROG)_DEBUG
PROG_MEMCHECK   = $(PROG)_MEMCHECK
PROG_CALLGRIND  = $(PROG)_CALLGRIND
PROG_CACHEGRIND = $(PROG)_CACHEGRIND

HEADERS       = $(wildcard ./include/*.h)
SOURCES       = $(wildcard ./src/*.c)
DEPENDENCIES  = $(SOURCES) $(HEADERS) Makefile

$(PROG): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -O3 -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

$(PROG_DEBUG): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -Og -ggdb3 -fno-omit-frame-pointer -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_DEBUG) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

$(PROG_MEMCHECK): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -Og -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_MEMCHECK) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

$(PROG_CALLGRIND): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_CALLGRIND) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '

$(PROG_CACHEGRIND): $(DEPENDENCIES)
	$(CC) $(CFLAGS) -g -O3 -I./include $(SOURCES) -o $@ $(LIBS)
	@echo ' '
	@echo 'Program' $(PROG_CACHEGRIND) 'compiled for' $(SYSTYPE) 'machine'
	@echo ' '
+269 −0
Original line number Diff line number Diff line
/* ///////////////////////////////////////////////////////////////////// */
/* Authors:  A. Mignone (mignone@to.infn.it)                             */
/*           V. Cesare  (valentina.cesare@inaf.it)                       */
/*           D. Goz     (david.goz@inaf.it)                              */
/*                                                                       */
/* Date   : June 2024                                                    */
/*                                                                       */
/* ///////////////////////////////////////////////////////////////////// */

#include "allvars.h"
#include "tools.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>


/* function prototypes */
void BoundaryConditions(MyData **const restrict,
			MyData  *const restrict,
			MyData  *const restrict,
			const int,
			const int);

void JacobiAlgorithm(MyData **const restrict, MyData **const restrict, MyData *const restrict,
		     const MyData *const restrict, const int, const int, const int, const int);

void WriteSolution(MyData **const phi, const int nx, const int ny);

int main(int argc, char **argv)
{
  if (argc <= 2)
    {
      printf("\n\t Usage: <executable> <x_grid_size> <y_grid_size> \n\n");
      exit(EXIT_FAILURE);
    }

  /* global X and Y grid size */
  const int NX_GLOB = (int) strtol(argv[1], NULL, 10);
  const int NY_GLOB = (int) strtol(argv[2], NULL, 10);

  const MyData xbeg = 0.0;
  const MyData xend = 1.0;
  const MyData ybeg = 0.0;
  const MyData yend = 1.0;

  const MyData delta[NDIM] = {(xend - xbeg)/(NX_GLOB + 1),
			      (yend - ybeg)/(NY_GLOB + 1)};

  /* --------------------------------------------------------
     1. Set grid indices
     -------------------------------------------------------- */
    
  const int ibeg   = NGHOST;
  const int iend   = ibeg + NX_GLOB - 1;
  const int nx     = iend - ibeg + 1;
  const int nx_tot = nx + 2 * NGHOST;
    
  const int jbeg   = NGHOST;
  const int jend   = jbeg + NY_GLOB - 1;
  const int ny     = jend - jbeg + 1;
  const int ny_tot = ny + 2 * NGHOST;

  printf("\n\t Grid indices:");
  printf("\n\t\t ibeg, iend = %d, %d; nx_tot = %d"    ,ibeg, iend, nx_tot);
  printf("\n\t\t jbeg, jend = %d, %d; ny_tot = %d\n\n",jbeg, jend, ny_tot);
    
  /* --------------------------------------------------------
     2. Generate grid, allocate memory
     -------------------------------------------------------- */

  /* memory allocation */
  MyData *xg = (MyData *) malloc((NX_GLOB + 2*NGHOST) * sizeof(MyData));
  MyData *yg = (MyData *) malloc((NY_GLOB + 2*NGHOST) * sizeof(MyData));
  assert((xg != NULL) && (yg != NULL));

  /* initial conditions */
  for (int i=0 ; i<(NX_GLOB + 2*NGHOST) ; i++) xg[i] = xbeg + (i - ibeg + 1) * delta[X];
  for (int j=0 ; j<(NY_GLOB + 2*NGHOST) ; j++) yg[j] = ybeg + (j - jbeg + 1) * delta[Y];
  MyData *x = xg; /* Global and local grids are the same  */
  MyData *y = yg; /* for serial version of the code       */

  /* grids memory allocation */
  MyData **phi  = Allocate_2DdblArray(ny_tot, nx_tot);
  MyData **phi0 = Allocate_2DdblArray(ny_tot, nx_tot);
    
  /* --------------------------------------------------------
     3. Initialize solution array to 0
     -------------------------------------------------------- */
    
  for (int j=jbeg ; j<=jend ; j++)
    for (int i=ibeg ; i<=iend ; i++)
      phi0[j][i] = 0.0;
  
  /* --------------------------------------------------------
     4. Main iteration cycle
     -------------------------------------------------------- */

  const double time_start = seconds();

  /* -- 4a. Set boundary conditions first -- */
  
  BoundaryConditions(phi0, x, y, nx, ny);
  BoundaryConditions(phi, x, y, nx, ny);


  MyData err = 1.0;
  /* iterations */
  int k = 0;

  /* - allocate phi0 and phi on GPU */
  /* - copy phi0 from host to gpu   */
  #pragma acc data copy(phi0[0:nx_tot][0:ny_tot]), create(phi[0:nx_tot][0:ny_tot])
  while (1)
    {        
      /* -- 4b. Jacobi's method and residual (interior points) -- */
      /*       core algorithm                                     */
      
      err = 0.0;
      JacobiAlgorithm(phi, phi0, &err, delta,
		      ibeg, iend, jbeg, jend);

      printf ("\n\t Iteration = %d - err = %lg\n",k, err);

      /* increase the counter of loop iterations */
      k++;

      /* check convergence */
      if (err <= TOL)
	break;
      
      /* swap the pointers */
      MyData **tmp = phi;
      phi = phi0;
      phi0 = tmp;
    }
  
  WriteSolution(phi, nx, ny);

  printf("\n\t NX_GLOB x NY_GLOB = %d x %d\n", NX_GLOB, NY_GLOB);
  printf("\n\t Time = %lf [s]\n\n", seconds() - time_start);

  // free memory
  if (phi0)
    {
      free(phi0[0]);
      free(phi0);
    }

  if (phi)
    {
      free(phi[0]);
      free(phi);
    }

  if (yg)
    free(yg);
  
  if (xg)
    free(xg);
  
  return 0;
}

/* ********************************************************************* */
void BoundaryConditions(MyData **const restrict phi,
			MyData  *const restrict x,
			MyData  *const restrict y,
                        const int               nx,
			const int               ny)
/*
*********************************************************************** */
{
  const int ibeg = NGHOST;
  const int iend = ibeg + nx - 1;
    
  const int jbeg = NGHOST;
  const int jend = jbeg + ny - 1;

  int i,j;
  
  /* -- Left -- */
  i = ibeg - 1;
  for (int j=jbeg ; j<=jend ; j++)
    phi[j][i] = (1.0 - y[j]);
    
  /* -- Right -- */
  i = jend + 1;
  for (int j=jbeg ; j<=jend ; j++)
    phi[j][i] = (y[j] * y[j]);
    
  /* -- Bottom -- */    
  j = jbeg - 1;
  for (int i=ibeg ; i<=iend ; i++)
    phi[j][i] = (1.0 - x[i]);
    
  /* -- Top -- */
  j = jend + 1;
  for (int i=ibeg ; i<=iend ; i++)
    phi[j][i] = x[i];

  return;
}

/* ********************************************************************* */

void JacobiAlgorithm(MyData      **const restrict Phi,
		     MyData      **const restrict Phi0,
		     MyData       *const restrict error,
		     const MyData *const restrict delta,
		     const int                    ibeg,
		     const int                    iend,
		     const int                    jbeg,
		     const int                    jend)
{
  double gpu_err=0.0;

#pragma acc parallel loop reduction(+: err)
  for (int j=jbeg ; j<=jend ; j++)
    {
      for (int i=ibeg ; i<=iend ; i++)
	{
	  Phi[j][i] = 0.25 * (Phi0[j  ][i-1] +
			      Phi0[j  ][i+1] +
			      Phi0[j-1][i  ] +
			      Phi0[j+1][i  ]);

	  /* avoid fabs from math library */
	  const MyData diff = (Phi[j][i] - Phi0[j][i]);
	  *error += ((diff > 0) ? diff : -diff);
	} /* loop over columns */
    } /* loop over rows */
  
  gpu_err *= (delta[X] * delta[Y]);

  *error = gpu_err;
  
  return;
}

/* ********************************************************************* */

/* ********************************************************************* */
void WriteSolution (MyData **const phi,
		    const int      nx,
		    const int      ny)
/*
*********************************************************************** */
{
  const int ibeg = NGHOST;    
  const int jbeg = NGHOST;
  const int jend = jbeg + ny - 1;
    
  static int nfile = 0;  /* File counter */

  char fname[32];
  sprintf(fname,"jacobi2D_serial_opt_%02d.bin", nfile);
    
  FILE *fp;
  printf ("> Writing %s\n",fname);
  fp = fopen(fname, "wb");
    
  for (int j=jbeg ; j<=jend ; j++)
    {
      fwrite (phi[j] + ibeg, sizeof(MyData), nx, fp);
    }
    
  nfile++;
  fclose(fp);
}
Loading