Commit 5a864f5c authored by David Goz's avatar David Goz 😴
Browse files

cuda-omp/omp/miscellaneous/globals update

parent 9723ff08
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
COMPILER_CXX ?= clang++-18
COMPILER_CXX ?= clang++
DEBUG        ?= YES
FLAGS        ?= -fopenmp --offload-arch=native -fopenmp-targets=nvptx64-nvidia-cuda
FLAGS        ?= # -fopenmp # --offload-arch=native -fopenmp-targets=nvptx64-nvidia-cuda

# executable name
EXEC     ?= globals
+1 −1
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@
constexpr std::size_t X = 3;
constexpr std::size_t Y = 6;
constexpr std::size_t Z = 65536;
using MyData = double;
using MyData = int;

// Global pointer declared in target region
#pragma omp declare target
+32 −37
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
#include <omp.h>
#include <cassert>
#include <new>
#include <algorithm>

#include "allvars.hpp"

@@ -23,54 +24,46 @@ int main()

	  for (std::size_t z=0 ; z<Z ; z++)
	    {
	      global_ptr[x][y][z] = static_cast<MyData>(1);
	      global_ptr[x][y][z] = static_cast<MyData>(z + 1);
	    } // loop over Z
	} // loop over Y
    } // loop over X

  std::cout << "\n\t global_ptr allocated on the host \n" << std::endl;
  
//   // Allocate memory on the device and set the global pointer
// #pragma omp target enter data map(alloc: global_ptr[0:1][0:6][0:SIZE])
  // Allocate memory on the device and set the global pointer
  #pragma omp target enter data map(to: global_ptr[0:X][0:Y][0:Z])

//   for 
  
  
//   // Copy data from host to device
// #pragma omp target data map(to: host_data[0: SIZE])
//   {
//     #pragma omp target teams distribute parallel for
//     for (int index=0 ; index<SIZE ; index++)
//       {
// 	const int tid    = omp_get_thread_num();
// 	const int team   = omp_get_team_num();
// 	const int nthr   = omp_get_num_threads();
// 	const int whoAmI = tid + (team * nthr);
	
// 	MyData diff[6];
// 	for (std::size_t i=0 ; i<6 ; i++)
// 	  {
// 	    diff[i] = global_ptr[0][i][index] * ;
// 	  }
  #pragma omp target teams distribute parallel for
  for (std::size_t index=0 ; index<Z ; index++)
    {
      MyData diff = 0;
      for (std::size_t x=0 ; x<X ; x++)
	for (std::size_t y=0 ; y<Y ; y++)
	  diff += global_ptr[x][y][index];

// 	{
// 	  global_ptr[i] = (host_data[i] * 2);
// 	}
//       } // kernel
      for (std::size_t x=0 ; x<X ; x++)
	for (std::size_t y=0 ; y<Y ; y++)
	  global_ptr[x][y][index] = (diff / (X * Y * (index + 1)));
    } // kernel
  
//     // Copy data back from device to host using the global pointer
//     #pragma omp target update from(global_ptr[0: SIZE])
//   }
  // Device-host synchronization
  #pragma omp target update from(global_ptr[0:X][0:Y][0:Z])

//   std::cout << "\n\t Result after device computation:" << std::endl;
//   for (std::size_t i=0 ; i<SIZE ; i++)
//     {
//       std::cout << global_ptr[i] << " ";
//     }
//   std::cout << std::endl;
  // Check that ALL elements along Z are equal to 1 (std::all_of)
  for (std::size_t x=0 ; x<X ; x++)
    for (std::size_t y=0 ; y<Y ; y++)
      {
	const bool One = std::all_of(&global_ptr[x][y][0], &global_ptr[x][y][Z], [](const MyData x) {return (x == 1);});
	if (One == false)
	  {
	    std::cout << "\n\t Test failed \n" << std::endl;
	    return -1;
	  }
      }

  // Deallocate memory on the device
  //#pragma omp target exit data map(delete: global_ptr)
  #pragma omp target exit data map(delete: global_ptr[0:X][0:Y][0:Z])

  // deallocate host memory
  for (std::size_t x=0 ; x<X ; x++)
@@ -83,5 +76,7 @@ int main()
    }
  delete[] global_ptr;

  std::cout << "\n\t Test OK! \n" << std::endl;
  
  return 0;
}