Loading cuda-omp/omp/miscellaneous/globals/Makefile 0 → 100644 +32 −0 Original line number Diff line number Diff line COMPILER_CXX ?= clang++-18 DEBUG ?= YES FLAGS ?= -fopenmp --offload-arch=native -fopenmp-targets=nvptx64-nvidia-cuda # executable name EXEC ?= globals SYSTYPE ?= $(strip $(shell uname -n)) ############ DEBUG configuration ################################### ifeq ($(DEBUG), YES) OPT = -O0 -g else OPT = -O3 endif #################################################################### .PHONY: clean HEADERS = $(shell find . -name "*.hpp" -type f) SOURCES = $(shell find . -name "*.cpp" -type f) DEPENDENCIES = $(SOURCES) $(HEADERS) Makefile CFLAGS = -Wall -Wextra -v -march=native -mtune=native $(EXEC): $(DEPENDENCIES) $(COMPILER_CXX) $(CFLAGS) $(FLAGS) $(OPT) $(SOURCES) -o $@ ldd $(EXEC) @echo -e '\n\t Program' $@ 'compiled for' $(SYSTYPE) 'machine \n' clean: rm -rf $(EXEC) *~ cuda-omp/omp/miscellaneous/globals/allvars.cpp 0 → 100644 +3 −0 Original line number Diff line number Diff line #include "allvars.hpp" MyData ***global_ptr{nullptr}; cuda-omp/omp/miscellaneous/globals/allvars.hpp 0 → 100644 +13 −0 Original line number Diff line number Diff line #pragma once #include <cstddef> constexpr std::size_t X = 3; constexpr std::size_t Y = 6; constexpr std::size_t Z = 65536; using MyData = double; // Global pointer declared in target region #pragma omp declare target extern MyData ***global_ptr; #pragma omp end declare target cuda-omp/omp/miscellaneous/globals/globals.cpp 0 → 100644 +87 −0 Original line number Diff line number Diff line #include <iostream> #include <vector> #include <omp.h> #include <cassert> #include <new> #include "allvars.hpp" int main() { // allocate memory on the host and set the global pointer global_ptr = new (std::nothrow) MyData** [X]; assert(global_ptr != nullptr); for (std::size_t x=0 ; x<X ; x++) { global_ptr[x] = new (std::nothrow) MyData* [Y]; assert(global_ptr[x] != nullptr); for (std::size_t y=0 ; y<Y ; y++) { global_ptr[x][y] = 
new (std::nothrow) MyData [Z]; assert(global_ptr[x][y] != nullptr); for (std::size_t z=0 ; z<Z ; z++) { global_ptr[x][y][z] = static_cast<MyData>(1); } // loop over Z } // loop over Y } // loop over X std::cout << "\n\t global_ptr allocated on the host \n" << std::endl; // // Allocate memory on the device and set the global pointer // #pragma omp target enter data map(alloc: global_ptr[0:1][0:6][0:SIZE]) // for // // Copy data from host to device // #pragma omp target data map(to: host_data[0: SIZE]) // { // #pragma omp target teams distribute parallel for // for (int index=0 ; index<SIZE ; index++) // { // const int tid = omp_get_thread_num(); // const int team = omp_get_team_num(); // const int nthr = omp_get_num_threads(); // const int whoAmI = tid + (team * nthr); // MyData diff[6]; // for (std::size_t i=0 ; i<6 : i++) // { // diff[i] = global_ptr[0][i][index] * ; // } // { // global_ptr[i] = (host_data[i] * 2); // } // } // kernel // // Copy data back from device to host using the global pointer // #pragma omp target update from(global_ptr[0: SIZE]) // } // std::cout << "\n\t Result after device computation:" << std::endl; // for (std::size_t i=0 ; i<SIZE ; i++) // { // std::cout << global_ptr[i] << " "; // } // std::cout << std::endl; // Deallocate memory on the device //#pragma omp target exit data map(delete: global_ptr) // deallocate host memory for (std::size_t x=0 ; x<X ; x++) { for (std::size_t y=0 ; y<Y ; y++) { delete[] global_ptr[x][y]; } delete[] global_ptr[x]; } delete[] global_ptr; return 0; } jacobi/mpi/miscellaneous/cartesiandeleted 100755 → 0 −16.3 KiB File deleted. View file Loading
# cuda-omp/omp/miscellaneous/globals/Makefile
#
# Builds the `globals` example: every *.cpp found below this directory is
# compiled and linked in a single compiler invocation with OpenMP offloading
# flags. All `?=` variables can be overridden from the environment or the
# command line, e.g. `make DEBUG=NO COMPILER_CXX=clang++`.

COMPILER_CXX ?= clang++-18
DEBUG        ?= YES
FLAGS        ?= -fopenmp --offload-arch=native -fopenmp-targets=nvptx64-nvidia-cuda

# executable name
EXEC         ?= globals

# network node name of the build machine (only used in the final message)
SYSTYPE      ?= $(strip $(shell uname -n))

############ DEBUG configuration ###################################
ifeq ($(DEBUG), YES)
OPT = -O0 -g
else
OPT = -O3
endif
####################################################################

.PHONY: clean

# `:=` so that `find` runs once while the Makefile is parsed instead of on
# every expansion of these variables.
HEADERS      := $(shell find . -name "*.hpp" -type f)
SOURCES      := $(shell find . -name "*.cpp" -type f)
DEPENDENCIES  = $(SOURCES) $(HEADERS) Makefile

CFLAGS = -Wall -Wextra -v -march=native -mtune=native

# Default goal: rebuild the executable whenever a source, a header or this
# Makefile changes, then list the shared libraries it resolves against.
$(EXEC): $(DEPENDENCIES)
	$(COMPILER_CXX) $(CFLAGS) $(FLAGS) $(OPT) $(SOURCES) -o $@
	ldd $(EXEC)
	@# printf instead of `echo -e`: recipes run under /bin/sh, where `-e` is
	@# not portable (dash prints it literally).
	@printf '\n\t Program %s compiled for %s machine \n\n' '$@' '$(SYSTYPE)'

# Remove the executable and editor backup files.
clean:
	rm -rf $(EXEC) *~
cuda-omp/omp/miscellaneous/globals/allvars.cpp 0 → 100644 +3 −0 Original line number Diff line number Diff line #include "allvars.hpp" MyData ***global_ptr{nullptr};
// cuda-omp/omp/miscellaneous/globals/allvars.hpp
//
// Shared compile-time sizes, the element type, and the declaration of the
// global pointer used by the `globals` example.
#pragma once

#include <cstddef>

// Extents of the three levels of the array reachable through global_ptr:
// X pointer rows, each holding Y pointers, each pointing at Z elements.
constexpr std::size_t X = 3;
constexpr std::size_t Y = 6;
constexpr std::size_t Z = 65536;

// Element type stored at the innermost level.
using MyData = double;

// Global pointer declared in target region; the definition lives in
// allvars.cpp.
#pragma omp declare target
extern MyData ***global_ptr;
#pragma omp end declare target
cuda-omp/omp/miscellaneous/globals/globals.cpp 0 → 100644 +87 −0 Original line number Diff line number Diff line #include <iostream> #include <vector> #include <omp.h> #include <cassert> #include <new> #include "allvars.hpp" int main() { // allocate memory on the host and set the global pointer global_ptr = new (std::nothrow) MyData** [X]; assert(global_ptr != nullptr); for (std::size_t x=0 ; x<X ; x++) { global_ptr[x] = new (std::nothrow) MyData* [Y]; assert(global_ptr[x] != nullptr); for (std::size_t y=0 ; y<Y ; y++) { global_ptr[x][y] = new (std::nothrow) MyData [Z]; assert(global_ptr[x][y] != nullptr); for (std::size_t z=0 ; z<Z ; z++) { global_ptr[x][y][z] = static_cast<MyData>(1); } // loop over Z } // loop over Y } // loop over X std::cout << "\n\t global_ptr allocated on the host \n" << std::endl; // // Allocate memory on the device and set the global pointer // #pragma omp target enter data map(alloc: global_ptr[0:1][0:6][0:SIZE]) // for // // Copy data from host to device // #pragma omp target data map(to: host_data[0: SIZE]) // { // #pragma omp target teams distribute parallel for // for (int index=0 ; index<SIZE ; index++) // { // const int tid = omp_get_thread_num(); // const int team = omp_get_team_num(); // const int nthr = omp_get_num_threads(); // const int whoAmI = tid + (team * nthr); // MyData diff[6]; // for (std::size_t i=0 ; i<6 : i++) // { // diff[i] = global_ptr[0][i][index] * ; // } // { // global_ptr[i] = (host_data[i] * 2); // } // } // kernel // // Copy data back from device to host using the global pointer // #pragma omp target update from(global_ptr[0: SIZE]) // } // std::cout << "\n\t Result after device computation:" << std::endl; // for (std::size_t i=0 ; i<SIZE ; i++) // { // std::cout << global_ptr[i] << " "; // } // std::cout << std::endl; // Deallocate memory on the device //#pragma omp target exit data map(delete: global_ptr) // deallocate host memory for (std::size_t x=0 ; x<X ; x++) { for (std::size_t y=0 ; y<Y ; y++) { 
delete[] global_ptr[x][y]; } delete[] global_ptr[x]; } delete[] global_ptr; return 0; }