Loading src/cluster/cluster.cpp +64 −7 Original line number Diff line number Diff line Loading @@ -886,16 +886,72 @@ int cluster_jxi488_cycle( delete outam0; #endif // DEBUG_AM #ifdef USE_TARGET_OFFLOAD if (rs.use_offload) { #ifdef USE_MAGMA magmaDoubleComplex* vec_am = (magmaDoubleComplex *)(cid->am[0]); int device_id = cid->proc_device; #pragma omp target data map(tofrom: vec_am[0:ndit*ndit]) device(device_id) magma_queue_t queue = NULL; magma_device_t device_id = (magma_device_t)(cid->proc_device); magma_queue_create(device_id, &queue); #pragma omp target data map(to: vec_am[0:ndit*ndit]) device(cid->proc_device) { magma_cms(vec_am, cid->c1, cid->proc_device); magma_cms(vec_am, cid->c1, cid->proc_device); // Initialize uninverted matrix on GPU /* // DEBUG STEP #pragma omp target update from(vec_am[0:ndit*ndit]) device(cid->proc_device) message = "c_magma_cms_jxi_" + to_string(jxi488) + ".ppm"; write_matrix_as_ppm(cid->am[0], ndit, ndit, message, "MAG", 1, 1); { VirtualAsciiFile *outam1 = new VirtualAsciiFile(); string outam1_name = output_path + "/c_MAGMA_AM1_JXI" + to_string(jxi488) + ".txt"; sprintf(virtual_line, " AM matrix after CMS before LUCIN\n"); outam1->append_line(virtual_line); sprintf(virtual_line, " %d\n", ndit); outam1->append_line(virtual_line); sprintf(virtual_line, " I1+1 I2+1 Real Imag\n"); outam1->append_line(virtual_line); write_dcomplex_matrix(outam1, cid->am, ndit, ndit, " %5d %5d (%17.8lE,%17.8lE)\n", 1); outam1->write_to_disk(outam1_name); delete outam1; } */ #pragma omp target data use_device_ptr(vec_am) device(cid->proc_device) { magma_zinvert_resident(vec_am, (magma_int_t)ndit, jer, queue, cid->proc_device, rs); magma_queue_sync(queue); } if (jer == 0) { // Get the final inverted matrix. #pragma omp target update from(vec_am[0:ndit*ndit]) device(cid->proc_device) } else { sprintf(virtual_line, "ERROR: matrix inversion returned code %d!\n", jer); message = virtual_line; logger->err(message); } } magma_queue_destroy(queue); #else // NO_USE_MAGMA cms_flat(cid->am[0], cid->c1); #endif // USE_MAGMA #endif // USE_TARGET_OFFLOAD cms_gpu(cid->am[0], cid->c1); } #else // NO_USE_TARGET_OFFLOAD cms(cid->am[0], cid->c1); message = "c_cpu_cms_jxi_" + to_string(jxi488) + ".ppm"; write_matrix_as_ppm(cid->am[0], ndit, ndit, message, "MAG", 1, 1); { VirtualAsciiFile *outam1 = new VirtualAsciiFile(); string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt"; sprintf(virtual_line, " AM matrix after CMS before LUCIN\n"); outam1->append_line(virtual_line); sprintf(virtual_line, " %d\n", ndit); outam1->append_line(virtual_line); sprintf(virtual_line, " I1+1 I2+1 Real Imag\n"); outam1->append_line(virtual_line); write_dcomplex_matrix(outam1, cid->am, ndit, ndit, " %5d %5d (%17.8lE,%17.8lE)\n", 1); outam1->write_to_disk(outam1_name); delete outam1; } #ifdef DEBUG_AM VirtualAsciiFile *outam1 = new VirtualAsciiFile(); string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt"; Loading Loading @@ -947,6 +1003,7 @@ int cluster_jxi488_cycle( return jer; // break; // jxi488 loop: goes to memory clean } #endif // USE_TARGET_OFFLOAD interval_start = chrono::high_resolution_clock::now(); #ifdef USE_NVTX nvtxRangePush("Average calculation"); Loading src/include/clu_subs.h +4 −4 Original line number Diff line number Diff line Loading @@ -104,10 +104,10 @@ double cgev(int ipamo, int mu, int l, int m); * This function constructs the multi-centered M-matrix of the cluster, according * to Eq. (5.28) of Borghese, Denti & Saija (2007). * * \param am: `complex double **` * \param c1: `ParticleDescriptor *` * \param[in,out] am: `complex double *` * \param[in] c1: `ParticleDescriptor *` */ void cms(dcomplex **am, ParticleDescriptor *c1); void cms(dcomplex *am, ParticleDescriptor *c1); #ifdef USE_TARGET_OFFLOAD /** Loading @@ -119,7 +119,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1); * \param am: `complex double **` * \param c1: `ParticleDescriptor *` */ void cms_gpu(dcomplex *am, ParticleDescriptor *c1); void cms_flat(dcomplex *am, ParticleDescriptor *c1); #endif // USE_TARGET_OFFLOAD /** Loading src/include/magma_calls.h +19 −0 Original line number Diff line number Diff line Loading @@ -105,11 +105,30 @@ template<int IHI> magmaDoubleComplex magma_ghit( void magma_r3jjr(int j2, int j3, int m2, int m3, double rac3j[]); #pragma omp end declare target /** * \brief Invert a pre-existing GPU complex matrix with double precision elements. * * \param[in,out] mat: Matrix of complex. The matrix to be inverted. * \param[in] n: `magma_int_t` The number of rows and columns of the [n x n] matrix. * \param[out] jer: `int &` Reference to an integer return flag. * \param[in] queue: `magma_queue_t` The MAGMA device communication queue. * \param[in] device_id: `int` ID of the device for matrix inversion offloading (optional, default 0). * \param[in] rs: `const RuntimeSettings &` Runtime settings instance (optional). */ void magma_zinvert_resident( magmaDoubleComplex *mat, magma_int_t n, int &jer, magma_queue_t queue, int device_id=0, const RuntimeSettings& rs=RuntimeSettings() ); #endif // USE_TARGET_OFFLOAD /** * \brief Invert a complex matrix with double precision elements. * * This function is a first implementation of GPU use. The data transfer * steps are handled internally, via standard MAGMA calls, so that the matrix * to be inverted is copied from the host memory to the GPU, inverted, possibly * refined and finally taken back to the host, leaving the GPU free. * * \param[in,out] mat: Matrix of complex. The matrix to be inverted. * \param[in] n: `np_int` The number of rows and columns of the [n x n] matrix. * \param[out] jer: `int &` Reference to an integer return flag. Loading src/include/utils.h +9 −9 Original line number Diff line number Diff line Loading @@ -56,19 +56,19 @@ int write_dcomplex_matrix( /** * \brief Draw a PPM representation of a matrix. * * \param A: `const dcomplex *` Vector representation of the matrix. [IN] * \param m: `int` Number of rows in the matrix. [IN] * \param n: `int` Number of columns in the matrix. [IN] * \param file_name: `const string&` Reference to a string with the name of the output file. [IN] * \param mode: `const string&` Reference to a string for the value to be represented ("MAG" to * \param[in] A: `const dcomplex *` Vector representation of the matrix. * \param[in] m: `int64_t` Number of rows in the matrix. * \param[in] n: `int64_t` Number of columns in the matrix. * \param[in] file_name: `const string&` Reference to a string with the name of the output file. * \param[in] mode: `const string&` Reference to a string for the value to be represented ("MAG" to * draw magnitudes, "RE" to draw real parts, "IM" to draw imaginary parts. Optional, default is * "MAG"). [IN] * \param row_bin: `int` Number of rows to bin together (optional, default is 1). [IN] * \param col_bin: `int` Number of columns to bin together (optional, default is 1). [IN] * "MAG"). * \param[in] row_bin: `int` Number of rows to bin together (optional, default is 1). * \param[in] col_bin: `int` Number of columns to bin together (optional, default is 1). * \return result: `int` An exit code (0 if successful). */ int write_matrix_as_ppm( const dcomplex *A, int m, int n, const std::string& file_name, const std::string& mode="MAG", dcomplex *A, int64_t m, int64_t n, const std::string& file_name, const std::string& mode="MAG", int row_bin=1, int col_bin=1 ); #endif src/libnptm/clu_subs.cpp +23 −21 Original line number Diff line number Diff line Loading @@ -457,13 +457,14 @@ double cgev(int ipamo, int mu, int l, int m) { } */ void cms(dcomplex **am, ParticleDescriptor *c1) { void cms(dcomplex *am, ParticleDescriptor *c1) { const int nsph = c1->nsph; const int nlim = c1->nlim; const int li = c1->li; const int ndi = nsph * nlim; const int ndit = ndi + ndi; const int nsphmo = nsph - 1; double *rac3j = c1->rac3j; const int lmtpo = c1->lmtpo; // int nbl = 0; for (int n1 = 1; n1 <= nsphmo; n1++) { Loading Loading @@ -492,6 +493,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1) { double rsh = ((l2 + l1) % 2 == 0) ? 1.0 : -1.0; double rsk = -rsh; for (int im2 = 1; im2 <= l2tpo; im2++) { double rac3j[lmtpo]; int m2 = im2 - l2po; int ilm2 = il2 + m2; int ilm2e = ilm2 + ndi; Loading @@ -501,14 +503,14 @@ void cms(dcomplex **am, ParticleDescriptor *c1) { int j2e = in1 + ilm2e; dcomplex cgh = ghit(0, 0, nbl, l1, m1, l2, m2, c1, rac3j); dcomplex cgk = ghit(0, 1, nbl, l1, m1, l2, m2, c1, rac3j); am[i1 - 1][i2 - 1] = cgh; am[i1 - 1][i2e - 1] = cgk; am[i1e - 1][i2 - 1] = cgk; am[i1e - 1][i2e - 1] = cgh; am[j1 - 1][j2 - 1] = cgh * rsh; am[j1 - 1][j2e - 1] = cgk * rsk; am[j1e - 1][j2 - 1] = cgk * rsk; am[j1e - 1][j2e - 1] = cgh * rsh; am[(i1 - 1) * ndit + i2 - 1] = cgh; am[(i1 - 1) * ndit + i2e - 1] = cgk; am[(i1e - 1) * ndit + i2 - 1] = cgk; am[(i1e - 1) * ndit + i2e - 1] = cgh; am[(j1 - 1) * ndit + j2 - 1] = cgh * rsh; am[(j1 - 1) * ndit + j2e - 1] = cgk * rsk; am[(j1e - 1) * ndit + j2 - 1] = cgk * rsk; am[(j1e - 1) * ndit + j2e - 1] = cgh * rsh; } // im2 loop } // l2 loop } // im1 loop Loading @@ -516,7 +518,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1) { } // n2 loop } // n1 loop #pragma omp parallel for collapse(2) for (int n1 = 1; n1 <= nsph; n1++) { // GPU portable? for (int n1 = 1; n1 <= nsph; n1++) { for (int l1 = 1; l1 <= li; l1++) { int in1 = (n1 - 1) * nlim; dcomplex dm = c1->rmi[l1 - 1][n1 - 1]; Loading @@ -533,20 +535,20 @@ void cms(dcomplex **am, ParticleDescriptor *c1) { // for (int ilm2 = 1; ilm2 <= c1->nlim; ilm2++) { // int i2 = in1 + ilm2; // int i2e = i2 + ndi; // am[i1 - 1][i2 - 1] = cc0; // am[i1 - 1][i2e - 1] = cc0; // am[i1e - 1][i2 - 1] = cc0; // am[i1e - 1][i2e - 1] = cc0; // am[(i1 - 1) * ndit + i2 - 1] = cc0; // am[(i1 - 1) * ndit + i2e - 1] = cc0; // am[(i1e - 1) * ndit + i2 - 1] = cc0; // am[(i1e - 1) * ndit + i2e - 1] = cc0; // } am[i1 - 1][i1 - 1] = dm; am[i1e - 1][i1e - 1] = de; am[(i1 - 1) * ndit + i1 - 1] = dm; am[(i1e - 1) * ndit + i1e - 1] = de; } // im1 loop } // l1 loop } // n1 loop } #ifdef USE_TARGET_OFFLOAD void cms_gpu(dcomplex *am, ParticleDescriptor *c1) { void cms_flat(dcomplex *am, ParticleDescriptor *c1) { const int nsph = c1->nsph; const int nlim = c1->nlim; const int li = c1->li; Loading Loading
src/cluster/cluster.cpp +64 −7 Original line number Diff line number Diff line Loading @@ -886,16 +886,72 @@ int cluster_jxi488_cycle( delete outam0; #endif // DEBUG_AM #ifdef USE_TARGET_OFFLOAD if (rs.use_offload) { #ifdef USE_MAGMA magmaDoubleComplex* vec_am = (magmaDoubleComplex *)(cid->am[0]); int device_id = cid->proc_device; #pragma omp target data map(tofrom: vec_am[0:ndit*ndit]) device(device_id) magma_queue_t queue = NULL; magma_device_t device_id = (magma_device_t)(cid->proc_device); magma_queue_create(device_id, &queue); #pragma omp target data map(to: vec_am[0:ndit*ndit]) device(cid->proc_device) { magma_cms(vec_am, cid->c1, cid->proc_device); magma_cms(vec_am, cid->c1, cid->proc_device); // Initialize uninverted matrix on GPU /* // DEBUG STEP #pragma omp target update from(vec_am[0:ndit*ndit]) device(cid->proc_device) message = "c_magma_cms_jxi_" + to_string(jxi488) + ".ppm"; write_matrix_as_ppm(cid->am[0], ndit, ndit, message, "MAG", 1, 1); { VirtualAsciiFile *outam1 = new VirtualAsciiFile(); string outam1_name = output_path + "/c_MAGMA_AM1_JXI" + to_string(jxi488) + ".txt"; sprintf(virtual_line, " AM matrix after CMS before LUCIN\n"); outam1->append_line(virtual_line); sprintf(virtual_line, " %d\n", ndit); outam1->append_line(virtual_line); sprintf(virtual_line, " I1+1 I2+1 Real Imag\n"); outam1->append_line(virtual_line); write_dcomplex_matrix(outam1, cid->am, ndit, ndit, " %5d %5d (%17.8lE,%17.8lE)\n", 1); outam1->write_to_disk(outam1_name); delete outam1; } */ #pragma omp target data use_device_ptr(vec_am) device(cid->proc_device) { magma_zinvert_resident(vec_am, (magma_int_t)ndit, jer, queue, cid->proc_device, rs); magma_queue_sync(queue); } if (jer == 0) { // Get the final inverted matrix. #pragma omp target update from(vec_am[0:ndit*ndit]) device(cid->proc_device) } else { sprintf(virtual_line, "ERROR: matrix inversion returned code %d!\n", jer); message = virtual_line; logger->err(message); } } magma_queue_destroy(queue); #else // NO_USE_MAGMA cms_flat(cid->am[0], cid->c1); #endif // USE_MAGMA #endif // USE_TARGET_OFFLOAD cms_gpu(cid->am[0], cid->c1); } #else // NO_USE_TARGET_OFFLOAD cms(cid->am[0], cid->c1); message = "c_cpu_cms_jxi_" + to_string(jxi488) + ".ppm"; write_matrix_as_ppm(cid->am[0], ndit, ndit, message, "MAG", 1, 1); { VirtualAsciiFile *outam1 = new VirtualAsciiFile(); string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt"; sprintf(virtual_line, " AM matrix after CMS before LUCIN\n"); outam1->append_line(virtual_line); sprintf(virtual_line, " %d\n", ndit); outam1->append_line(virtual_line); sprintf(virtual_line, " I1+1 I2+1 Real Imag\n"); outam1->append_line(virtual_line); write_dcomplex_matrix(outam1, cid->am, ndit, ndit, " %5d %5d (%17.8lE,%17.8lE)\n", 1); outam1->write_to_disk(outam1_name); delete outam1; } #ifdef DEBUG_AM VirtualAsciiFile *outam1 = new VirtualAsciiFile(); string outam1_name = output_path + "/c_AM1_JXI" + to_string(jxi488) + ".txt"; Loading Loading @@ -947,6 +1003,7 @@ int cluster_jxi488_cycle( return jer; // break; // jxi488 loop: goes to memory clean } #endif // USE_TARGET_OFFLOAD interval_start = chrono::high_resolution_clock::now(); #ifdef USE_NVTX nvtxRangePush("Average calculation"); Loading
src/include/clu_subs.h +4 −4 Original line number Diff line number Diff line Loading @@ -104,10 +104,10 @@ double cgev(int ipamo, int mu, int l, int m); * This function constructs the multi-centered M-matrix of the cluster, according * to Eq. (5.28) of Borghese, Denti & Saija (2007). * * \param am: `complex double **` * \param c1: `ParticleDescriptor *` * \param[in,out] am: `complex double *` * \param[in] c1: `ParticleDescriptor *` */ void cms(dcomplex **am, ParticleDescriptor *c1); void cms(dcomplex *am, ParticleDescriptor *c1); #ifdef USE_TARGET_OFFLOAD /** Loading @@ -119,7 +119,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1); * \param am: `complex double **` * \param c1: `ParticleDescriptor *` */ void cms_gpu(dcomplex *am, ParticleDescriptor *c1); void cms_flat(dcomplex *am, ParticleDescriptor *c1); #endif // USE_TARGET_OFFLOAD /** Loading
src/include/magma_calls.h +19 −0 Original line number Diff line number Diff line Loading @@ -105,11 +105,30 @@ template<int IHI> magmaDoubleComplex magma_ghit( void magma_r3jjr(int j2, int j3, int m2, int m3, double rac3j[]); #pragma omp end declare target /** * \brief Invert a pre-existing GPU complex matrix with double precision elements. * * \param[in,out] mat: Matrix of complex. The matrix to be inverted. * \param[in] n: `magma_int_t` The number of rows and columns of the [n x n] matrix. * \param[out] jer: `int &` Reference to an integer return flag. * \param[in] queue: `magma_queue_t` The MAGMA device communication queue. * \param[in] device_id: `int` ID of the device for matrix inversion offloading (optional, default 0). * \param[in] rs: `const RuntimeSettings &` Runtime settings instance (optional). */ void magma_zinvert_resident( magmaDoubleComplex *mat, magma_int_t n, int &jer, magma_queue_t queue, int device_id=0, const RuntimeSettings& rs=RuntimeSettings() ); #endif // USE_TARGET_OFFLOAD /** * \brief Invert a complex matrix with double precision elements. * * This function is a first implementation of GPU use. The data transfer * steps are handled internally, via standard MAGMA calls, so that the matrix * to be inverted is copied from the host memory to the GPU, inverted, possibly * refined and finally taken back to the host, leaving the GPU free. * * \param[in,out] mat: Matrix of complex. The matrix to be inverted. * \param[in] n: `np_int` The number of rows and columns of the [n x n] matrix. * \param[out] jer: `int &` Reference to an integer return flag. Loading
src/include/utils.h +9 −9 Original line number Diff line number Diff line Loading @@ -56,19 +56,19 @@ int write_dcomplex_matrix( /** * \brief Draw a PPM representation of a matrix. * * \param A: `const dcomplex *` Vector representation of the matrix. [IN] * \param m: `int` Number of rows in the matrix. [IN] * \param n: `int` Number of columns in the matrix. [IN] * \param file_name: `const string&` Reference to a string with the name of the output file. [IN] * \param mode: `const string&` Reference to a string for the value to be represented ("MAG" to * \param[in] A: `const dcomplex *` Vector representation of the matrix. * \param[in] m: `int64_t` Number of rows in the matrix. * \param[in] n: `int64_t` Number of columns in the matrix. * \param[in] file_name: `const string&` Reference to a string with the name of the output file. * \param[in] mode: `const string&` Reference to a string for the value to be represented ("MAG" to * draw magnitudes, "RE" to draw real parts, "IM" to draw imaginary parts. Optional, default is * "MAG"). [IN] * \param row_bin: `int` Number of rows to bin together (optional, default is 1). [IN] * \param col_bin: `int` Number of columns to bin together (optional, default is 1). [IN] * "MAG"). * \param[in] row_bin: `int` Number of rows to bin together (optional, default is 1). * \param[in] col_bin: `int` Number of columns to bin together (optional, default is 1). * \return result: `int` An exit code (0 if successful). */ int write_matrix_as_ppm( const dcomplex *A, int m, int n, const std::string& file_name, const std::string& mode="MAG", dcomplex *A, int64_t m, int64_t n, const std::string& file_name, const std::string& mode="MAG", int row_bin=1, int col_bin=1 ); #endif
src/libnptm/clu_subs.cpp +23 −21 Original line number Diff line number Diff line Loading @@ -457,13 +457,14 @@ double cgev(int ipamo, int mu, int l, int m) { } */ void cms(dcomplex **am, ParticleDescriptor *c1) { void cms(dcomplex *am, ParticleDescriptor *c1) { const int nsph = c1->nsph; const int nlim = c1->nlim; const int li = c1->li; const int ndi = nsph * nlim; const int ndit = ndi + ndi; const int nsphmo = nsph - 1; double *rac3j = c1->rac3j; const int lmtpo = c1->lmtpo; // int nbl = 0; for (int n1 = 1; n1 <= nsphmo; n1++) { Loading Loading @@ -492,6 +493,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1) { double rsh = ((l2 + l1) % 2 == 0) ? 1.0 : -1.0; double rsk = -rsh; for (int im2 = 1; im2 <= l2tpo; im2++) { double rac3j[lmtpo]; int m2 = im2 - l2po; int ilm2 = il2 + m2; int ilm2e = ilm2 + ndi; Loading @@ -501,14 +503,14 @@ void cms(dcomplex **am, ParticleDescriptor *c1) { int j2e = in1 + ilm2e; dcomplex cgh = ghit(0, 0, nbl, l1, m1, l2, m2, c1, rac3j); dcomplex cgk = ghit(0, 1, nbl, l1, m1, l2, m2, c1, rac3j); am[i1 - 1][i2 - 1] = cgh; am[i1 - 1][i2e - 1] = cgk; am[i1e - 1][i2 - 1] = cgk; am[i1e - 1][i2e - 1] = cgh; am[j1 - 1][j2 - 1] = cgh * rsh; am[j1 - 1][j2e - 1] = cgk * rsk; am[j1e - 1][j2 - 1] = cgk * rsk; am[j1e - 1][j2e - 1] = cgh * rsh; am[(i1 - 1) * ndit + i2 - 1] = cgh; am[(i1 - 1) * ndit + i2e - 1] = cgk; am[(i1e - 1) * ndit + i2 - 1] = cgk; am[(i1e - 1) * ndit + i2e - 1] = cgh; am[(j1 - 1) * ndit + j2 - 1] = cgh * rsh; am[(j1 - 1) * ndit + j2e - 1] = cgk * rsk; am[(j1e - 1) * ndit + j2 - 1] = cgk * rsk; am[(j1e - 1) * ndit + j2e - 1] = cgh * rsh; } // im2 loop } // l2 loop } // im1 loop Loading @@ -516,7 +518,7 @@ void cms(dcomplex **am, ParticleDescriptor *c1) { } // n2 loop } // n1 loop #pragma omp parallel for collapse(2) for (int n1 = 1; n1 <= nsph; n1++) { // GPU portable? for (int n1 = 1; n1 <= nsph; n1++) { for (int l1 = 1; l1 <= li; l1++) { int in1 = (n1 - 1) * nlim; dcomplex dm = c1->rmi[l1 - 1][n1 - 1]; Loading @@ -533,20 +535,20 @@ void cms(dcomplex **am, ParticleDescriptor *c1) { // for (int ilm2 = 1; ilm2 <= c1->nlim; ilm2++) { // int i2 = in1 + ilm2; // int i2e = i2 + ndi; // am[i1 - 1][i2 - 1] = cc0; // am[i1 - 1][i2e - 1] = cc0; // am[i1e - 1][i2 - 1] = cc0; // am[i1e - 1][i2e - 1] = cc0; // am[(i1 - 1) * ndit + i2 - 1] = cc0; // am[(i1 - 1) * ndit + i2e - 1] = cc0; // am[(i1e - 1) * ndit + i2 - 1] = cc0; // am[(i1e - 1) * ndit + i2e - 1] = cc0; // } am[i1 - 1][i1 - 1] = dm; am[i1e - 1][i1e - 1] = de; am[(i1 - 1) * ndit + i1 - 1] = dm; am[(i1e - 1) * ndit + i1e - 1] = de; } // im1 loop } // l1 loop } // n1 loop } #ifdef USE_TARGET_OFFLOAD void cms_gpu(dcomplex *am, ParticleDescriptor *c1) { void cms_flat(dcomplex *am, ParticleDescriptor *c1) { const int nsph = c1->nsph; const int nlim = c1->nlim; const int li = c1->li; Loading