Loading src/cluster/cluster.cpp +5 −1 Original line number Diff line number Diff line Loading @@ -888,7 +888,11 @@ int cluster_jxi488_cycle( #ifdef USE_TARGET_OFFLOAD #ifdef USE_MAGMA magmaDoubleComplex* vec_am = (magmaDoubleComplex *)(cid->am[0]); int device_id = cid->proc_device; #pragma omp target data map(tofrom: vec_am[0:ndit*ndit]) device(device_id) { magma_cms(vec_am, cid->c1, cid->proc_device); } #endif //USE_MAGMA #endif // USE_TARGET_OFFLOAD cms_gpu(cid->am[0], cid->c1); Loading src/include/magma_calls.h +8 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,14 @@ #ifndef INCLUDE_MAGMA_CALLS_H_ #define INCLUDE_MAGMA_CALLS_H_ /** * \brief Initialize a MAGMA compliant T-matrix leaving it on the GPU. * * \param vec_am: `magmaDoubleComplex *` Vector form of the matrix. * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. * \param device_id: `int` ID of the device to build the matrix on. * \return result: `magma_int_t` A return code (MAGMA_SUCCESS, if everything is fine). */ magma_int_t magma_cms(magmaDoubleComplex *vec_am, ParticleDescriptor *c1, int device_id); /** Loading src/libnptm/magma_calls.cpp +136 −7 Original line number Diff line number Diff line Loading @@ -66,17 +66,146 @@ using namespace std; magma_int_t magma_cms(magmaDoubleComplex *vec_am, ParticleDescriptor *c1, int device_id) { const magmaDoubleComplex cz0 = MAGMA_Z_MAKE(0.0, 0.0); const magmaDoubleComplex cz1 = MAGMA_Z_MAKE(1.0, 0.0); const magma_int_t ldam = 2 * c1->nsph * c1->li * (c1->li + 2); const magma_int_t size = ldam * ldam; const int nsph = c1->nsph; const int nsphmo = c1->nsph - 1; const int li = c1->li; const int lmtpo = c1->lmtpo; const int rsize = li * nsph; const magma_int_t max_litpo = 2 * li + 1; const magma_int_t nlim = li * (li + 2); const magma_int_t ndi = nsph * nlim; const magma_int_t ndit = 2 * ndi; const magma_int_t size = ndit * ndit; const magma_int_t num_pairs = (nsph * (nsph - 1)) / 2; const magma_int_t total_iters = num_pairs * li * max_litpo * li * max_litpo; magmaDoubleComplex *rmi = (magmaDoubleComplex *)(c1->rmi[0]); magmaDoubleComplex *rei = (magmaDoubleComplex *)(c1->rei[0]); #pragma omp target teams distribute parallel for map(tofrom:vec_am[0:size]) \ firstprivate(size, ldam, cz0, cz1) device(device_id) /* THIS IS AN EXAMPLE OF WORKING ID INITIALIZATION #pragma omp target teams distribute parallel for \ firstprivate(size, ndit, cz0, cz1) device(device_id) for (magma_int_t iter = 0; iter < size; ++iter) { magma_int_t i = iter / ldam; magma_int_t j = iter % ldam; magma_int_t i = iter / ndit; magma_int_t j = iter % ndit; vec_am[iter] = (i == j) ? cz1 : cz0; } */ int lut_n1[num_pairs]; int lut_n2[num_pairs]; for (int n1 = 1; n1 <= nsphmo; n1++) { for (int n2 = n1 + 1; n2 <= nsph; n2++) { int nbl = ((n1 - 1) * (2 * nsph - n1)) / 2 + n2 - n1; lut_n1[nbl - 1] = n1; lut_n2[nbl - 1] = n2; } } #pragma omp target teams distribute parallel for \ firstprivate(total_iters, max_litpo, num_pairs, li, ndi, ndit) \ map(to: lut_n1[0:num_pairs], lut_n2[0:num_pairs]) \ device(device_id) for (magma_int_t iter = 0; iter < total_iters; ++iter) { magma_int_t t = iter; // double rac3j[128]; // Index calculation (from innermost to outermost) int im2 = (t % max_litpo) + 1; t /= max_litpo; int l2 = (t % li) + 1; t /= li; int im1 = (t % max_litpo) + 1; t /= max_litpo; int l1 = (t % li) + 1; t /= li; int nbl_idx = (t % num_pairs); // 0-based for look-up tables // Look-up values int n1 = lut_n1[nbl_idx]; int n2 = lut_n2[nbl_idx]; int nbl = nbl_idx + 1; // Ranges of magnetic quantum numbers int l1po = l1 + 1; int l2po = l2 + 1; int l1tpo = 2 * l1 + 1; int l2tpo = 2 * l2 + 1; int il1 = l1po * l1; int in1 = (n1 - 1) * nlim; int in2 = (n2 - 1) * nlim; int il2 = l2po * l2; magmaDoubleComplex rsh = ((l2 + l1) % 2 == 0) ? MAGMA_Z_MAKE(1.0, 0.0) : MAGMA_Z_MAKE(-1.0, 0.0); magmaDoubleComplex rsk = MAGMA_Z_NEGATE(rsh); bool is_valid_iter = (im1 <= l1tpo && im2 <= l2tpo); // Index 1 magnetic quantum numbers int m1 = (is_valid_iter) ? im1 - l1po : 0; int ilm1 = (is_valid_iter) ? il1 + m1 : 0; int ilm1e = (is_valid_iter) ? ilm1 + ndi : 0; int i1 = (is_valid_iter) ? in1 + ilm1 : 0; int i1e = (is_valid_iter) ? in1 + ilm1e : 0; int j1 = (is_valid_iter) ? in2 + ilm1 : 0; int j1e = (is_valid_iter) ? in2 + ilm1e : 0; // End of index 1 magnetic quantum numbers // Index 2 magnetic quantum numbers int m2 = (is_valid_iter) ? im2 - l2po : 0; int ilm2 = (is_valid_iter) ? il2 + m2 : 0; int ilm2e = (is_valid_iter) ? ilm2 + ndi : 0; int i2 = (is_valid_iter) ? in2 + ilm2 : 0; int i2e = (is_valid_iter) ? in2 + ilm2e : 0; int j2 = (is_valid_iter) ? in1 + ilm2 : 0; int j2e = (is_valid_iter) ? in1 + ilm2e : 0; // End of index 2 magnetic quantum numbers magmaDoubleComplex cgh, cgk, zvalue; cgh = (is_valid_iter) ? cz1 : // ghit(0, 0, nbl, l1, m1, l2, m2, c1, rac3j) : cz0; cgk = (is_valid_iter) ? cz1 : // ghit(0, 1, nbl, l1, m1, l2, m2, c1, rac3j) : cz0; if (is_valid_iter) { vec_am[(i1 - 1) * ndit + i2 - 1] = cgh; vec_am[(i1 - 1) * ndit + i2e - 1] = cgk; vec_am[(i1e - 1) * ndit + i2 - 1] = cgk; vec_am[(i1e - 1) * ndit + i2e - 1] = cgh; zvalue = MAGMA_Z_MUL(cgh, rsh); vec_am[(j1 - 1) * ndit + j2 - 1] = zvalue; vec_am[(j1e - 1) * ndit + j2e - 1] = zvalue; zvalue = MAGMA_Z_MUL(cgk, rsk); vec_am[(j1 - 1) * ndit + j2e - 1] = zvalue; vec_am[(j1e - 1) * ndit + j2 - 1] = zvalue; } } magma_int_t diag_iters = nsph * li * max_litpo; #pragma omp target teams distribute parallel for \ firstprivate(diag_iters, max_litpo, li, nsph, ndi, ndit) \ map(to: rmi[0:rsize], rei[0:rsize]) \ device(device_id) for(magma_int_t iter = 0; iter < diag_iters; ++iter) { magma_int_t t = iter; int im1 = (t % max_litpo) + 1; t /= max_litpo; int l1 = (t % li) + 1; t /= li; int n1 = (t % nsph) + 1; magmaDoubleComplex result_e, result_m; int in1 = (n1 - 1) * nlim; int rindex = (l1 - 1) * nsph + n1 - 1; magmaDoubleComplex dm = rmi[rindex]; magmaDoubleComplex de = rei[rindex]; int l1po = l1 + 1; int il1 = l1po * l1; int l1tpo = l1po + l1; int m1 = im1 - l1po; int ilm1 = il1 + m1; int i1 = in1 + ilm1; int i1e = i1 + ndi; result_m = dm; result_e = de; if (im1 <= l1tpo) { vec_am[(i1 - 1) * ndit + i1 -1] = result_m; vec_am[(i1e - 1) * ndit + i1e -1] = result_e; } } // iter loop return MAGMA_SUCCESS; } Loading Loading
src/cluster/cluster.cpp +5 −1 Original line number Diff line number Diff line Loading @@ -888,7 +888,11 @@ int cluster_jxi488_cycle( #ifdef USE_TARGET_OFFLOAD #ifdef USE_MAGMA magmaDoubleComplex* vec_am = (magmaDoubleComplex *)(cid->am[0]); int device_id = cid->proc_device; #pragma omp target data map(tofrom: vec_am[0:ndit*ndit]) device(device_id) { magma_cms(vec_am, cid->c1, cid->proc_device); } #endif //USE_MAGMA #endif // USE_TARGET_OFFLOAD cms_gpu(cid->am[0], cid->c1); Loading
src/include/magma_calls.h +8 −0 Original line number Diff line number Diff line Loading @@ -24,6 +24,14 @@ #ifndef INCLUDE_MAGMA_CALLS_H_ #define INCLUDE_MAGMA_CALLS_H_ /** * \brief Initialize a MAGMA compliant T-matrix leaving it on the GPU. * * \param vec_am: `magmaDoubleComplex *` Vector form of the matrix. * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance. * \param device_id: `int` ID of the device to build the matrix on. * \return result: `magma_int_t` A return code (MAGMA_SUCCESS, if everything is fine). */ magma_int_t magma_cms(magmaDoubleComplex *vec_am, ParticleDescriptor *c1, int device_id); /** Loading
src/libnptm/magma_calls.cpp +136 −7 Original line number Diff line number Diff line Loading @@ -66,17 +66,146 @@ using namespace std; magma_int_t magma_cms(magmaDoubleComplex *vec_am, ParticleDescriptor *c1, int device_id) { const magmaDoubleComplex cz0 = MAGMA_Z_MAKE(0.0, 0.0); const magmaDoubleComplex cz1 = MAGMA_Z_MAKE(1.0, 0.0); const magma_int_t ldam = 2 * c1->nsph * c1->li * (c1->li + 2); const magma_int_t size = ldam * ldam; const int nsph = c1->nsph; const int nsphmo = c1->nsph - 1; const int li = c1->li; const int lmtpo = c1->lmtpo; const int rsize = li * nsph; const magma_int_t max_litpo = 2 * li + 1; const magma_int_t nlim = li * (li + 2); const magma_int_t ndi = nsph * nlim; const magma_int_t ndit = 2 * ndi; const magma_int_t size = ndit * ndit; const magma_int_t num_pairs = (nsph * (nsph - 1)) / 2; const magma_int_t total_iters = num_pairs * li * max_litpo * li * max_litpo; magmaDoubleComplex *rmi = (magmaDoubleComplex *)(c1->rmi[0]); magmaDoubleComplex *rei = (magmaDoubleComplex *)(c1->rei[0]); #pragma omp target teams distribute parallel for map(tofrom:vec_am[0:size]) \ firstprivate(size, ldam, cz0, cz1) device(device_id) /* THIS IS AN EXAMPLE OF WORKING ID INITIALIZATION #pragma omp target teams distribute parallel for \ firstprivate(size, ndit, cz0, cz1) device(device_id) for (magma_int_t iter = 0; iter < size; ++iter) { magma_int_t i = iter / ldam; magma_int_t j = iter % ldam; magma_int_t i = iter / ndit; magma_int_t j = iter % ndit; vec_am[iter] = (i == j) ? cz1 : cz0; } */ int lut_n1[num_pairs]; int lut_n2[num_pairs]; for (int n1 = 1; n1 <= nsphmo; n1++) { for (int n2 = n1 + 1; n2 <= nsph; n2++) { int nbl = ((n1 - 1) * (2 * nsph - n1)) / 2 + n2 - n1; lut_n1[nbl - 1] = n1; lut_n2[nbl - 1] = n2; } } #pragma omp target teams distribute parallel for \ firstprivate(total_iters, max_litpo, num_pairs, li, ndi, ndit) \ map(to: lut_n1[0:num_pairs], lut_n2[0:num_pairs]) \ device(device_id) for (magma_int_t iter = 0; iter < total_iters; ++iter) { magma_int_t t = iter; // double rac3j[128]; // Index calculation (from innermost to outermost) int im2 = (t % max_litpo) + 1; t /= max_litpo; int l2 = (t % li) + 1; t /= li; int im1 = (t % max_litpo) + 1; t /= max_litpo; int l1 = (t % li) + 1; t /= li; int nbl_idx = (t % num_pairs); // 0-based for look-up tables // Look-up values int n1 = lut_n1[nbl_idx]; int n2 = lut_n2[nbl_idx]; int nbl = nbl_idx + 1; // Ranges of magnetic quantum numbers int l1po = l1 + 1; int l2po = l2 + 1; int l1tpo = 2 * l1 + 1; int l2tpo = 2 * l2 + 1; int il1 = l1po * l1; int in1 = (n1 - 1) * nlim; int in2 = (n2 - 1) * nlim; int il2 = l2po * l2; magmaDoubleComplex rsh = ((l2 + l1) % 2 == 0) ? MAGMA_Z_MAKE(1.0, 0.0) : MAGMA_Z_MAKE(-1.0, 0.0); magmaDoubleComplex rsk = MAGMA_Z_NEGATE(rsh); bool is_valid_iter = (im1 <= l1tpo && im2 <= l2tpo); // Index 1 magnetic quantum numbers int m1 = (is_valid_iter) ? im1 - l1po : 0; int ilm1 = (is_valid_iter) ? il1 + m1 : 0; int ilm1e = (is_valid_iter) ? ilm1 + ndi : 0; int i1 = (is_valid_iter) ? in1 + ilm1 : 0; int i1e = (is_valid_iter) ? in1 + ilm1e : 0; int j1 = (is_valid_iter) ? in2 + ilm1 : 0; int j1e = (is_valid_iter) ? in2 + ilm1e : 0; // End of index 1 magnetic quantum numbers // Index 2 magnetic quantum numbers int m2 = (is_valid_iter) ? im2 - l2po : 0; int ilm2 = (is_valid_iter) ? il2 + m2 : 0; int ilm2e = (is_valid_iter) ? ilm2 + ndi : 0; int i2 = (is_valid_iter) ? in2 + ilm2 : 0; int i2e = (is_valid_iter) ? in2 + ilm2e : 0; int j2 = (is_valid_iter) ? in1 + ilm2 : 0; int j2e = (is_valid_iter) ? in1 + ilm2e : 0; // End of index 2 magnetic quantum numbers magmaDoubleComplex cgh, cgk, zvalue; cgh = (is_valid_iter) ? cz1 : // ghit(0, 0, nbl, l1, m1, l2, m2, c1, rac3j) : cz0; cgk = (is_valid_iter) ? cz1 : // ghit(0, 1, nbl, l1, m1, l2, m2, c1, rac3j) : cz0; if (is_valid_iter) { vec_am[(i1 - 1) * ndit + i2 - 1] = cgh; vec_am[(i1 - 1) * ndit + i2e - 1] = cgk; vec_am[(i1e - 1) * ndit + i2 - 1] = cgk; vec_am[(i1e - 1) * ndit + i2e - 1] = cgh; zvalue = MAGMA_Z_MUL(cgh, rsh); vec_am[(j1 - 1) * ndit + j2 - 1] = zvalue; vec_am[(j1e - 1) * ndit + j2e - 1] = zvalue; zvalue = MAGMA_Z_MUL(cgk, rsk); vec_am[(j1 - 1) * ndit + j2e - 1] = zvalue; vec_am[(j1e - 1) * ndit + j2 - 1] = zvalue; } } magma_int_t diag_iters = nsph * li * max_litpo; #pragma omp target teams distribute parallel for \ firstprivate(diag_iters, max_litpo, li, nsph, ndi, ndit) \ map(to: rmi[0:rsize], rei[0:rsize]) \ device(device_id) for(magma_int_t iter = 0; iter < diag_iters; ++iter) { magma_int_t t = iter; int im1 = (t % max_litpo) + 1; t /= max_litpo; int l1 = (t % li) + 1; t /= li; int n1 = (t % nsph) + 1; magmaDoubleComplex result_e, result_m; int in1 = (n1 - 1) * nlim; int rindex = (l1 - 1) * nsph + n1 - 1; magmaDoubleComplex dm = rmi[rindex]; magmaDoubleComplex de = rei[rindex]; int l1po = l1 + 1; int il1 = l1po * l1; int l1tpo = l1po + l1; int m1 = im1 - l1po; int ilm1 = il1 + m1; int i1 = in1 + ilm1; int i1e = i1 + ndi; result_m = dm; result_e = de; if (im1 <= l1tpo) { vec_am[(i1 - 1) * ndit + i1 -1] = result_m; vec_am[(i1e - 1) * ndit + i1e -1] = result_e; } } // iter loop return MAGMA_SUCCESS; } Loading