Commit 5220696a authored by Giovanni La Mura's avatar Giovanni La Mura
Browse files

Implement magma_cms()

parent 97917f8c
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -888,7 +888,11 @@ int cluster_jxi488_cycle(
#ifdef USE_TARGET_OFFLOAD
#ifdef USE_MAGMA
  magmaDoubleComplex* vec_am = (magmaDoubleComplex *)(cid->am[0]);
  int device_id = cid->proc_device;
#pragma omp target data map(tofrom: vec_am[0:ndit*ndit]) device(device_id)
  {
    magma_cms(vec_am, cid->c1, cid->proc_device);
  }
#endif //USE_MAGMA
#endif // USE_TARGET_OFFLOAD
  cms_gpu(cid->am[0], cid->c1);
+8 −0
Original line number Diff line number Diff line
@@ -24,6 +24,14 @@
#ifndef INCLUDE_MAGMA_CALLS_H_
#define INCLUDE_MAGMA_CALLS_H_

/**
 * \brief Initialize a MAGMA compliant T-matrix leaving it on the GPU.
 *
 * \param vec_am: `magmaDoubleComplex *` Vector form of the matrix.
 * \param c1: `ParticleDescriptor *` Pointer to a ParticleDescriptor instance.
 * \param device_id: `int` ID of the device to build the matrix on.
 * \return result: `magma_int_t` A return code (MAGMA_SUCCESS, if everything is fine).
 */
magma_int_t magma_cms(magmaDoubleComplex *vec_am, ParticleDescriptor *c1, int device_id);

/**
+136 −7
Original line number Diff line number Diff line
@@ -66,17 +66,146 @@ using namespace std;
magma_int_t magma_cms(magmaDoubleComplex *vec_am, ParticleDescriptor *c1, int device_id) {
  const magmaDoubleComplex cz0 = MAGMA_Z_MAKE(0.0, 0.0);
  const magmaDoubleComplex cz1 = MAGMA_Z_MAKE(1.0, 0.0); 
  const magma_int_t ldam = 2 * c1->nsph * c1->li * (c1->li + 2);
  const magma_int_t size = ldam * ldam;
  const int nsph = c1->nsph;
  const int nsphmo = c1->nsph - 1;
  const int li = c1->li;
  const int lmtpo = c1->lmtpo;
  const int rsize = li * nsph;
  const magma_int_t max_litpo = 2 * li + 1;
  const magma_int_t nlim = li * (li + 2);
  const magma_int_t ndi = nsph * nlim;
  const magma_int_t ndit = 2 * ndi;
  const magma_int_t size = ndit * ndit;
  const magma_int_t num_pairs = (nsph * (nsph - 1)) / 2;
  const magma_int_t total_iters = num_pairs * li * max_litpo * li * max_litpo;
  magmaDoubleComplex *rmi = (magmaDoubleComplex *)(c1->rmi[0]);
  magmaDoubleComplex *rei = (magmaDoubleComplex *)(c1->rei[0]);

#pragma omp target teams distribute parallel for map(tofrom:vec_am[0:size]) \
  firstprivate(size, ldam, cz0, cz1) device(device_id)
  /* THIS IS AN EXAMPLE OF WORKING ID INITIALIZATION
#pragma omp target teams distribute parallel for \
  firstprivate(size, ndit, cz0, cz1) device(device_id)
  for (magma_int_t iter = 0; iter < size; ++iter) {
    magma_int_t i = iter / ldam;
    magma_int_t j = iter % ldam;
    magma_int_t i = iter / ndit;
    magma_int_t j = iter % ndit;
    vec_am[iter] = (i == j) ? cz1 : cz0;
  }
  */
  
  int lut_n1[num_pairs];
  int lut_n2[num_pairs];
  for (int n1 = 1; n1 <= nsphmo; n1++) {
    for (int n2 = n1 + 1; n2 <= nsph; n2++) {
      int nbl = ((n1 - 1) * (2 * nsph - n1)) / 2 + n2 - n1;
      lut_n1[nbl - 1] = n1;
      lut_n2[nbl - 1] = n2;
    }
  }
  
#pragma omp target teams distribute parallel for \
  firstprivate(total_iters, max_litpo, num_pairs, li, ndi, ndit) \
  map(to: lut_n1[0:num_pairs], lut_n2[0:num_pairs]) \
  device(device_id)
  for (magma_int_t iter = 0; iter < total_iters; ++iter) {
    magma_int_t t = iter;
    // double rac3j[128];

    // Index calculation (from innermost to outermost)
    int im2 = (t % max_litpo) + 1;
    t /= max_litpo;
    int l2  = (t % li) + 1;
    t /= li;
    int im1 = (t % max_litpo) + 1;
    t /= max_litpo;
    int l1  = (t % li) + 1;
    t /= li;
    int nbl_idx = (t % num_pairs); // 0-based for look-up tables
    // Look-up values
    int n1 = lut_n1[nbl_idx];
    int n2 = lut_n2[nbl_idx];
    int nbl = nbl_idx + 1;
    // Ranges of magnetic quantum numbers
    int l1po = l1 + 1;
    int l2po = l2 + 1;
    int l1tpo = 2 * l1 + 1;
    int l2tpo = 2 * l2 + 1;

    int il1 = l1po * l1;
    int in1 = (n1 - 1) * nlim;
    int in2 = (n2 - 1) * nlim;
    int il2 = l2po * l2;
    magmaDoubleComplex rsh = ((l2 + l1) % 2 == 0) ? MAGMA_Z_MAKE(1.0, 0.0) : MAGMA_Z_MAKE(-1.0, 0.0);
    magmaDoubleComplex rsk = MAGMA_Z_NEGATE(rsh);
    bool is_valid_iter = (im1 <= l1tpo && im2 <= l2tpo);
    // Index 1 magnetic quantum numbers
    int m1 = (is_valid_iter) ? im1 - l1po : 0;
    int ilm1 = (is_valid_iter) ? il1 + m1 : 0;
    int ilm1e = (is_valid_iter) ? ilm1 + ndi : 0;
    int i1 = (is_valid_iter) ? in1 + ilm1 : 0;
    int i1e = (is_valid_iter) ? in1 + ilm1e : 0;
    int j1 = (is_valid_iter) ? in2 + ilm1 : 0;
    int j1e = (is_valid_iter) ? in2 + ilm1e : 0;
    // End of index 1 magnetic quantum numbers
    // Index 2 magnetic quantum numbers
    int m2 = (is_valid_iter) ? im2 - l2po : 0;
    int ilm2 = (is_valid_iter) ? il2 + m2 : 0;
    int ilm2e = (is_valid_iter) ? ilm2 + ndi : 0;
    int i2 = (is_valid_iter) ? in2 + ilm2 : 0;
    int i2e = (is_valid_iter) ? in2 + ilm2e : 0;
    int j2 = (is_valid_iter) ? in1 + ilm2 : 0;
    int j2e = (is_valid_iter) ? in1 + ilm2e : 0;
    // End of index 2 magnetic quantum numbers
    magmaDoubleComplex cgh, cgk, zvalue;
    cgh = (is_valid_iter) ?
      cz1 : // ghit(0, 0, nbl, l1, m1, l2, m2, c1, rac3j) :
      cz0;
    cgk = (is_valid_iter) ?
      cz1 : // ghit(0, 1, nbl, l1, m1, l2, m2, c1, rac3j) :
      cz0;
    if (is_valid_iter) {
      vec_am[(i1 - 1) * ndit + i2 - 1] = cgh;
      vec_am[(i1 - 1) * ndit + i2e - 1] = cgk;
      vec_am[(i1e - 1) * ndit + i2 - 1] = cgk;
      vec_am[(i1e - 1) * ndit + i2e - 1] = cgh;
      zvalue = MAGMA_Z_MUL(cgh, rsh);
      vec_am[(j1 - 1) * ndit + j2 - 1] = zvalue;
      vec_am[(j1e - 1) * ndit + j2e - 1] = zvalue;
      zvalue = MAGMA_Z_MUL(cgk, rsk);
      vec_am[(j1 - 1) * ndit + j2e - 1] = zvalue;
      vec_am[(j1e - 1) * ndit + j2 - 1] = zvalue;
    }
  }
  
  magma_int_t diag_iters = nsph * li * max_litpo;
#pragma omp target teams distribute parallel for \
  firstprivate(diag_iters, max_litpo, li, nsph, ndi, ndit) \
  map(to: rmi[0:rsize], rei[0:rsize]) \
  device(device_id)
  for(magma_int_t iter = 0; iter < diag_iters; ++iter) {
    magma_int_t t = iter;
    int im1 = (t % max_litpo) + 1;
    t /= max_litpo;
    int l1 = (t % li) + 1;
    t /= li;
    int n1 = (t % nsph) + 1;
    magmaDoubleComplex result_e, result_m;
    int in1 = (n1 - 1) * nlim;
    int rindex = (l1 - 1) * nsph + n1 - 1;
    magmaDoubleComplex dm = rmi[rindex];
    magmaDoubleComplex de = rei[rindex];
    int l1po = l1 + 1;
    int il1 = l1po * l1;
    int l1tpo = l1po + l1;
    int m1 = im1 - l1po;
    int ilm1 = il1 + m1;
    int i1 = in1 + ilm1;
    int i1e = i1 + ndi;
    result_m = dm;
    result_e = de;
    if (im1 <= l1tpo) {
      vec_am[(i1 - 1) * ndit + i1 -1] = result_m;
      vec_am[(i1e - 1) * ndit + i1e -1] = result_e;
    }
  } // iter loop
  return MAGMA_SUCCESS;
}