Loading src/libnptm/clu_subs.cpp +69 −18 Original line number Diff line number Diff line Loading @@ -47,7 +47,9 @@ #include <omp.h> #endif #ifdef USE_TARGET_OFFLOAD #pragma omp requires unified_shared_memory #endif using namespace std; Loading Loading @@ -405,7 +407,9 @@ dcomplex cdtp(dcomplex z, dcomplex **am, int i, int jf, int k, int nj) { return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif double cgev(int ipamo, int mu, int l, int m) { double result = 0.0; double xd = 0.0, xn = 0.0; Loading Loading @@ -439,7 +443,9 @@ double cgev(int ipamo, int mu, int l, int m) { } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif void cms(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) { dcomplex dm, de, cgh, cgk; Loading Loading @@ -643,7 +649,9 @@ void crsm1(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) { delete[] svs; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex ghit_d( int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, C1 *c1, C1_AddOns *c1ao, C4 *c4, double *rac3j Loading Loading @@ -854,9 +862,13 @@ dcomplex ghit_d( } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex ghit( int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6 Loading Loading @@ -1067,7 +1079,9 @@ dcomplex ghit( } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif void hjv( double exri, double vk, int &jer, int &lcalc, dcomplex &arg, Loading Loading @@ -1325,12 +1339,13 @@ void pcros(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C4 *c4) { #ifdef USE_NVTX nvtxRangePush("pcros intermediate loop 1"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4) #endif for 
(int i12 = 0; i12 < nlemt; i12++) { // int i = i12 - 1; dcomplex am = cc0; dcomplex amp = cc0; //#pragma omp target teams distribute parallel for simd reduction(+:am,amp) for (int j10 = 0; j10 < nlemt; j10++) { // int j = j10 - 1; am += (vec_am0m[nlemt*i12+j10] * vec_w[4*j10+ipo18]); Loading Loading @@ -1391,7 +1406,9 @@ void pcrsm0(double vk, double exri, int inpol, C1 *c1, C1_AddOns *c1ao, C4 *c4) csam = -(ccs / (exri * vk)) * 0.5 * I; sum2 = cc0; sum3 = cc0; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3) #endif for (int i14 = 0; i14 < c4->nlem; i14++) { int ie = i14 + c4->nlem; sum2 += (vec_am0m[nlemt*i14 + i14] + vec_am0m[nlemt*ie + ie]); Loading @@ -1399,7 +1416,9 @@ void pcrsm0(double vk, double exri, int inpol, C1 *c1, C1_AddOns *c1ao, C4 *c4) } // i14 loop double sumpi = 0.0; dcomplex sumpd = cc0; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd) #endif for (int i16 = 0; i16 < nlemt; i16++) { for (int j16 = 0; j16 < c4->nlem; j16++) { int je = j16 + c4->nlem; Loading Loading @@ -1599,7 +1618,9 @@ void r3j000(int j2, int j3, C6 *c6) { } } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { int jmx = j3 + j2; int jdf = j3 - j2; Loading Loading @@ -1717,9 +1738,13 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { } } } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) { int jmx = j3 + j2; int jdf = j3 - j2; Loading Loading @@ -1837,7 +1862,9 @@ void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) { } } } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif void r3jmr(int j1, int j2, int j3, int m1, C6 *c6) { int mmx = (j2 < j3 - m1) ? 
j2 : j3 - m1; Loading Loading @@ -1968,7 +1995,9 @@ void raba( #ifdef USE_NVTX nvtxRangePush("raba inner loop 1"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:c1, c2) #endif for (int j10 = 1; j10 <= nlemt; j10++) { int j = j10 - 1; c1 += (vec_am0m[i*nlemt+j] * vec_w[4*j]); Loading Loading @@ -2018,7 +2047,9 @@ void raba( #ifdef USE_NVTX nvtxRangePush("raba inner loop 2"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2) #endif for (int k = 1; k<=kmax; k++) { int l60 = (int) sqrt(k+1); int im60 = k - (l60*l60) + 1; Loading Loading @@ -2091,7 +2122,9 @@ void raba( #ifdef USE_NVTX nvtxRangePush("raba loop 3"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp teams distribute parallel for simd #endif for (int ipo78 = 1; ipo78 <= 2; ipo78++) { int ipo = ipo78 - 1; tqce[ipo][0] = real(ctqce[ipo][0] - ctqce[ipo][2]) * sq2i; Loading Loading @@ -2161,7 +2194,9 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { #ifdef USE_NVTX nvtxRangePush("scr0 inner loop 1"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sums, sum21) #endif for (int l10 = 1; l10 <= c4->li; l10++) { double fl = 1.0 * (l10 + l10 + 1); // dcomplex rm = 1.0 / c1->rmi[l10 - 1][i14 - 1]; Loading Loading @@ -2205,7 +2240,9 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { #ifdef USE_NVTX nvtxRangePush("scr0 loop 2"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas) #endif for (int i14 = 1; i14 <= c4->nsph; i14++) { int iogi = c1->iog[i14 - 1]; scs += c1->sscs[iogi - 1]; Loading Loading @@ -2267,7 +2304,9 @@ void scr2( #ifdef USE_NVTX nvtxRangePush("scr2 inner loop 1"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute 
parallel for simd reduction(-:s11, s21, s12, s22) #endif for (int k = 1; k<=kmax; k++) { int l10 = (int) sqrt(k+1); int im10 = k - (l10*l10) + 1; Loading Loading @@ -2319,7 +2358,9 @@ void scr2( #ifdef USE_NVTX nvtxRangePush("scr2 loop 2"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11) #endif for (int i14 = 1; i14 <= c4->nsph; i14++) { int i = i14 - 1; int iogi = c1->iog[i14 - 1]; Loading Loading @@ -2349,7 +2390,9 @@ void scr2( #ifdef USE_NVTX nvtxRangePush("scr2 inner loop 3"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(4) #endif for (int ipo1 = 1; ipo1 <=2; ipo1++) { for (int jpo1 = 1; jpo1 <= 2; jpo1++) { for (int ipo2 = 1; ipo2 <= 2; ipo2++) { Loading @@ -2372,7 +2415,9 @@ void scr2( #ifdef USE_NVTX nvtxRangePush("scr2 loop 4"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target parallel for collapse(4) #endif for (int ipo1 = 1; ipo1 <=2; ipo1++) { for (int jpo1 = 1; jpo1 <= 2; jpo1++) { for (int ipo2 = 1; ipo2 <= 2; ipo2++) { Loading Loading @@ -2505,7 +2550,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) { // but if it results im = 0, then we set l = l-1 and im = 2*l+1 // furthermore if it results im > 2*l+1, then we set // im = im -(2*l+1) and l = l+1 (there was a rounding error in a nearly exact root) #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(3) #endif for (int n2 = 1; n2 <= c4->nsph; n2++) { // GPU portable? for (int k2 = 1; k2<=k2max; k2++) { for (int k3 = 1; k3<=k3max; k3++) { Loading Loading @@ -2550,7 +2597,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) { #endif dcomplex *am_v = am[0]; dcomplex *sam_v = c9->sam[0]; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(2) #endif for (int i1 = 1; i1 <= ndi; i1++) { // GPU portable? 
for (int i3 = 1; i3 <= c4->nlem; i3++) { dcomplex sum1 = cc0; Loading Loading @@ -2596,7 +2645,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) { } // i1 loop int nlemt = c4->nlem + c4->nlem; dcomplex *am0m_v = c1ao->am0m[0]; #ifdef USE_TARGET_OFFLOAD #pragma omp target parallel for collapse(2) #endif for (int i0 = 1; i0 <= c4->nlem; i0++) { for (int i3 = 1; i3 <= nlemt; i3++) { int i0e = i0 + c4->nlem; Loading src/libnptm/sph_subs.cpp +6 −2 Original line number Diff line number Diff line Loading @@ -197,13 +197,17 @@ double cg1(int lmpml, int mu, int l, int m) { return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex dconjg(dcomplex z) { double zreal = real(z); double zimag = imag(z); return (zreal - zimag * I); } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif void diel(int npntmo, int ns, int i, int ic, double vk, C1 *c1, C2 *c2) { const double dif = c1->rc[i - 1][ns] - c1->rc[i - 1][ns - 1]; Loading src/libnptm/types.cpp +8 −0 Original line number Diff line number Diff line Loading @@ -22,10 +22,18 @@ #include "../include/types.h" #endif #ifdef USE_TARGET_OFFLOAD # pragma omp begin declare target device_type(any) #endif double real(dcomplex z) { return __real__ z; } #ifdef USE_TARGET_OFFLOAD # pragma omp end declare target #endif #ifdef USE_TARGET_OFFLOAD # pragma omp begin declare target device_type(any) #endif double imag(dcomplex z) { return __imag__ z; } #ifdef USE_TARGET_OFFLOAD # pragma omp end declare target #endif Loading
src/libnptm/clu_subs.cpp +69 −18 Original line number Diff line number Diff line Loading @@ -47,7 +47,9 @@ #include <omp.h> #endif #ifdef USE_TARGET_OFFLOAD #pragma omp requires unified_shared_memory #endif using namespace std; Loading Loading @@ -405,7 +407,9 @@ dcomplex cdtp(dcomplex z, dcomplex **am, int i, int jf, int k, int nj) { return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif double cgev(int ipamo, int mu, int l, int m) { double result = 0.0; double xd = 0.0, xn = 0.0; Loading Loading @@ -439,7 +443,9 @@ double cgev(int ipamo, int mu, int l, int m) { } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif void cms(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) { dcomplex dm, de, cgh, cgk; Loading Loading @@ -643,7 +649,9 @@ void crsm1(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6) { delete[] svs; } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex ghit_d( int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, C1 *c1, C1_AddOns *c1ao, C4 *c4, double *rac3j Loading Loading @@ -854,9 +862,13 @@ dcomplex ghit_d( } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif dcomplex ghit( int ihi, int ipamo, int nbl, int l1, int m1, int l2, int m2, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6 Loading Loading @@ -1067,7 +1079,9 @@ dcomplex ghit( } return result; } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif void hjv( double exri, double vk, int &jer, int &lcalc, dcomplex &arg, Loading Loading @@ -1325,12 +1339,13 @@ void pcros(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C4 *c4) { #ifdef USE_NVTX nvtxRangePush("pcros intermediate loop 1"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4) #endif for (int i12 = 
0; i12 < nlemt; i12++) { // int i = i12 - 1; dcomplex am = cc0; dcomplex amp = cc0; //#pragma omp target teams distribute parallel for simd reduction(+:am,amp) for (int j10 = 0; j10 < nlemt; j10++) { // int j = j10 - 1; am += (vec_am0m[nlemt*i12+j10] * vec_w[4*j10+ipo18]); Loading Loading @@ -1391,7 +1406,9 @@ void pcrsm0(double vk, double exri, int inpol, C1 *c1, C1_AddOns *c1ao, C4 *c4) csam = -(ccs / (exri * vk)) * 0.5 * I; sum2 = cc0; sum3 = cc0; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3) #endif for (int i14 = 0; i14 < c4->nlem; i14++) { int ie = i14 + c4->nlem; sum2 += (vec_am0m[nlemt*i14 + i14] + vec_am0m[nlemt*ie + ie]); Loading @@ -1399,7 +1416,9 @@ void pcrsm0(double vk, double exri, int inpol, C1 *c1, C1_AddOns *c1ao, C4 *c4) } // i14 loop double sumpi = 0.0; dcomplex sumpd = cc0; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd) #endif for (int i16 = 0; i16 < nlemt; i16++) { for (int j16 = 0; j16 < c4->nlem; j16++) { int je = j16 + c4->nlem; Loading Loading @@ -1599,7 +1618,9 @@ void r3j000(int j2, int j3, C6 *c6) { } } #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { int jmx = j3 + j2; int jdf = j3 - j2; Loading Loading @@ -1717,9 +1738,13 @@ void r3jjr(int j2, int j3, int m2, int m3, C6 *c6) { } } } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) { int jmx = j3 + j2; int jdf = j3 - j2; Loading Loading @@ -1837,7 +1862,9 @@ void r3jjr_d(int j2, int j3, int m2, int m3, double *rac3j) { } } } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif void r3jmr(int j1, int j2, int j3, int m1, C6 *c6) { int mmx = (j2 < j3 - m1) ? 
j2 : j3 - m1; Loading Loading @@ -1968,7 +1995,9 @@ void raba( #ifdef USE_NVTX nvtxRangePush("raba inner loop 1"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:c1, c2) #endif for (int j10 = 1; j10 <= nlemt; j10++) { int j = j10 - 1; c1 += (vec_am0m[i*nlemt+j] * vec_w[4*j]); Loading Loading @@ -2018,7 +2047,9 @@ void raba( #ifdef USE_NVTX nvtxRangePush("raba inner loop 2"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2) #endif for (int k = 1; k<=kmax; k++) { int l60 = (int) sqrt(k+1); int im60 = k - (l60*l60) + 1; Loading Loading @@ -2091,7 +2122,9 @@ void raba( #ifdef USE_NVTX nvtxRangePush("raba loop 3"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp teams distribute parallel for simd #endif for (int ipo78 = 1; ipo78 <= 2; ipo78++) { int ipo = ipo78 - 1; tqce[ipo][0] = real(ctqce[ipo][0] - ctqce[ipo][2]) * sq2i; Loading Loading @@ -2161,7 +2194,9 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { #ifdef USE_NVTX nvtxRangePush("scr0 inner loop 1"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sums, sum21) #endif for (int l10 = 1; l10 <= c4->li; l10++) { double fl = 1.0 * (l10 + l10 + 1); // dcomplex rm = 1.0 / c1->rmi[l10 - 1][i14 - 1]; Loading Loading @@ -2205,7 +2240,9 @@ void scr0(double vk, double exri, C1 *c1, C1_AddOns *c1ao, C3 *c3, C4 * c4) { #ifdef USE_NVTX nvtxRangePush("scr0 loop 2"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas) #endif for (int i14 = 1; i14 <= c4->nsph; i14++) { int iogi = c1->iog[i14 - 1]; scs += c1->sscs[iogi - 1]; Loading Loading @@ -2267,7 +2304,9 @@ void scr2( #ifdef USE_NVTX nvtxRangePush("scr2 inner loop 1"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute 
parallel for simd reduction(-:s11, s21, s12, s22) #endif for (int k = 1; k<=kmax; k++) { int l10 = (int) sqrt(k+1); int im10 = k - (l10*l10) + 1; Loading Loading @@ -2319,7 +2358,9 @@ void scr2( #ifdef USE_NVTX nvtxRangePush("scr2 loop 2"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11) #endif for (int i14 = 1; i14 <= c4->nsph; i14++) { int i = i14 - 1; int iogi = c1->iog[i14 - 1]; Loading Loading @@ -2349,7 +2390,9 @@ void scr2( #ifdef USE_NVTX nvtxRangePush("scr2 inner loop 3"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(4) #endif for (int ipo1 = 1; ipo1 <=2; ipo1++) { for (int jpo1 = 1; jpo1 <= 2; jpo1++) { for (int ipo2 = 1; ipo2 <= 2; ipo2++) { Loading @@ -2372,7 +2415,9 @@ void scr2( #ifdef USE_NVTX nvtxRangePush("scr2 loop 4"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target parallel for collapse(4) #endif for (int ipo1 = 1; ipo1 <=2; ipo1++) { for (int jpo1 = 1; jpo1 <= 2; jpo1++) { for (int ipo2 = 1; ipo2 <= 2; ipo2++) { Loading Loading @@ -2505,7 +2550,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) { // but if it results im = 0, then we set l = l-1 and im = 2*l+1 // furthermore if it results im > 2*l+1, then we set // im = im -(2*l+1) and l = l+1 (there was a rounding error in a nearly exact root) #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(3) #endif for (int n2 = 1; n2 <= c4->nsph; n2++) { // GPU portable? for (int k2 = 1; k2<=k2max; k2++) { for (int k3 = 1; k3<=k3max; k3++) { Loading Loading @@ -2550,7 +2597,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) { #endif dcomplex *am_v = am[0]; dcomplex *sam_v = c9->sam[0]; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(2) #endif for (int i1 = 1; i1 <= ndi; i1++) { // GPU portable? 
for (int i3 = 1; i3 <= c4->nlem; i3++) { dcomplex sum1 = cc0; Loading Loading @@ -2596,7 +2645,9 @@ void ztm(dcomplex **am, C1 *c1, C1_AddOns *c1ao, C4 *c4, C6 *c6, C9 * c9) { } // i1 loop int nlemt = c4->nlem + c4->nlem; dcomplex *am0m_v = c1ao->am0m[0]; #ifdef USE_TARGET_OFFLOAD #pragma omp target parallel for collapse(2) #endif for (int i0 = 1; i0 <= c4->nlem; i0++) { for (int i3 = 1; i3 <= nlemt; i3++) { int i0e = i0 + c4->nlem; Loading
src/libnptm/sph_subs.cpp +6 −2 Original line number Diff line number Diff line Loading @@ -197,13 +197,17 @@ double cg1(int lmpml, int mu, int l, int m) { return result; } /* When USE_TARGET_OFFLOAD is defined, dconjg is bracketed by an OpenMP declare-target region with device_type(any); without the macro the pragmas are compiled out. */ #ifdef USE_TARGET_OFFLOAD #pragma omp begin declare target device_type(any) #endif /* dconjg: returns the complex conjugate of z, rebuilt from real(z) and imag(z). */ dcomplex dconjg(dcomplex z) { double zreal = real(z); double zimag = imag(z); /* keep the real part, negate only the imaginary part */ return (zreal - zimag * I); } #ifdef USE_TARGET_OFFLOAD #pragma omp end declare target #endif void diel(int npntmo, int ns, int i, int ic, double vk, C1 *c1, C2 *c2) { const double dif = c1->rc[i - 1][ns] - c1->rc[i - 1][ns - 1]; Loading
src/libnptm/types.cpp +8 −0 Original line number Diff line number Diff line Loading @@ -22,10 +22,18 @@ #include "../include/types.h" #endif /* Each accessor below is bracketed by its own OpenMP declare-target region with device_type(any) when USE_TARGET_OFFLOAD is defined; without the macro the pragmas are compiled out. */ #ifdef USE_TARGET_OFFLOAD # pragma omp begin declare target device_type(any) #endif /* real: returns the real component of z, read with the __real__ operator (a compiler extension, not standard C++). */ double real(dcomplex z) { return __real__ z; } #ifdef USE_TARGET_OFFLOAD # pragma omp end declare target #endif #ifdef USE_TARGET_OFFLOAD # pragma omp begin declare target device_type(any) #endif /* imag: returns the imaginary component of z, read with the __imag__ operator (a compiler extension, not standard C++). */ double imag(dcomplex z) { return __imag__ z; } #ifdef USE_TARGET_OFFLOAD # pragma omp end declare target #endif