Loading .gitlab-ci.yml +3 −3 Original line number Diff line number Diff line Loading @@ -78,13 +78,13 @@ compatibility_stage: - CXX=g++-14 FC=gfortran-14 ./configure - make wipe - make -j - echo "Running make with refinement with gnu compilers version 14..." - echo "Running make with gnu compilers version 14..." - cd .. - rm -rf build_gnu14 - mkdir build_gnu14_refine - cd build_gnu14_refine - cp -r ../build/* . - CXX=g++-14 FC=gfortran-14 ./configure --enable-refinement - CXX=g++-14 FC=gfortran-14 ./configure - make wipe - make -j #- echo "Running make with flang version 16 and clang version 16..." Loading Loading @@ -173,7 +173,7 @@ building_stage: - cat /etc/os-release - cd build - echo "Configuring with default compilers (MAGMA disabled)..." - ./configure --without-magma --without-cublas --disable-offload --enable-refinement --enable-shared - ./configure --without-magma --without-cublas --disable-offload --enable-shared - make wipe - echo "Building the default configuration..." - make -j Loading src/libnptm/clu_subs.cpp +68 −42 Original line number Diff line number Diff line Loading @@ -1341,6 +1341,8 @@ void pcros(double vk, double exri, ParticleDescriptor *c1) { #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4) #else #pragma omp parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4) #endif for (int i12 = 0; i12 < nlemt; i12++) { // int i = i12 - 1; Loading Loading @@ -1408,6 +1410,8 @@ void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1) { sum3 = cc0; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3) #else #pragma omp parallel for simd reduction(+:sum2,sum3) #endif for (int i14 = 0; i14 < c1->nlem; i14++) { int ie = i14 + c1->nlem; Loading @@ -1418,6 +1422,8 @@ void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1) { dcomplex sumpd = cc0; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd) #else #pragma omp parallel for simd collapse(2) reduction(+:sumpi,sumpd) #endif for (int i16 = 0; i16 < nlemt; i16++) { for (int j16 = 0; j16 < c1->nlem; j16++) { Loading Loading @@ -2001,6 +2007,8 @@ void raba( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:c1, c2) #else #pragma omp parallel for simd reduction(+:c1, c2) #endif for (int j10 = 1; j10 <= nlemt; j10++) { int j = j10 - 1; Loading @@ -2021,6 +2029,8 @@ void raba( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp teams distribute parallel for #else #pragma omp parallel for #endif for (int ipo = 0; ipo < 2; ipo++) { int jpo = 1 - ipo; Loading Loading @@ -2055,6 +2065,8 @@ void raba( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2) #else #pragma omp parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2) #endif for (int k = 1; k<=kmax; k++) { int l60 = (int) sqrt(k+1); Loading Loading @@ -2129,7 +2141,9 @@ void raba( nvtxRangePush("raba loop 3"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp teams distribute parallel for simd #pragma omp target teams distribute parallel for simd #else #pragma omp parallel for simd #endif for (int ipo78 = 1; ipo78 <= 2; ipo78++) { int ipo = ipo78 - 1; Loading Loading @@ -2202,6 +2216,8 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) { #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sums, sum21) #else #pragma omp parallel for simd reduction(+:sums, sum21) #endif for (int l10 = 1; l10 <= c1->li; l10++) { double fl = 1.0 * (l10 + l10 + 1); Loading Loading @@ -2248,6 +2264,8 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) { #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas) #else #pragma omp parallel for simd reduction(+:scs, ecs, acs, tfsas) #endif for (int i14 = 1; i14 <= c1->nsph; i14++) { int iogi = c1->iog[i14 - 1]; Loading Loading @@ -2312,6 +2330,8 @@ void scr2( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(-:s11, s21, s12, s22) #else #pragma omp parallel for simd reduction(-:s11, s21, s12, s22) #endif for (int k = 1; k<=kmax; k++) { int l10 = (int) sqrt(k+1); Loading Loading @@ -2366,6 +2386,8 @@ void scr2( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11) #else #pragma omp parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11) #endif for (int i14 = 1; i14 <= c1->nsph; i14++) { int i = i14 - 1; Loading Loading @@ -2398,6 +2420,8 @@ void scr2( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(4) #else #pragma omp parallel for simd collapse(4) #endif for (int ipo1 = 1; ipo1 <=2; ipo1++) { for (int jpo1 = 1; jpo1 <= 2; jpo1++) { Loading @@ -2422,7 +2446,9 @@ void scr2( nvtxRangePush("scr2 loop 4"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target parallel for collapse(4) #pragma omp target teams distribute parallel for collapse(4) #else #pragma omp parallel for collapse(4) #endif for (int ipo1 = 1; ipo1 <=2; ipo1++) { for (int jpo1 = 1; jpo1 <= 2; jpo1++) { Loading Loading
.gitlab-ci.yml +3 −3 Original line number Diff line number Diff line Loading @@ -78,13 +78,13 @@ compatibility_stage: - CXX=g++-14 FC=gfortran-14 ./configure - make wipe - make -j - echo "Running make with refinement with gnu compilers version 14..." - echo "Running make with gnu compilers version 14..." - cd .. - rm -rf build_gnu14 - mkdir build_gnu14_refine - cd build_gnu14_refine - cp -r ../build/* . - CXX=g++-14 FC=gfortran-14 ./configure --enable-refinement - CXX=g++-14 FC=gfortran-14 ./configure - make wipe - make -j #- echo "Running make with flang version 16 and clang version 16..." Loading Loading @@ -173,7 +173,7 @@ building_stage: - cat /etc/os-release - cd build - echo "Configuring with default compilers (MAGMA disabled)..." - ./configure --without-magma --without-cublas --disable-offload --enable-refinement --enable-shared - ./configure --without-magma --without-cublas --disable-offload --enable-shared - make wipe - echo "Building the default configuration..." - make -j Loading
src/libnptm/clu_subs.cpp +68 −42 Original line number Diff line number Diff line Loading @@ -1341,6 +1341,8 @@ void pcros(double vk, double exri, ParticleDescriptor *c1) { #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4) #else #pragma omp parallel for simd reduction(+:sum, sump, sum1, sum2, sum3, sum4) #endif for (int i12 = 0; i12 < nlemt; i12++) { // int i = i12 - 1; Loading Loading @@ -1408,6 +1410,8 @@ void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1) { sum3 = cc0; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sum2,sum3) #else #pragma omp parallel for simd reduction(+:sum2,sum3) #endif for (int i14 = 0; i14 < c1->nlem; i14++) { int ie = i14 + c1->nlem; Loading @@ -1418,6 +1422,8 @@ void pcrsm0(double vk, double exri, int inpol, ParticleDescriptor *c1) { dcomplex sumpd = cc0; #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(2) reduction(+:sumpi,sumpd) #else #pragma omp parallel for simd collapse(2) reduction(+:sumpi,sumpd) #endif for (int i16 = 0; i16 < nlemt; i16++) { for (int j16 = 0; j16 < c1->nlem; j16++) { Loading Loading @@ -2001,6 +2007,8 @@ void raba( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:c1, c2) #else #pragma omp parallel for simd reduction(+:c1, c2) #endif for (int j10 = 1; j10 <= nlemt; j10++) { int j = j10 - 1; Loading @@ -2021,6 +2029,8 @@ void raba( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp teams distribute parallel for #else #pragma omp parallel for #endif for (int ipo = 0; ipo < 2; ipo++) { int jpo = 1 - ipo; Loading Loading @@ -2055,6 +2065,8 @@ void raba( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2) #else #pragma omp parallel for simd reduction(+:ctqce0, ctqce1, ctqce2, ctqcs0, ctqcs1, ctqcs2, tqcpe0, tqcpe1, tqcpe2, tqcps0, tqcps1, tqcps2) #endif for (int k = 1; k<=kmax; k++) { int l60 = (int) sqrt(k+1); Loading Loading @@ -2129,7 +2141,9 @@ void raba( nvtxRangePush("raba loop 3"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp teams distribute parallel for simd #pragma omp target teams distribute parallel for simd #else #pragma omp parallel for simd #endif for (int ipo78 = 1; ipo78 <= 2; ipo78++) { int ipo = ipo78 - 1; Loading Loading @@ -2202,6 +2216,8 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) { #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:sums, sum21) #else #pragma omp parallel for simd reduction(+:sums, sum21) #endif for (int l10 = 1; l10 <= c1->li; l10++) { double fl = 1.0 * (l10 + l10 + 1); Loading Loading @@ -2248,6 +2264,8 @@ void scr0(double vk, double exri, ParticleDescriptor *c1) { #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:scs, ecs, acs, tfsas) #else #pragma omp parallel for simd reduction(+:scs, ecs, acs, tfsas) #endif for (int i14 = 1; i14 <= c1->nsph; i14++) { int iogi = c1->iog[i14 - 1]; Loading Loading @@ -2312,6 +2330,8 @@ void scr2( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(-:s11, s21, s12, s22) #else #pragma omp parallel for simd reduction(-:s11, s21, s12, s22) #endif for (int k = 1; k<=kmax; k++) { int l10 = (int) sqrt(k+1); Loading Loading @@ -2366,6 +2386,8 @@ void scr2( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11) #else #pragma omp parallel for simd reduction(+:tsas00, tsas10, tsas01, tsas11) #endif for (int i14 = 1; i14 <= c1->nsph; i14++) { int i = i14 - 1; Loading Loading @@ -2398,6 +2420,8 @@ void scr2( #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target teams distribute parallel for simd collapse(4) #else #pragma omp parallel for simd collapse(4) #endif for (int ipo1 = 1; ipo1 <=2; ipo1++) { for (int jpo1 = 1; jpo1 <= 2; jpo1++) { Loading @@ -2422,7 +2446,9 @@ void scr2( nvtxRangePush("scr2 loop 4"); #endif #ifdef USE_TARGET_OFFLOAD #pragma omp target parallel for collapse(4) #pragma omp target teams distribute parallel for collapse(4) #else #pragma omp parallel for collapse(4) #endif for (int ipo1 = 1; ipo1 <=2; ipo1++) { for (int jpo1 = 1; jpo1 <= 2; jpo1++) { Loading