Loading src/cluster/cluster.cpp +16 −15 Original line number Diff line number Diff line Loading @@ -419,12 +419,8 @@ void cluster(const string& config_file, const string& data_file, const string& o // the parallel loop over MPI processes covers a different set of indices for each thread #pragma omp barrier int myjxi488 = ixi488+myompthread; // each thread opens new virtual files and stores their pointers in the shared array if (myompthread > 0) { // UPDATE: non-0 threads need to allocate memory for one scale at a time. p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1); } vtppoanp_2 = new VirtualBinaryFile(); // each thread opens new virtual files and stores their pointers in the shared array // each thread puts a copy of the pointers to its virtual files in the shared arrays p_outarray[myompthread] = p_output_2; vtppoanarray[myompthread] = vtppoanp_2; Loading @@ -432,6 +428,10 @@ void cluster(const string& config_file, const string& data_file, const string& o // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism if (myjxi488 <= cid_2->number_of_scales) { if (myompthread > 0) { // UPDATE: non-0 threads need to allocate memory for one scale at a time. p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1); } int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2); } #pragma omp barrier Loading @@ -443,8 +443,10 @@ void cluster(const string& config_file, const string& data_file, const string& o // threads different from 0 append their virtual files to the one of thread 0, and delete them if (myompthread == 0) { for (int ti=1; ti<ompnumthreads; ti++) { if (p_outarray[ti] != NULL) { p_outarray[0]->insert(*(p_outarray[ti])); delete p_outarray[ti]; } vtppoanarray[0]->append(*(vtppoanarray[ti])); delete vtppoanarray[ti]; } Loading Loading @@ -481,10 +483,6 @@ void cluster(const string& config_file, const string& data_file, const string& o } } #endif // ClusterOutputInfo : the VirtualAsciiFile instances were appended to // disk here. This is no longer the case. // p_outarray[0]->write(output_path + "/c_OCLU", "LEGACY"); // delete p_outarray[0]; } // end block writing to disk #ifdef USE_NVTX Loading Loading @@ -599,8 +597,6 @@ void cluster(const string& config_file, const string& data_file, const string& o // output of all threads. if (myompthread == 0) p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads); else p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1); // each thread opens new virtual files and stores their pointers in the shared array vtppoanp_2 = new VirtualBinaryFile(); // each thread puts a copy of the pointers to its virtual files in the shared arrays Loading @@ -611,6 +607,9 @@ void cluster(const string& config_file, const string& data_file, const string& o // ok, now I can actually start the parallel calculations // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism if (myjxi488 <= cid_2->number_of_scales) { if (myompthread > 0) { p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1); } int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2); } // close the OMP parallel for loop Loading @@ -618,8 +617,10 @@ void cluster(const string& config_file, const string& data_file, const string& o // threads different from 0 append their virtual files to the one of thread 0, and delete them if (myompthread == 0) { for (int ti=1; ti<ompnumthreads; ti++) { if (p_outarray[ti] != NULL) { p_outarray[0]->insert(*(p_outarray[ti])); delete p_outarray[ti]; } vtppoanarray[0]->append(*(vtppoanarray[ti])); delete vtppoanarray[ti]; } Loading src/libnptm/Commons.cpp +2 −1 Original line number Diff line number Diff line Loading @@ -1363,7 +1363,8 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const ParticleDescriptorClu ecscm[ci] = rhs.ecscm[ci]; } v3j0 = new double[_nv3j]; for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[_nv3j]; // for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[_nv3j]; REPORT: AAAAH! ORRORE E DISGUSTO! for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[vj]; ind3j = new int*[_lm + 1]; for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii); rac3j = new double[_lmtpo]; Loading src/libnptm/outputs.cpp +18 −18 Original line number Diff line number Diff line Loading @@ -630,14 +630,14 @@ int ClusterOutputInfo::insert(const ClusterOutputInfo &rhs) { memcpy(vec_dir_sat21 + offset, rhs.vec_dir_sat21, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_sat12 + offset, rhs.vec_dir_sat12, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_sat22 + offset, rhs.vec_dir_sat22, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_scc1 + offset, rhs.vec_scc1, chunk_size * sizeof(double)); memcpy(vec_dir_scc2 + offset, rhs.vec_scc2, chunk_size * sizeof(double)); memcpy(vec_dir_abc1 + offset, rhs.vec_abc1, chunk_size * sizeof(double)); memcpy(vec_dir_abc2 + offset, rhs.vec_abc2, chunk_size * sizeof(double)); memcpy(vec_dir_exc1 + offset, rhs.vec_exc1, chunk_size * sizeof(double)); memcpy(vec_dir_exc2 + offset, rhs.vec_exc2, chunk_size * sizeof(double)); memcpy(vec_dir_albedc1 + offset, rhs.vec_albedc1, chunk_size * sizeof(double)); memcpy(vec_dir_albedc2 + offset, rhs.vec_albedc2, chunk_size * sizeof(double)); memcpy(vec_dir_scc1 + offset, rhs.vec_dir_scc1, chunk_size * sizeof(double)); memcpy(vec_dir_scc2 + offset, rhs.vec_dir_scc2, chunk_size * sizeof(double)); memcpy(vec_dir_abc1 + offset, rhs.vec_dir_abc1, chunk_size * sizeof(double)); memcpy(vec_dir_abc2 + offset, rhs.vec_dir_abc2, chunk_size * sizeof(double)); memcpy(vec_dir_exc1 + offset, rhs.vec_dir_exc1, chunk_size * sizeof(double)); memcpy(vec_dir_exc2 + offset, rhs.vec_dir_exc2, chunk_size * sizeof(double)); memcpy(vec_dir_albedc1 + offset, rhs.vec_dir_albedc1, chunk_size * sizeof(double)); memcpy(vec_dir_albedc2 + offset, rhs.vec_dir_albedc2, chunk_size * sizeof(double)); memcpy(vec_dir_qscc1 + offset, rhs.vec_dir_qscc1, chunk_size * sizeof(double)); memcpy(vec_dir_qscc2 + offset, rhs.vec_dir_qscc2, chunk_size * sizeof(double)); memcpy(vec_dir_qabc1 + offset, rhs.vec_dir_qabc1, chunk_size * sizeof(double)); Loading @@ -658,16 +658,16 @@ int ClusterOutputInfo::insert(const ClusterOutputInfo &rhs) { memcpy(vec_dir_sac21 + offset, rhs.vec_dir_sac21, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_sac12 + offset, rhs.vec_dir_sac12, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_sac22 + offset, rhs.vec_dir_sac22, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_qschuc1 + offset, rhs.vec_qschuc1, chunk_size * sizeof(double)); memcpy(vec_dir_qschuc2 + offset, rhs.vec_qschuc2, chunk_size * sizeof(double)); memcpy(vec_dir_pschuc1 + offset, rhs.vec_pschuc1, chunk_size * sizeof(double)); memcpy(vec_dir_pschuc2 + offset, rhs.vec_pschuc2, chunk_size * sizeof(double)); memcpy(vec_dir_s0magc1 + offset, rhs.vec_s0magc1, chunk_size * sizeof(double)); memcpy(vec_dir_s0magc2 + offset, rhs.vec_s0magc2, chunk_size * sizeof(double)); memcpy(vec_dir_cosavc1 + offset, rhs.vec_cosavc1, chunk_size * sizeof(double)); memcpy(vec_dir_cosavc2 + offset, rhs.vec_cosavc2, chunk_size * sizeof(double)); memcpy(vec_dir_raprc1 + offset, rhs.vec_raprc1, chunk_size * sizeof(double)); memcpy(vec_dir_raprc2 + offset, rhs.vec_raprc2, chunk_size * sizeof(double)); memcpy(vec_dir_qschuc1 + offset, rhs.vec_dir_qschuc1, chunk_size * sizeof(double)); memcpy(vec_dir_qschuc2 + offset, rhs.vec_dir_qschuc2, chunk_size * sizeof(double)); memcpy(vec_dir_pschuc1 + offset, rhs.vec_dir_pschuc1, chunk_size * sizeof(double)); memcpy(vec_dir_pschuc2 + offset, rhs.vec_dir_pschuc2, chunk_size * sizeof(double)); memcpy(vec_dir_s0magc1 + offset, rhs.vec_dir_s0magc1, chunk_size * sizeof(double)); memcpy(vec_dir_s0magc2 + offset, rhs.vec_dir_s0magc2, chunk_size * sizeof(double)); memcpy(vec_dir_cosavc1 + offset, rhs.vec_dir_cosavc1, chunk_size * sizeof(double)); memcpy(vec_dir_cosavc2 + offset, rhs.vec_dir_cosavc2, chunk_size * sizeof(double)); memcpy(vec_dir_raprc1 + offset, rhs.vec_dir_raprc1, chunk_size * sizeof(double)); memcpy(vec_dir_raprc2 + offset, rhs.vec_dir_raprc2, chunk_size * sizeof(double)); memcpy(vec_dir_flc1 + offset, rhs.vec_dir_flc1, chunk_size * sizeof(double)); memcpy(vec_dir_flc2 + offset, rhs.vec_dir_flc2, chunk_size * sizeof(double)); memcpy(vec_dir_frc1 + offset, rhs.vec_dir_frc1, chunk_size * sizeof(double)); Loading Loading
src/cluster/cluster.cpp +16 −15 Original line number Diff line number Diff line Loading @@ -419,12 +419,8 @@ void cluster(const string& config_file, const string& data_file, const string& o // the parallel loop over MPI processes covers a different set of indices for each thread #pragma omp barrier int myjxi488 = ixi488+myompthread; // each thread opens new virtual files and stores their pointers in the shared array if (myompthread > 0) { // UPDATE: non-0 threads need to allocate memory for one scale at a time. p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1); } vtppoanp_2 = new VirtualBinaryFile(); // each thread opens new virtual files and stores their pointers in the shared array // each thread puts a copy of the pointers to its virtual files in the shared arrays p_outarray[myompthread] = p_output_2; vtppoanarray[myompthread] = vtppoanp_2; Loading @@ -432,6 +428,10 @@ void cluster(const string& config_file, const string& data_file, const string& o // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism if (myjxi488 <= cid_2->number_of_scales) { if (myompthread > 0) { // UPDATE: non-0 threads need to allocate memory for one scale at a time. p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1); } int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2); } #pragma omp barrier Loading @@ -443,8 +443,10 @@ void cluster(const string& config_file, const string& data_file, const string& o // threads different from 0 append their virtual files to the one of thread 0, and delete them if (myompthread == 0) { for (int ti=1; ti<ompnumthreads; ti++) { if (p_outarray[ti] != NULL) { p_outarray[0]->insert(*(p_outarray[ti])); delete p_outarray[ti]; } vtppoanarray[0]->append(*(vtppoanarray[ti])); delete vtppoanarray[ti]; } Loading Loading @@ -481,10 +483,6 @@ void cluster(const string& config_file, const string& data_file, const string& o } } #endif // ClusterOutputInfo : the VirtualAsciiFile instances were appended to // disk here. This is no longer the case. // p_outarray[0]->write(output_path + "/c_OCLU", "LEGACY"); // delete p_outarray[0]; } // end block writing to disk #ifdef USE_NVTX Loading Loading @@ -599,8 +597,6 @@ void cluster(const string& config_file, const string& data_file, const string& o // output of all threads. if (myompthread == 0) p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, ompnumthreads); else p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1); // each thread opens new virtual files and stores their pointers in the shared array vtppoanp_2 = new VirtualBinaryFile(); // each thread puts a copy of the pointers to its virtual files in the shared arrays Loading @@ -611,6 +607,9 @@ void cluster(const string& config_file, const string& data_file, const string& o // ok, now I can actually start the parallel calculations // each MPI process handles a number of contiguous scales corresponding to its number of OMP threads at this omp level of parallelism if (myjxi488 <= cid_2->number_of_scales) { if (myompthread > 0) { p_output_2 = new ClusterOutputInfo(sconf, gconf, mpidata, myjxi488, 1); } int jer = cluster_jxi488_cycle(myjxi488, sconf, gconf, p_scattering_angles, cid_2, p_output_2, output_path, vtppoanp_2); } // close the OMP parallel for loop Loading @@ -618,8 +617,10 @@ void cluster(const string& config_file, const string& data_file, const string& o // threads different from 0 append their virtual files to the one of thread 0, and delete them if (myompthread == 0) { for (int ti=1; ti<ompnumthreads; ti++) { if (p_outarray[ti] != NULL) { p_outarray[0]->insert(*(p_outarray[ti])); delete p_outarray[ti]; } vtppoanarray[0]->append(*(vtppoanarray[ti])); delete vtppoanarray[ti]; } Loading
src/libnptm/Commons.cpp +2 −1 Original line number Diff line number Diff line Loading @@ -1363,7 +1363,8 @@ ParticleDescriptorCluster::ParticleDescriptorCluster(const ParticleDescriptorClu ecscm[ci] = rhs.ecscm[ci]; } v3j0 = new double[_nv3j]; for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[_nv3j]; // for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[_nv3j]; REPORT: AAAAH! ORRORE E DISGUSTO! for (int vj = 0; vj < _nv3j; vj++) v3j0[vj] = rhs.v3j0[vj]; ind3j = new int*[_lm + 1]; for (int ii = 0; ii <= _lm; ii++) ind3j[ii] = vec_ind3j + (_lm * ii); rac3j = new double[_lmtpo]; Loading
src/libnptm/outputs.cpp +18 −18 Original line number Diff line number Diff line Loading @@ -630,14 +630,14 @@ int ClusterOutputInfo::insert(const ClusterOutputInfo &rhs) { memcpy(vec_dir_sat21 + offset, rhs.vec_dir_sat21, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_sat12 + offset, rhs.vec_dir_sat12, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_sat22 + offset, rhs.vec_dir_sat22, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_scc1 + offset, rhs.vec_scc1, chunk_size * sizeof(double)); memcpy(vec_dir_scc2 + offset, rhs.vec_scc2, chunk_size * sizeof(double)); memcpy(vec_dir_abc1 + offset, rhs.vec_abc1, chunk_size * sizeof(double)); memcpy(vec_dir_abc2 + offset, rhs.vec_abc2, chunk_size * sizeof(double)); memcpy(vec_dir_exc1 + offset, rhs.vec_exc1, chunk_size * sizeof(double)); memcpy(vec_dir_exc2 + offset, rhs.vec_exc2, chunk_size * sizeof(double)); memcpy(vec_dir_albedc1 + offset, rhs.vec_albedc1, chunk_size * sizeof(double)); memcpy(vec_dir_albedc2 + offset, rhs.vec_albedc2, chunk_size * sizeof(double)); memcpy(vec_dir_scc1 + offset, rhs.vec_dir_scc1, chunk_size * sizeof(double)); memcpy(vec_dir_scc2 + offset, rhs.vec_dir_scc2, chunk_size * sizeof(double)); memcpy(vec_dir_abc1 + offset, rhs.vec_dir_abc1, chunk_size * sizeof(double)); memcpy(vec_dir_abc2 + offset, rhs.vec_dir_abc2, chunk_size * sizeof(double)); memcpy(vec_dir_exc1 + offset, rhs.vec_dir_exc1, chunk_size * sizeof(double)); memcpy(vec_dir_exc2 + offset, rhs.vec_dir_exc2, chunk_size * sizeof(double)); memcpy(vec_dir_albedc1 + offset, rhs.vec_dir_albedc1, chunk_size * sizeof(double)); memcpy(vec_dir_albedc2 + offset, rhs.vec_dir_albedc2, chunk_size * sizeof(double)); memcpy(vec_dir_qscc1 + offset, rhs.vec_dir_qscc1, chunk_size * sizeof(double)); memcpy(vec_dir_qscc2 + offset, rhs.vec_dir_qscc2, chunk_size * sizeof(double)); memcpy(vec_dir_qabc1 + offset, rhs.vec_dir_qabc1, chunk_size * sizeof(double)); Loading @@ -658,16 +658,16 @@ int ClusterOutputInfo::insert(const ClusterOutputInfo &rhs) { memcpy(vec_dir_sac21 + offset, rhs.vec_dir_sac21, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_sac12 + offset, rhs.vec_dir_sac12, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_sac22 + offset, rhs.vec_dir_sac22, chunk_size * sizeof(dcomplex)); memcpy(vec_dir_qschuc1 + offset, rhs.vec_qschuc1, chunk_size * sizeof(double)); memcpy(vec_dir_qschuc2 + offset, rhs.vec_qschuc2, chunk_size * sizeof(double)); memcpy(vec_dir_pschuc1 + offset, rhs.vec_pschuc1, chunk_size * sizeof(double)); memcpy(vec_dir_pschuc2 + offset, rhs.vec_pschuc2, chunk_size * sizeof(double)); memcpy(vec_dir_s0magc1 + offset, rhs.vec_s0magc1, chunk_size * sizeof(double)); memcpy(vec_dir_s0magc2 + offset, rhs.vec_s0magc2, chunk_size * sizeof(double)); memcpy(vec_dir_cosavc1 + offset, rhs.vec_cosavc1, chunk_size * sizeof(double)); memcpy(vec_dir_cosavc2 + offset, rhs.vec_cosavc2, chunk_size * sizeof(double)); memcpy(vec_dir_raprc1 + offset, rhs.vec_raprc1, chunk_size * sizeof(double)); memcpy(vec_dir_raprc2 + offset, rhs.vec_raprc2, chunk_size * sizeof(double)); memcpy(vec_dir_qschuc1 + offset, rhs.vec_dir_qschuc1, chunk_size * sizeof(double)); memcpy(vec_dir_qschuc2 + offset, rhs.vec_dir_qschuc2, chunk_size * sizeof(double)); memcpy(vec_dir_pschuc1 + offset, rhs.vec_dir_pschuc1, chunk_size * sizeof(double)); memcpy(vec_dir_pschuc2 + offset, rhs.vec_dir_pschuc2, chunk_size * sizeof(double)); memcpy(vec_dir_s0magc1 + offset, rhs.vec_dir_s0magc1, chunk_size * sizeof(double)); memcpy(vec_dir_s0magc2 + offset, rhs.vec_dir_s0magc2, chunk_size * sizeof(double)); memcpy(vec_dir_cosavc1 + offset, rhs.vec_dir_cosavc1, chunk_size * sizeof(double)); memcpy(vec_dir_cosavc2 + offset, rhs.vec_dir_cosavc2, chunk_size * sizeof(double)); memcpy(vec_dir_raprc1 + offset, rhs.vec_dir_raprc1, chunk_size * sizeof(double)); memcpy(vec_dir_raprc2 + offset, rhs.vec_dir_raprc2, chunk_size * sizeof(double)); memcpy(vec_dir_flc1 + offset, rhs.vec_dir_flc1, chunk_size * sizeof(double)); memcpy(vec_dir_flc2 + offset, rhs.vec_dir_flc2, chunk_size * sizeof(double)); memcpy(vec_dir_frc1 + offset, rhs.vec_dir_frc1, chunk_size * sizeof(double)); Loading