Loading build/configure.sh +1 −1 Original line number Diff line number Diff line Loading @@ -826,7 +826,7 @@ else fi # End of offload checks if [ "x$CXXFLAGS" = "x" ]; then CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}" CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}${NVTXFLAGS}" fi if [ "x$CXXLDFLAGS" = "x" ]; then if [ "x$LIBMODE" = "xstatic" ]; then Loading src/include/tfrfme.h +3 −6 Original line number Diff line number Diff line Loading @@ -63,6 +63,9 @@ protected: void write_legacy(const std::string& file_name); public: //! \brief Read only view on WK. const dcomplex *vec_wk; /*! \brief Swap1 instance constructor. * * \param lm: `int` Maximum field expansion order. Loading Loading @@ -97,12 +100,6 @@ public: */ static long get_memory_requirement(int lm, int _nkv); /*! \brief Get the pointer to the WK vector. * * \return value: `complex double *` Memory address of the WK vector. */ dcomplex *get_vector() { return wk; } /*! \brief Bring the pointer to the next element at the start of vector. */ void reset() { last_index = 0; } Loading src/libnptm/tfrfme.cpp +5 −8 Original line number Diff line number Diff line Loading @@ -52,6 +52,7 @@ Swap1::Swap1(int lm, int _nkv) { nlmmt = 2 * lm * (lm + 2); const int size = nkv * nkv * nlmmt; wk = new dcomplex[size](); vec_wk = wk; last_index = 0; } Loading @@ -77,21 +78,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { string str_type; int _nlmmt, _nkv, lm, num_elements, index; dcomplex value; dcomplex *_wk = NULL; if (status == 0) { status = hdf_file->read("NLMMT", "INT32", &_nlmmt); status = hdf_file->read("NKV", "INT32", &_nkv); lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1; num_elements = 2 * _nlmmt * _nkv * _nkv; instance = new Swap1(lm, _nkv); _wk = instance->get_vector(); elements = new double[num_elements](); str_type = "FLOAT64_(" + to_string(num_elements) + ")"; status = hdf_file->read("WK", str_type, elements); for (int wi = 0; wi < num_elements / 2; wi++) { index = 2 * wi; value = elements[index] + elements[index + 1] * I; _wk[wi] = value; instance->wk[wi] = value; } // wi loop delete[] elements; status = hdf_file->close(); Loading @@ -103,21 +102,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { Swap1* Swap1::from_legacy(const std::string& file_name) { fstream input; Swap1 *instance = NULL; dcomplex *_wk = NULL; int _nlmmt, _nkv, lm; double rval, ival; input.open(file_name.c_str(), ios::in | ios::binary); if (input.is_open()) { input.read(reinterpret_cast<char *>(&_nlmmt), sizeof(int)); lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1; input.read(reinterpret_cast<char *>(&_nkv), sizeof(int)); instance = new Swap1(lm, _nkv); _wk = instance->get_vector(); int num_elements = _nlmmt * _nkv * _nkv; for (int j = 0; j < num_elements; j++) { input.read(reinterpret_cast<char *>(&rval), sizeof(double)); input.read(reinterpret_cast<char *>(&ival), sizeof(double)); _wk[j] = rval + ival * I; instance->wk[j] = rval + ival * I; } input.close(); } else { Loading src/scripts/model_maker.py +199 −173 Original line number Diff line number Diff line Loading @@ -31,6 +31,7 @@ import pdb import random import yaml ## \brief 3D software generation capability flag. allow_3d = True try: import pyvista as pv Loading @@ -46,7 +47,7 @@ from sys import argv # `main()` is the function that handles the creation of the code configuration. # It returns an integer value as exit code, using 0 to signal successful execution. # # \returns result: `int` Number of detected error-level inconsistencies. # \returns result: `int` Exit code (0 = SUCCESS). def main(): result = 0 config = parse_arguments() Loading @@ -68,11 +69,14 @@ def main(): # \return result: `int` An exit code (0 if successful). def interpolate_constants(sconf): result = 0 err_arg = "" try: for i in range(sconf['configurations']): for j in range(sconf['nshl'][i]): file_idx = sconf['dielec_id'][i][j] dielec_path = Path(sconf['dielec_path'], sconf['dielec_file'][int(file_idx) - 1]) file_name = str(dielec_path) err_arg = str(dielec_path) file_name = err_arg dielec_file = open(file_name, 'r') wavelengths = [] rpart = [] Loading Loading @@ -125,6 +129,9 @@ def interpolate_constants(sconf): else: print("ERROR: file %s does not cover requested wavelengths!"%file_name) return 2 except FileNotFoundError as ex: print("ERROR: file not found %s!"%err_arg) return 3 return result ## \brief Create tha calculation configuration structure from YAML input. Loading Loading @@ -261,12 +268,16 @@ def load_model(model_file): [0.0 for k in range(sconf['nxi'])] for j in range(sconf['configurations']) ] for i in range(max_layers) ] interpolate_constants(sconf) check = interpolate_constants(sconf) if (check != 0): return (None, None) else: # sconf[idfc] != 0 and scaling on wavelength print("ERROR: for wavelength scaling, optical constants must be tabulated!") return (None, None) elif (model['material_settings']['match_mode'] == "GRID"): match_grid(sconf) check = match_grid(sconf) if (check != 0): return(None, None) else: print("ERROR: %s is not a recognized match mode!"%(model['material_settings']['match_mode'])) return (None, None) Loading Loading @@ -384,17 +395,25 @@ def load_model(model_file): rnd_engine = "COMPACT" if (rnd_engine == "COMPACT"): check = random_compact(sconf, gconf, rnd_seed, max_rad) if (check == 1): if (check == -1): print("ERROR: compact random generator works only when all sphere types have the same radius.") return (None, None) elif (check == -2): print("ERROR: sub-particle radius larger than particle radius.") return (None, None) elif (check == -3): print("ERROR: requested number of spheres cannot fit in allowed volume.") return (None, None) elif (rnd_engine == "LOOSE"): # random_aggregate() checks internally whether application is INCLUSION check = random_aggregate(sconf, gconf, rnd_seed, max_rad) else: print("ERROR: unrecognized random generator engine.") return (None, None) if (check != 0): print("WARNING: %d sphere(s) could not be placed."%check) if (check != sconf['nsph']): print("WARNING: placed only %d out of %d requested spheres."%(check, sconf['nsph'])) sconf['nsph'] = check gconf['nsph'] = check else: if (len(model['geometry_settings']['x_coords']) != gconf['nsph']): print("ERROR: coordinate vectors do not match the number of spheres!") Loading @@ -409,13 +428,16 @@ def load_model(model_file): write_obj(sconf, gconf, max_rad) try: max_gpu_ram = int(model['system_settings']['max_gpu_ram']) if (max_gpu_ram > 0): max_gpu_ram_bytes = max_gpu_ram * 1024 * 1024 * 1024 matrix_dim = 2 * gconf['nsph'] * gconf['li'] * (gconf['li'] + 2) matrix_size_bytes = 16 * matrix_dim * matrix_dim matrix_size_Gb = float(matrix_size_bytes) / 1024.0 / 1024.0 / 1024.0 print("INFO: estimated matrix size is {0:.3g} Gb.".format(matrix_size_Gb)) if (max_gpu_ram > 0): max_gpu_ram_bytes = max_gpu_ram * 1024 * 1024 * 1024 if (matrix_size_bytes < max_gpu_ram_bytes): max_gpu_processes = int(max_gpu_ram_bytes / matrix_size_bytes) print("INFO: system supports up to %d simultaneous processes on GPU."%max_gpu_processes) print("INFO: only %d GPU processes allowed, if using refinement."%(max_gpu_processes / 3)) else: print("WARNING: estimated matrix size is larger than available GPU memory!") else: Loading Loading @@ -454,6 +476,8 @@ def match_grid(sconf): max_layers = 0 nxi = 0 sconf['vec_xi'] = [] err_arg = "" try: for i in range(sconf['configurations']): layers = sconf['nshl'][i] if (sconf['application'] == "INCLUSION" and i == 0): Loading @@ -461,7 +485,8 @@ def match_grid(sconf): for j in range(layers): file_idx = sconf['dielec_id'][i][j] dielec_path = Path(sconf['dielec_path'], sconf['dielec_file'][int(file_idx) - 1]) file_name = str(dielec_path) err_arg = str(dielec_path) file_name = err_arg dielec_file = open(file_name, 'r') wavelengths = [] rpart = [] Loading Loading @@ -520,6 +545,9 @@ def match_grid(sconf): wi += 1 sconf['rdc0'][j][i][dci] = ry sconf['idc0'][j][i][dci] = iy except FileNotFoundError as ex: print("ERROR: file not found %s!"%err_arg) return 3 return result ## \brief Parse the command line arguments. Loading Loading @@ -587,15 +615,19 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): vec_thetas = [0.0 for i in range(nsph)] vec_phis = [0.0 for i in range(nsph)] vec_rads = [0.0 for i in range(nsph)] vec_types = [] n_types = scatterer['configurations'] if (0 in scatterer['vec_types']): tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 for ti in range(nsph): itype = tincrement + int(n_types * random.random()) scatterer['vec_types'][ti] = itype if (scatterer['application'] == "INCLUSION"): scatterer['vec_types'][0] = 1 sph_type_index = scatterer['vec_types'][0] - 1 vec_spheres = [{'itype': sph_type_index + 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}] vec_rads[0] = scatterer['ros'][sph_type_index] vec_types.append(sph_type_index + 1) placed_spheres = 1 attempts = 0 for i in range(1, nsph): Loading Loading @@ -669,7 +701,9 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): }) is_placed = True placed_spheres += 1 vec_types.append(sph_type_index + 1) attempts = 0 scatterer['vec_types'] = vec_types sph_index = 0 for sphere in sorted(vec_spheres, key=lambda item: item['itype']): scatterer['vec_types'][sph_index] = sphere['itype'] Loading @@ -677,6 +711,7 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): geometry['vec_sph_y'][sph_index] = sphere['y'] geometry['vec_sph_z'][sph_index] = sphere['z'] sph_index += 1 result = placed_spheres return result ## \brief Generate a random compact cluster from YAML configuration options. Loading @@ -699,45 +734,30 @@ def random_compact(scatterer, geometry, seed, max_rad): random.seed(seed) nsph = scatterer['nsph'] n_types = scatterer['configurations'] if (0 in scatterer['vec_types']): tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 for ti in range(nsph): itype = tincrement + int(n_types * random.random()) scatterer['vec_types'][ti] = itype radius = scatterer['ros'][0] # Return an error code if types have different radii if (max(scatterer['ros']) != min(scatterer['ros'])): result = 1 result = -1 elif (radius > max_rad): # Requested spheres are larger than the maximum allowed volume. # End function with error code -2. result = -2 else: radius = scatterer['ros'][0] x_centers = np.arange(-1.0 * max_rad + radius, max_rad, 2.0 * radius) y_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius) z_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius) x_offset = radius y_offset = radius x_layer_offset = radius y_layer_offset = radius / math.sqrt(3.0) x_centers = np.arange(-1.0 * max_rad + 2.0 * radius, max_rad, 2.0 * radius) x_size = len(x_centers) y_size = int(2.0 * max_rad / ((1.0 + math.sqrt(3.0) / 3.0) * radius)) z_size = int(2.0 * max_rad / ((1.0 + 2.0 * math.sqrt(6.0) / 3.0) * radius)) tmp_spheres = [] n_cells = len(x_centers) * len(y_centers) * len(z_centers) n_cells = x_size * y_size * z_size print("INFO: the cubic space would contain %d spheres."%n_cells) n_max_spheres = int((max_rad / radius) * (max_rad / radius) * (max_rad / radius) * 0.74) print("INFO: the maximum radius allows for %d spheres."%n_max_spheres) for zi in range(len(z_centers)): if (x_layer_offset == 0.0): x_layer_offset = radius else: x_layer_offset = 0.0 if (y_offset == 0.0): y_offset = radius else: y_offset = 0.0 for yi in range(len(y_centers)): if (x_offset == 0.0): x_offset = radius else: x_offset = 0.0 for xi in range(len(x_centers)): x = x_centers[xi] + x_offset + x_layer_offset y = y_centers[yi] + y_offset z = z_centers[zi] k = 0 z = -max_rad + radius while (z < max_rad - radius): j = 0 y = -max_rad + radius while (y < max_rad - radius): for i in range(len(x_centers)): x = (2 * (i + 1) + (j + k) % 2) * radius - max_rad extent = radius + math.sqrt(x * x + y * y + z * z) if (extent < max_rad): tmp_spheres.append({ Loading @@ -746,6 +766,11 @@ def random_compact(scatterer, geometry, seed, max_rad): 'y': y, 'z': z }) # j += 1 y = math.sqrt(3.0) * (j + (k % 2) / 3.0) * radius - max_rad + radius k += 1 z = 2.0 / 3.0 * math.sqrt(6.0) * k * radius - max_rad + radius #tmp_spheres = [{'itype': 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}] current_n = len(tmp_spheres) print("INFO: before erosion there are %d spheres in use."%current_n) Loading Loading @@ -777,20 +802,20 @@ def random_compact(scatterer, geometry, seed, max_rad): current_n -= 1 vec_spheres = [] sph_index = 0 # Generate a vector of types if none is given if (0 in scatterer['vec_types']): tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 for ti in range(current_n): itype = tincrement + int(n_types * random.random()) scatterer['vec_types'][ti] = itype if (scatterer['application'] == "INCLUSION"): scatterer['vec_types'][0] = 1 for ti in range(len(tmp_spheres)): sphere = tmp_spheres[ti] if (sphere['x'] < max_rad): sphere['itype'] = scatterer['vec_types'][sph_index] sph_index += 1 vec_spheres.append(sphere) #pl = pv.Plotter() #for si in range(len(vec_spheres)): # x = vec_spheres[si]['x'] / max_rad # y = vec_spheres[si]['y'] / max_rad # z = vec_spheres[si]['z'] / max_rad # mesh = pv.Sphere(radius / max_rad, (x, y, z)) # pl.add_mesh(mesh) #pl.export_obj("scene.obj") sph_index = 0 for sphere in sorted(vec_spheres, key=lambda item: item['itype']): scatterer['vec_types'][sph_index] = sphere['itype'] Loading @@ -798,7 +823,7 @@ def random_compact(scatterer, geometry, seed, max_rad): geometry['vec_sph_y'][sph_index] = sphere['y'] geometry['vec_sph_z'][sph_index] = sphere['z'] sph_index += 1 return result return current_n ## \brief Write the geometry configuration dictionary to legacy format. # Loading Loading @@ -952,9 +977,10 @@ def write_legacy_sconf(conf): ## \brief Export the model to a set of OBJ files for 3D visualization. # # This function exports the model as a set of OBJ files (one for every # spherical unit, plus a single scene file) to allow for model visualization # with 3D software tools. # This function exports the model as a single OBJ file, containing the # information to visualize the particle with 3D software tools. The model # file is associated with a MTL material libray file, used to assign colors # to spheres of different type. # # \param scatterer: `dict` Scatterer configuration dictionary (gets modified) # \param geometry: `dict` Geometry configuration dictionary (gets modified) Loading src/trapping/cfrfme.cpp +77 −15 Original line number Diff line number Diff line Loading @@ -56,6 +56,10 @@ #include "../include/tra_subs.h" #endif #ifdef USE_NVTX #include <nvtx3/nvToolsExt.h> #endif using namespace std; /*! \brief C++ implementation of FRFME Loading @@ -64,13 +68,15 @@ using namespace std; * \param output_path: `string` Directory to write the output files in. */ void frfme(string data_file, string output_path) { #ifdef USE_NVTX nvtxRangePush("Running frfme()"); #endif string tfrfme_name = output_path + "/c_TFRFME.hd5"; TFRFME *tfrfme = NULL; Swap1 *tt1 = NULL; Swap2 *tt2 = NULL; char namef[7]; char more; dcomplex **w = NULL; dcomplex *wk = NULL; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; Loading Loading @@ -98,6 +104,9 @@ void frfme(string data_file, string output_path) { int wsum_size; // End of vector size variables if (jlmf != 1) { #ifdef USE_NVTX nvtxRangePush("frfme() with jlmf != 1"); #endif int nxv, nyv, nzv; if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5"); if (tfrfme != NULL) { Loading Loading @@ -140,7 +149,16 @@ void frfme(string data_file, string output_path) { printf("ERROR: could not open TFRFME file.\n"); } nks = nkv - 1; } else { // label 16 #ifdef USE_NVTX nvtxRangePop(); #endif } else { // label 16; jlfm = 1 #ifdef USE_NVTX nvtxRangePush("frfme() with jlmf == 1"); #endif #ifdef USE_NVTX nvtxRangePush("Setup operations"); #endif int nksh, nrsh, nxsh, nysh, nzsh; str_target = file_lines[last_read_line++]; for (int cli = 0; cli < 7; cli++) { Loading Loading @@ -176,6 +194,9 @@ void frfme(string data_file, string output_path) { } str_target = file_lines[last_read_line++]; re = regex("[eEmM]"); #ifdef USE_NVTX nvtxRangePop(); #endif if (regex_search(str_target, m, re)) { more = m.str().at(0); if (more == 'm' || more == 'M') { Loading @@ -193,6 +214,9 @@ void frfme(string data_file, string output_path) { string tedf_name = output_path + "/" + namef + ".hd5"; ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5"); if (tedf != NULL) { #ifdef USE_NVTX nvtxRangePush("TEDF data import"); #endif int iduml, idum; iduml = tedf->number_of_spheres; idum = tedf->get_iog(iduml - 1); Loading @@ -216,6 +240,9 @@ void frfme(string data_file, string output_path) { xi = xip; } // label 20 #ifdef USE_NVTX nvtxRangePop(); #endif delete tedf; double wn = wp / 3.0e8; vk = xi * wn; Loading @@ -236,6 +263,9 @@ void frfme(string data_file, string output_path) { fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy); } // label 22 #ifdef USE_NVTX nvtxRangePush("Memory data loading"); #endif nlmmt = lm * (lm + 2) * 2; nks = nksh * 2; nkv = nks + 1; Loading Loading @@ -279,6 +309,12 @@ void frfme(string data_file, string output_path) { double *_yv = tfrfme->get_y(); double *_zv = tfrfme->get_z(); dcomplex **_wsum = tfrfme->get_matrix(); #ifdef USE_NVTX nvtxRangePop(); #endif #ifdef USE_NVTX nvtxRangePush("Looped vector initialization"); #endif for (int i24 = nxshpo; i24 <= nxs; i24++) { _xv[i24] = _xv[i24 - 1] + delxyz; _xv[nxv - i24 - 1] = -_xv[i24]; Loading @@ -297,7 +333,13 @@ void frfme(string data_file, string output_path) { vkv[i28] = vkv[i28 - 1] + delk; vkv[nkv - i28 - 1] = -vkv[i28]; } // i28 loop #ifdef USE_NVTX nvtxRangePop(); #endif if (tfrfme != NULL) { #ifdef USE_NVTX nvtxRangePush("TFRFME initialization"); #endif tfrfme->set_param("vk", vk); tfrfme->set_param("exri", exri); tfrfme->set_param("an", an); Loading Loading @@ -329,19 +371,20 @@ void frfme(string data_file, string output_path) { tt2->set_param("nlmmt", 1.0 * nlmmt); tt2->set_param("nrvc", 1.0 * nrvc); tt2->write_binary(temp_name2, "HDF5"); #ifdef USE_NVTX nvtxRangePop(); #endif dcomplex *vec_w = new dcomplex[nkv * nkv](); dcomplex **w = new dcomplex*[nkv]; for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv; #ifdef USE_NVTX nvtxRangePush("j80 loop"); #endif for (int j80 = jlmf; j80 <= jlml; j80++) { dcomplex *tt1_wk = tt1->get_vector(); int wk_index = 0; // w matrix if (w != NULL) { for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi]; delete[] w; } w = new dcomplex*[nkv]; for (int wi = 0; wi < nkv; wi++) w[wi] = new dcomplex[nkv](); for (int jy50 = 0; jy50 < nkv; jy50++) { for (int jx50 = 0; jx50 < nkv; jx50++) { for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1_wk[wk_index++]; for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1->vec_wk[wk_index++]; w[jx50][jy50] = wk[j80 - 1]; } // jx50 } // jy50 loop Loading Loading @@ -377,7 +420,15 @@ void frfme(string data_file, string output_path) { } // iy70 loop } // iz75 loop } // j80 loop delete[] vec_w; delete[] w; #ifdef USE_NVTX nvtxRangePop(); #endif // label 88 #ifdef USE_NVTX nvtxRangePush("Closing operations"); #endif tfrfme->write_binary(tfrfme_name, "HDF5"); string output_name = output_path + "/c_OFRFME"; FILE *output = fopen(output_name.c_str(), "w"); Loading @@ -386,6 +437,9 @@ void frfme(string data_file, string output_path) { if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx); fprintf(output, " FRSH =%15.7lE\n", frsh); fclose(output); #ifdef USE_NVTX nvtxRangePop(); #endif } else { // Should never happen. printf("ERROR: could not open TFRFME file for output.\n"); } Loading @@ -398,16 +452,24 @@ void frfme(string data_file, string output_path) { fprintf(output, " WRONG INPUT TAPE\n"); fclose(output); } #ifdef USE_NVTX nvtxRangePop(); #endif } // label 45 #ifdef USE_NVTX nvtxRangePush("frfme() memory clean"); #endif if (tfrfme != NULL) delete tfrfme; delete[] file_lines; if (tt2 != NULL) delete tt2; if (w != NULL) { for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi]; delete[] w; } if (wk != NULL) delete[] wk; if (tt1 != NULL) delete tt1; #ifdef USE_NVTX nvtxRangePop(); #endif printf("FRFME: Done.\n"); #ifdef USE_NVTX nvtxRangePop(); #endif } Loading
build/configure.sh +1 −1 Original line number Diff line number Diff line Loading @@ -826,7 +826,7 @@ else fi # End of offload checks if [ "x$CXXFLAGS" = "x" ]; then CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}" CXXFLAGS="-O${CXX_OPT}${CXX_DBG}${CLANGFLAGS}${INCLUDEFLAGS}${HDF5FLAGS}${OMPFLAGS}${MPIFLAGS}${LAPACKFLAGS}${CUBLASFLAGS}${MAGMAFLAGS}${REFINEFLAGS}${DEBUGFLAGS}${OFFLOADFLAGS}${NVTXFLAGS}" fi if [ "x$CXXLDFLAGS" = "x" ]; then if [ "x$LIBMODE" = "xstatic" ]; then Loading
src/include/tfrfme.h +3 −6 Original line number Diff line number Diff line Loading @@ -63,6 +63,9 @@ protected: void write_legacy(const std::string& file_name); public: //! \brief Read only view on WK. const dcomplex *vec_wk; /*! \brief Swap1 instance constructor. * * \param lm: `int` Maximum field expansion order. Loading Loading @@ -97,12 +100,6 @@ public: */ static long get_memory_requirement(int lm, int _nkv); /*! \brief Get the pointer to the WK vector. * * \return value: `complex double *` Memory address of the WK vector. */ dcomplex *get_vector() { return wk; } /*! \brief Bring the pointer to the next element at the start of vector. */ void reset() { last_index = 0; } Loading
src/libnptm/tfrfme.cpp +5 −8 Original line number Diff line number Diff line Loading @@ -52,6 +52,7 @@ Swap1::Swap1(int lm, int _nkv) { nlmmt = 2 * lm * (lm + 2); const int size = nkv * nkv * nlmmt; wk = new dcomplex[size](); vec_wk = wk; last_index = 0; } Loading @@ -77,21 +78,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { string str_type; int _nlmmt, _nkv, lm, num_elements, index; dcomplex value; dcomplex *_wk = NULL; if (status == 0) { status = hdf_file->read("NLMMT", "INT32", &_nlmmt); status = hdf_file->read("NKV", "INT32", &_nkv); lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1; num_elements = 2 * _nlmmt * _nkv * _nkv; instance = new Swap1(lm, _nkv); _wk = instance->get_vector(); elements = new double[num_elements](); str_type = "FLOAT64_(" + to_string(num_elements) + ")"; status = hdf_file->read("WK", str_type, elements); for (int wi = 0; wi < num_elements / 2; wi++) { index = 2 * wi; value = elements[index] + elements[index + 1] * I; _wk[wi] = value; instance->wk[wi] = value; } // wi loop delete[] elements; status = hdf_file->close(); Loading @@ -103,21 +102,19 @@ Swap1* Swap1::from_hdf5(const std::string& file_name) { Swap1* Swap1::from_legacy(const std::string& file_name) { fstream input; Swap1 *instance = NULL; dcomplex *_wk = NULL; int _nlmmt, _nkv, lm; double rval, ival; input.open(file_name.c_str(), ios::in | ios::binary); if (input.is_open()) { input.read(reinterpret_cast<char *>(&_nlmmt), sizeof(int)); lm = (int)((-2.0 + sqrt(4.0 + 2.0 * _nlmmt)) / 2.0); lm = (int)(sqrt(4.0 + 2.0 * _nlmmt) / 2.0) - 1; input.read(reinterpret_cast<char *>(&_nkv), sizeof(int)); instance = new Swap1(lm, _nkv); _wk = instance->get_vector(); int num_elements = _nlmmt * _nkv * _nkv; for (int j = 0; j < num_elements; j++) { input.read(reinterpret_cast<char *>(&rval), sizeof(double)); input.read(reinterpret_cast<char *>(&ival), sizeof(double)); _wk[j] = rval + ival * I; instance->wk[j] = rval + ival * I; } input.close(); } else { Loading
src/scripts/model_maker.py +199 −173 Original line number Diff line number Diff line Loading @@ -31,6 +31,7 @@ import pdb import random import yaml ## \brief 3D software generation capability flag. allow_3d = True try: import pyvista as pv Loading @@ -46,7 +47,7 @@ from sys import argv # `main()` is the function that handles the creation of the code configuration. # It returns an integer value as exit code, using 0 to signal successful execution. # # \returns result: `int` Number of detected error-level inconsistencies. # \returns result: `int` Exit code (0 = SUCCESS). def main(): result = 0 config = parse_arguments() Loading @@ -68,11 +69,14 @@ def main(): # \return result: `int` An exit code (0 if successful). def interpolate_constants(sconf): result = 0 err_arg = "" try: for i in range(sconf['configurations']): for j in range(sconf['nshl'][i]): file_idx = sconf['dielec_id'][i][j] dielec_path = Path(sconf['dielec_path'], sconf['dielec_file'][int(file_idx) - 1]) file_name = str(dielec_path) err_arg = str(dielec_path) file_name = err_arg dielec_file = open(file_name, 'r') wavelengths = [] rpart = [] Loading Loading @@ -125,6 +129,9 @@ def interpolate_constants(sconf): else: print("ERROR: file %s does not cover requested wavelengths!"%file_name) return 2 except FileNotFoundError as ex: print("ERROR: file not found %s!"%err_arg) return 3 return result ## \brief Create tha calculation configuration structure from YAML input. Loading Loading @@ -261,12 +268,16 @@ def load_model(model_file): [0.0 for k in range(sconf['nxi'])] for j in range(sconf['configurations']) ] for i in range(max_layers) ] interpolate_constants(sconf) check = interpolate_constants(sconf) if (check != 0): return (None, None) else: # sconf[idfc] != 0 and scaling on wavelength print("ERROR: for wavelength scaling, optical constants must be tabulated!") return (None, None) elif (model['material_settings']['match_mode'] == "GRID"): match_grid(sconf) check = match_grid(sconf) if (check != 0): return(None, None) else: print("ERROR: %s is not a recognized match mode!"%(model['material_settings']['match_mode'])) return (None, None) Loading Loading @@ -384,17 +395,25 @@ def load_model(model_file): rnd_engine = "COMPACT" if (rnd_engine == "COMPACT"): check = random_compact(sconf, gconf, rnd_seed, max_rad) if (check == 1): if (check == -1): print("ERROR: compact random generator works only when all sphere types have the same radius.") return (None, None) elif (check == -2): print("ERROR: sub-particle radius larger than particle radius.") return (None, None) elif (check == -3): print("ERROR: requested number of spheres cannot fit in allowed volume.") return (None, None) elif (rnd_engine == "LOOSE"): # random_aggregate() checks internally whether application is INCLUSION check = random_aggregate(sconf, gconf, rnd_seed, max_rad) else: print("ERROR: unrecognized random generator engine.") return (None, None) if (check != 0): print("WARNING: %d sphere(s) could not be placed."%check) if (check != sconf['nsph']): print("WARNING: placed only %d out of %d requested spheres."%(check, sconf['nsph'])) sconf['nsph'] = check gconf['nsph'] = check else: if (len(model['geometry_settings']['x_coords']) != gconf['nsph']): print("ERROR: coordinate vectors do not match the number of spheres!") Loading @@ -409,13 +428,16 @@ def load_model(model_file): write_obj(sconf, gconf, max_rad) try: max_gpu_ram = int(model['system_settings']['max_gpu_ram']) if (max_gpu_ram > 0): max_gpu_ram_bytes = max_gpu_ram * 1024 * 1024 * 1024 matrix_dim = 2 * gconf['nsph'] * gconf['li'] * (gconf['li'] + 2) matrix_size_bytes = 16 * matrix_dim * matrix_dim matrix_size_Gb = float(matrix_size_bytes) / 1024.0 / 1024.0 / 1024.0 print("INFO: estimated matrix size is {0:.3g} Gb.".format(matrix_size_Gb)) if (max_gpu_ram > 0): max_gpu_ram_bytes = max_gpu_ram * 1024 * 1024 * 1024 if (matrix_size_bytes < max_gpu_ram_bytes): max_gpu_processes = int(max_gpu_ram_bytes / matrix_size_bytes) print("INFO: system supports up to %d simultaneous processes on GPU."%max_gpu_processes) print("INFO: only %d GPU processes allowed, if using refinement."%(max_gpu_processes / 3)) else: print("WARNING: estimated matrix size is larger than available GPU memory!") else: Loading Loading @@ -454,6 +476,8 @@ def match_grid(sconf): max_layers = 0 nxi = 0 sconf['vec_xi'] = [] err_arg = "" try: for i in range(sconf['configurations']): layers = sconf['nshl'][i] if (sconf['application'] == "INCLUSION" and i == 0): Loading @@ -461,7 +485,8 @@ def match_grid(sconf): for j in range(layers): file_idx = sconf['dielec_id'][i][j] dielec_path = Path(sconf['dielec_path'], sconf['dielec_file'][int(file_idx) - 1]) file_name = str(dielec_path) err_arg = str(dielec_path) file_name = err_arg dielec_file = open(file_name, 'r') wavelengths = [] rpart = [] Loading Loading @@ -520,6 +545,9 @@ def match_grid(sconf): wi += 1 sconf['rdc0'][j][i][dci] = ry sconf['idc0'][j][i][dci] = iy except FileNotFoundError as ex: print("ERROR: file not found %s!"%err_arg) return 3 return result ## \brief Parse the command line arguments. Loading Loading @@ -587,15 +615,19 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): vec_thetas = [0.0 for i in range(nsph)] vec_phis = [0.0 for i in range(nsph)] vec_rads = [0.0 for i in range(nsph)] vec_types = [] n_types = scatterer['configurations'] if (0 in scatterer['vec_types']): tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 for ti in range(nsph): itype = tincrement + int(n_types * random.random()) scatterer['vec_types'][ti] = itype if (scatterer['application'] == "INCLUSION"): scatterer['vec_types'][0] = 1 sph_type_index = scatterer['vec_types'][0] - 1 vec_spheres = [{'itype': sph_type_index + 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}] vec_rads[0] = scatterer['ros'][sph_type_index] vec_types.append(sph_type_index + 1) placed_spheres = 1 attempts = 0 for i in range(1, nsph): Loading Loading @@ -669,7 +701,9 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): }) is_placed = True placed_spheres += 1 vec_types.append(sph_type_index + 1) attempts = 0 scatterer['vec_types'] = vec_types sph_index = 0 for sphere in sorted(vec_spheres, key=lambda item: item['itype']): scatterer['vec_types'][sph_index] = sphere['itype'] Loading @@ -677,6 +711,7 @@ def random_aggregate(scatterer, geometry, seed, max_rad, max_attempts=100): geometry['vec_sph_y'][sph_index] = sphere['y'] geometry['vec_sph_z'][sph_index] = sphere['z'] sph_index += 1 result = placed_spheres return result ## \brief Generate a random compact cluster from YAML configuration options. Loading @@ -699,45 +734,30 @@ def random_compact(scatterer, geometry, seed, max_rad): random.seed(seed) nsph = scatterer['nsph'] n_types = scatterer['configurations'] if (0 in scatterer['vec_types']): tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 for ti in range(nsph): itype = tincrement + int(n_types * random.random()) scatterer['vec_types'][ti] = itype radius = scatterer['ros'][0] # Return an error code if types have different radii if (max(scatterer['ros']) != min(scatterer['ros'])): result = 1 result = -1 elif (radius > max_rad): # Requested spheres are larger than the maximum allowed volume. # End function with error code -2. result = -2 else: radius = scatterer['ros'][0] x_centers = np.arange(-1.0 * max_rad + radius, max_rad, 2.0 * radius) y_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius) z_centers = np.arange(-1.0 * max_rad + radius, max_rad, math.sqrt(3.0) * radius) x_offset = radius y_offset = radius x_layer_offset = radius y_layer_offset = radius / math.sqrt(3.0) x_centers = np.arange(-1.0 * max_rad + 2.0 * radius, max_rad, 2.0 * radius) x_size = len(x_centers) y_size = int(2.0 * max_rad / ((1.0 + math.sqrt(3.0) / 3.0) * radius)) z_size = int(2.0 * max_rad / ((1.0 + 2.0 * math.sqrt(6.0) / 3.0) * radius)) tmp_spheres = [] n_cells = len(x_centers) * len(y_centers) * len(z_centers) n_cells = x_size * y_size * z_size print("INFO: the cubic space would contain %d spheres."%n_cells) n_max_spheres = int((max_rad / radius) * (max_rad / radius) * (max_rad / radius) * 0.74) print("INFO: the maximum radius allows for %d spheres."%n_max_spheres) for zi in range(len(z_centers)): if (x_layer_offset == 0.0): x_layer_offset = radius else: x_layer_offset = 0.0 if (y_offset == 0.0): y_offset = radius else: y_offset = 0.0 for yi in range(len(y_centers)): if (x_offset == 0.0): x_offset = radius else: x_offset = 0.0 for xi in range(len(x_centers)): x = x_centers[xi] + x_offset + x_layer_offset y = y_centers[yi] + y_offset z = z_centers[zi] k = 0 z = -max_rad + radius while (z < max_rad - radius): j = 0 y = -max_rad + radius while (y < max_rad - radius): for i in range(len(x_centers)): x = (2 * (i + 1) + (j + k) % 2) * radius - max_rad extent = radius + math.sqrt(x * x + y * y + z * z) if (extent < max_rad): tmp_spheres.append({ Loading @@ -746,6 +766,11 @@ def random_compact(scatterer, geometry, seed, max_rad): 'y': y, 'z': z }) # j += 1 y = math.sqrt(3.0) * (j + (k % 2) / 3.0) * radius - max_rad + radius k += 1 z = 2.0 / 3.0 * math.sqrt(6.0) * k * radius - max_rad + radius #tmp_spheres = [{'itype': 1, 'x': 0.0, 'y': 0.0, 'z': 0.0}] current_n = len(tmp_spheres) print("INFO: before erosion there are %d spheres in use."%current_n) Loading Loading @@ -777,20 +802,20 @@ def random_compact(scatterer, geometry, seed, max_rad): current_n -= 1 vec_spheres = [] sph_index = 0 # Generate a vector of types if none is given if (0 in scatterer['vec_types']): tincrement = 1 if scatterer['application'] != "INCLUSION" else 2 for ti in range(current_n): itype = tincrement + int(n_types * random.random()) scatterer['vec_types'][ti] = itype if (scatterer['application'] == "INCLUSION"): scatterer['vec_types'][0] = 1 for ti in range(len(tmp_spheres)): sphere = tmp_spheres[ti] if (sphere['x'] < max_rad): sphere['itype'] = scatterer['vec_types'][sph_index] sph_index += 1 vec_spheres.append(sphere) #pl = pv.Plotter() #for si in range(len(vec_spheres)): # x = vec_spheres[si]['x'] / max_rad # y = vec_spheres[si]['y'] / max_rad # z = vec_spheres[si]['z'] / max_rad # mesh = pv.Sphere(radius / max_rad, (x, y, z)) # pl.add_mesh(mesh) #pl.export_obj("scene.obj") sph_index = 0 for sphere in sorted(vec_spheres, key=lambda item: item['itype']): scatterer['vec_types'][sph_index] = sphere['itype'] Loading @@ -798,7 +823,7 @@ def random_compact(scatterer, geometry, seed, max_rad): geometry['vec_sph_y'][sph_index] = sphere['y'] geometry['vec_sph_z'][sph_index] = sphere['z'] sph_index += 1 return result return current_n ## \brief Write the geometry configuration dictionary to legacy format. # Loading Loading @@ -952,9 +977,10 @@ def write_legacy_sconf(conf): ## \brief Export the model to a set of OBJ files for 3D visualization. # # This function exports the model as a set of OBJ files (one for every # spherical unit, plus a single scene file) to allow for model visualization # with 3D software tools. # This function exports the model as a single OBJ file, containing the # information to visualize the particle with 3D software tools. The model # file is associated with a MTL material libray file, used to assign colors # to spheres of different type. # # \param scatterer: `dict` Scatterer configuration dictionary (gets modified) # \param geometry: `dict` Geometry configuration dictionary (gets modified) Loading
src/trapping/cfrfme.cpp +77 −15 Original line number Diff line number Diff line Loading @@ -56,6 +56,10 @@ #include "../include/tra_subs.h" #endif #ifdef USE_NVTX #include <nvtx3/nvToolsExt.h> #endif using namespace std; /*! \brief C++ implementation of FRFME Loading @@ -64,13 +68,15 @@ using namespace std; * \param output_path: `string` Directory to write the output files in. */ void frfme(string data_file, string output_path) { #ifdef USE_NVTX nvtxRangePush("Running frfme()"); #endif string tfrfme_name = output_path + "/c_TFRFME.hd5"; TFRFME *tfrfme = NULL; Swap1 *tt1 = NULL; Swap2 *tt2 = NULL; char namef[7]; char more; dcomplex **w = NULL; dcomplex *wk = NULL; const dcomplex cc0 = 0.0 + 0.0 * I; const dcomplex uim = 0.0 + 1.0 * I; Loading Loading @@ -98,6 +104,9 @@ void frfme(string data_file, string output_path) { int wsum_size; // End of vector size variables if (jlmf != 1) { #ifdef USE_NVTX nvtxRangePush("frfme() with jlmf != 1"); #endif int nxv, nyv, nzv; if (tfrfme == NULL) tfrfme = TFRFME::from_binary(tfrfme_name, "HDF5"); if (tfrfme != NULL) { Loading Loading @@ -140,7 +149,16 @@ void frfme(string data_file, string output_path) { printf("ERROR: could not open TFRFME file.\n"); } nks = nkv - 1; } else { // label 16 #ifdef USE_NVTX nvtxRangePop(); #endif } else { // label 16; jlfm = 1 #ifdef USE_NVTX nvtxRangePush("frfme() with jlmf == 1"); #endif #ifdef USE_NVTX nvtxRangePush("Setup operations"); #endif int nksh, nrsh, nxsh, nysh, nzsh; str_target = file_lines[last_read_line++]; for (int cli = 0; cli < 7; cli++) { Loading Loading @@ -176,6 +194,9 @@ void frfme(string data_file, string output_path) { } str_target = file_lines[last_read_line++]; re = regex("[eEmM]"); #ifdef USE_NVTX nvtxRangePop(); #endif if (regex_search(str_target, m, re)) { more = m.str().at(0); if (more == 'm' || more == 'M') { Loading @@ -193,6 +214,9 @@ void frfme(string data_file, string output_path) { string tedf_name = output_path + "/" + namef + ".hd5"; ScattererConfiguration *tedf = ScattererConfiguration::from_binary(tedf_name, "HDF5"); if (tedf != NULL) { #ifdef USE_NVTX nvtxRangePush("TEDF data import"); #endif int iduml, idum; iduml = tedf->number_of_spheres; idum = tedf->get_iog(iduml - 1); Loading @@ -216,6 +240,9 @@ void frfme(string data_file, string output_path) { xi = xip; } // label 20 #ifdef USE_NVTX nvtxRangePop(); #endif delete tedf; double wn = wp / 3.0e8; vk = xi * wn; Loading @@ -236,6 +263,9 @@ void frfme(string data_file, string output_path) { fshmx = spd * (rir * (sqrt(uy - sthmx * sthmx) / sqrt(uy - sthlmx * sthlmx)) - uy); } // label 22 #ifdef USE_NVTX nvtxRangePush("Memory data loading"); #endif nlmmt = lm * (lm + 2) * 2; nks = nksh * 2; nkv = nks + 1; Loading Loading @@ -279,6 +309,12 @@ void frfme(string data_file, string output_path) { double *_yv = tfrfme->get_y(); double *_zv = tfrfme->get_z(); dcomplex **_wsum = tfrfme->get_matrix(); #ifdef USE_NVTX nvtxRangePop(); #endif #ifdef USE_NVTX nvtxRangePush("Looped vector initialization"); #endif for (int i24 = nxshpo; i24 <= nxs; i24++) { _xv[i24] = _xv[i24 - 1] + delxyz; _xv[nxv - i24 - 1] = -_xv[i24]; Loading @@ -297,7 +333,13 @@ void frfme(string data_file, string output_path) { vkv[i28] = vkv[i28 - 1] + delk; vkv[nkv - i28 - 1] = -vkv[i28]; } // i28 loop #ifdef USE_NVTX nvtxRangePop(); #endif if (tfrfme != NULL) { #ifdef USE_NVTX nvtxRangePush("TFRFME initialization"); #endif tfrfme->set_param("vk", vk); tfrfme->set_param("exri", exri); tfrfme->set_param("an", an); Loading Loading @@ -329,19 +371,20 @@ void frfme(string data_file, string output_path) { tt2->set_param("nlmmt", 1.0 * nlmmt); tt2->set_param("nrvc", 1.0 * nrvc); tt2->write_binary(temp_name2, "HDF5"); #ifdef USE_NVTX nvtxRangePop(); #endif dcomplex *vec_w = new dcomplex[nkv * nkv](); dcomplex **w = new dcomplex*[nkv]; for (int wi = 0; wi < nkv; wi++) w[wi] = vec_w + wi * nkv; #ifdef USE_NVTX nvtxRangePush("j80 loop"); #endif for (int j80 = jlmf; j80 <= jlml; j80++) { dcomplex *tt1_wk = tt1->get_vector(); int wk_index = 0; // w matrix if (w != NULL) { for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi]; delete[] w; } w = new dcomplex*[nkv]; for (int wi = 0; wi < nkv; wi++) w[wi] = new dcomplex[nkv](); for (int jy50 = 0; jy50 < nkv; jy50++) { for (int jx50 = 0; jx50 < nkv; jx50++) { for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1_wk[wk_index++]; for (int wi = 0; wi < nlmmt; wi++) wk[wi] = tt1->vec_wk[wk_index++]; w[jx50][jy50] = wk[j80 - 1]; } // jx50 } // jy50 loop Loading Loading @@ -377,7 +420,15 @@ void frfme(string data_file, string output_path) { } // iy70 loop } // iz75 loop } // j80 loop delete[] vec_w; delete[] w; #ifdef USE_NVTX nvtxRangePop(); #endif // label 88 #ifdef USE_NVTX nvtxRangePush("Closing operations"); #endif tfrfme->write_binary(tfrfme_name, "HDF5"); string output_name = output_path + "/c_OFRFME"; FILE *output = fopen(output_name.c_str(), "w"); Loading @@ -386,6 +437,9 @@ void frfme(string data_file, string output_path) { if (spd > 0.0) fprintf(output, " FSHMX =%15.7lE\n", fshmx); fprintf(output, " FRSH =%15.7lE\n", frsh); fclose(output); #ifdef USE_NVTX nvtxRangePop(); #endif } else { // Should never happen. printf("ERROR: could not open TFRFME file for output.\n"); } Loading @@ -398,16 +452,24 @@ void frfme(string data_file, string output_path) { fprintf(output, " WRONG INPUT TAPE\n"); fclose(output); } #ifdef USE_NVTX nvtxRangePop(); #endif } // label 45 #ifdef USE_NVTX nvtxRangePush("frfme() memory clean"); #endif if (tfrfme != NULL) delete tfrfme; delete[] file_lines; if (tt2 != NULL) delete tt2; if (w != NULL) { for (int wi = nkv - 1; wi > -1; wi--) delete[] w[wi]; delete[] w; } if (wk != NULL) delete[] wk; if (tt1 != NULL) delete tt1; #ifdef USE_NVTX nvtxRangePop(); #endif printf("FRFME: Done.\n"); #ifdef USE_NVTX nvtxRangePop(); #endif }