Loading etl/prs/prs_inspect_results.py +90 −34 Original line number Diff line number Diff line import matplotlib.pyplot as plt import numpy as np import os import pandas as pd import xarray as xr from typing import Union, Tuple from contextlib import closing Loading Loading @@ -56,13 +58,13 @@ def get_aggregated_ratio_from_db( return results[1].aggregated_ratio def get_density_distribution( def get_results( dust_temperature: float, gas_density: float, lines: Union[list, tuple], run_id: str, session: Session, is_isothermal: bool = False) -> Tuple[float, str, str, tuple, np.array, np.array]: is_isothermal: bool = False) -> Tuple[float, str, str, tuple, np.array]: # Select correct column for filtering if is_isothermal is True: dust_temperature_column = GridPars.dust_temperature Loading @@ -85,10 +87,7 @@ def get_density_distribution( and_(dust_temperature_column == dust_temperature, GridPars.density_at_reference == gas_density, GridFiles.quantity == 'gas_number_density', ModelPars.iline.in_(lines), GridPars.run_id == run_id)).order_by(GridPars.created_on.desc()).first() with fits.open(os.path.join('prs', 'fits', 'grids', results[2].fits_grid_name)) as fitsfile: grid_values = fitsfile[0].data with fits.open(os.path.join('prs', 'fits', 'ratios', results[1].ratio_map_name)) as ratio_fitsfile: ratio_values = ratio_fitsfile[0].data assert (results[3].npix % results[0].grid_shape_1 == 0) and (results[3].npix % results[0].grid_shape_2 == 0) Loading @@ -96,10 +95,35 @@ def get_density_distribution( results[1].ratio_map_name, \ results[2].fits_grid_name, \ (int(results[3].npix / results[0].grid_shape_1), int(results[3].npix / results[0].grid_shape_2)), \ grid_values, \ ratio_values def get_grid_values( quantity_name: str, dust_temperature: float, gas_density: float, run_id: str, session: Session, is_isothermal: bool = False) -> np.array: # Select correct column for filtering if is_isothermal is True: dust_temperature_column = GridPars.dust_temperature else: dust_temperature_column = GridPars.dust_temperature_at_reference # Query database results = session.query(GridPars, GridFiles) \ .join(GridFiles) \ .filter( and_(dust_temperature_column == dust_temperature, GridPars.density_at_reference == gas_density, GridFiles.quantity == quantity_name, GridPars.run_id == run_id)).order_by(GridPars.created_on.desc()).first() with fits.open(os.path.join('prs', 'fits', 'grids', results[1].fits_grid_name)) as fitsfile: data = fitsfile[0].data return data def main(run_id: str, is_isothermal: bool, engine: sqla_engine = None): Loading @@ -107,7 +131,6 @@ def main(run_id: str, engine = get_pg_engine(logger=logger) config = load_config_file(os.path.join('config', 'config.yml')) with closing(Session(engine)) as session: # grid definition dust_temperatures = parse_grid_overrides(par_name='dust_temperature', config=config) Loading @@ -121,14 +144,11 @@ def main(run_id: str, 'dust_temperature': dust_temperatures, 'gas_density': central_densities }) density_correlation = xr.DataArray(np.empty(shape=[len(central_densities)]), dims=('gas_density'), coords={ 'gas_density': central_densities }) scatter_data = {} results_dict = {} with closing(Session(engine)) as session: for lines in line_pairs: scatter_data[str(lines)] = {str(density): list() for density in central_densities} results_dict[f'{"-".join(lines)}'] = {} for (tdust, nh2) in product(dust_temperatures, central_densities): aggregated_ratio = get_aggregated_ratio_from_db(dust_temperature=tdust, gas_density=nh2, Loading @@ -137,8 +157,8 @@ def main(run_id: str, run_id=run_id) results.loc[tdust, nh2] = aggregated_ratio logger.debug(f'The aggregated ratio for lines {lines}, using {nh2}, {tdust} is: {aggregated_ratio}') ratio, ratio_fitsname, grid_fitsname, zoom_ratios, density_grid, ratio_values = \ get_density_distribution( ratio, ratio_fitsname, grid_fitsname, zoom_ratios, ratio_values = \ get_results( dust_temperature=tdust, gas_density=nh2, lines=lines, Loading @@ -146,13 +166,33 @@ def main(run_id: str, run_id=run_id, is_isothermal=is_isothermal ) density_correlation.loc[nh2] = np.nanmean(np.where(density_grid == 0, np.nan, density_grid)) density_grid = get_grid_values( quantity_name='gas_number_density', dust_temperature=tdust, gas_density=nh2, session=session, run_id=run_id, is_isothermal=is_isothermal ) temperature_grid = get_grid_values( quantity_name='dust_temperature', dust_temperature=tdust, gas_density=nh2, session=session, run_id=run_id, is_isothermal=is_isothermal ) # density_correlation.loc[nh2] = np.nanmean(np.where(density_grid == 0, np.nan, density_grid)) avg_density_map = np.nansum(np.where(density_grid == 0, np.nan, density_grid ** 2), axis=2) / \ np.nansum(np.where(density_grid == 0, np.nan, density_grid), axis=2) test = np.array([avg_density_map.flatten(), ratio_values[::zoom_ratios[0], ::zoom_ratios[1]].flatten()]) scatter_data[str(lines)][str(nh2)].append(test) plt.scatter(test[0], test[1]) avg_temperature_map = np.nansum(np.where(density_grid == 0, np.nan, temperature_grid * density_grid), axis=2) / \ np.nansum(np.where(density_grid == 0, np.nan, density_grid), axis=2) correlation_data = np.array([avg_density_map.flatten(), avg_temperature_map.flatten(), ratio_values[::zoom_ratios[0], ::zoom_ratios[1]].flatten()]) results_dict[f'{"-".join(lines)}'][f'{"_".join([str(tdust), str(nh2)])}'] = correlation_data plt.scatter(correlation_data[0], correlation_data[2]) plt.semilogx() plt.xlabel('avg(gas_density)') Loading @@ -162,8 +202,24 @@ def main(run_id: str, results.plot(x='dust_temperature', y='gas_density', yscale='log') plt.savefig(os.path.join('prs', 'output', f'ratio_grid_lines_{"-".join(lines)}.png')) plt.clf() # df_list_concat = [] for (tdust, nh2) in product(dust_temperatures, central_densities): df_list = [] for lines in line_pairs: df = pd.DataFrame(data=results_dict[f'{"-".join(lines)}'][f'{"_".join([str(tdust), str(nh2)])}'].transpose(), columns=['avg_nh2', 'avg_tdust', f'ratio_{"-".join(lines)}']) df_list.append(df.dropna()) df_tmp = pd.concat(df_list, axis=1) # df_tmp = df_tmp.loc[~df_tmp.index.duplicated(), :].copy() df_tmp = df_tmp.loc[:, ~df_tmp.columns.duplicated()].copy() df_tmp['nh2'] = nh2 df_tmp['tdust'] = tdust df_list_concat.append(df_tmp) df_all = pd.concat(df_list_concat) df_all.to_csv('full_dataset.csv') if __name__ == '__main__': main(run_id='97000860-29ae-4510-b7d3-5daaf57eb4e0', is_isothermal=False) main(run_id='0d0761f8-fcf3-4293-9d9d-ba1a37f79a19', is_isothermal=False) # main(run_id='ba7fd3ed-5947-4dc6-bcef-38f151f19b77', is_isothermal=False) etl/stg/stg_radmc_input_generator.py +10 −4 Original line number Diff line number Diff line Loading @@ -288,7 +288,10 @@ def save_fits_grid_profile(quantity: np.array, """ _path = validate_parameter(path, default=os.path.join('prs', 'fits', 'grids')) if not os.path.isfile(os.path.join(_path, filename)): try: fits.writeto(os.path.join(_path, filename), quantity) except OSError: pass else: logger.info('Skipping saving of fits grid. File already present!') Loading Loading @@ -515,7 +518,7 @@ def main(run_id: str, config_mdl = load_config_file(os.path.join('mdl', 'config', 'config.yml'), override_config=_override_config['model']) if engine is None: engine = get_pg_engine(logger=logger, engine_kwargs={'pool_size': 3}) engine = get_pg_engine(logger=logger) input_files_dir = validate_parameter(path_radmc_files, default=os.path.join('mdl', 'radmc_files')) cleanup_directory(directory=input_files_dir, Loading Loading @@ -588,7 +591,6 @@ def main(run_id: str, logger.info('Using cached dust temperature distribution') shutil.copy(os.path.join(execution_dir, 'model', 'data', 'dust_temperature.dat'), os.path.join('.', 'dust_temperature.dat')) engine.dispose() os.chdir(execution_dir) for quantity_name in ('gas_number_density', 'dust_temperature'): Loading @@ -597,6 +599,7 @@ def main(run_id: str, run_id=run_id, zip_filename=zip_filename, quantity_name=quantity_name) engine.dispose() make_archive(output_filename=zip_filename, source_dir=input_files_dir, Loading @@ -616,7 +619,10 @@ def save_and_persist_grid(engine: sqla_engine, quantity_name=quantity_name) save_fits_grid_profile(quantity=profiles[quantity_name], filename=grid_file_name) populate_grid_files(quantity_name=quantity_name, engine=engine, zip_filename=zip_filename, filename=grid_file_name, populate_grid_files(quantity_name=quantity_name, engine=engine, zip_filename=zip_filename, filename=grid_file_name, run_id=run_id) Loading Loading
etl/prs/prs_inspect_results.py +90 −34 Original line number Diff line number Diff line import matplotlib.pyplot as plt import numpy as np import os import pandas as pd import xarray as xr from typing import Union, Tuple from contextlib import closing Loading Loading @@ -56,13 +58,13 @@ def get_aggregated_ratio_from_db( return results[1].aggregated_ratio def get_density_distribution( def get_results( dust_temperature: float, gas_density: float, lines: Union[list, tuple], run_id: str, session: Session, is_isothermal: bool = False) -> Tuple[float, str, str, tuple, np.array, np.array]: is_isothermal: bool = False) -> Tuple[float, str, str, tuple, np.array]: # Select correct column for filtering if is_isothermal is True: dust_temperature_column = GridPars.dust_temperature Loading @@ -85,10 +87,7 @@ def get_density_distribution( and_(dust_temperature_column == dust_temperature, GridPars.density_at_reference == gas_density, GridFiles.quantity == 'gas_number_density', ModelPars.iline.in_(lines), GridPars.run_id == run_id)).order_by(GridPars.created_on.desc()).first() with fits.open(os.path.join('prs', 'fits', 'grids', results[2].fits_grid_name)) as fitsfile: grid_values = fitsfile[0].data with fits.open(os.path.join('prs', 'fits', 'ratios', results[1].ratio_map_name)) as ratio_fitsfile: ratio_values = ratio_fitsfile[0].data assert (results[3].npix % results[0].grid_shape_1 == 0) and (results[3].npix % results[0].grid_shape_2 == 0) Loading @@ -96,10 +95,35 @@ def get_density_distribution( results[1].ratio_map_name, \ results[2].fits_grid_name, \ (int(results[3].npix / results[0].grid_shape_1), int(results[3].npix / results[0].grid_shape_2)), \ grid_values, \ ratio_values def get_grid_values( quantity_name: str, dust_temperature: float, gas_density: float, run_id: str, session: Session, is_isothermal: bool = False) -> np.array: # Select correct column for filtering if is_isothermal is True: dust_temperature_column = GridPars.dust_temperature else: dust_temperature_column = GridPars.dust_temperature_at_reference # Query database results = session.query(GridPars, GridFiles) \ .join(GridFiles) \ .filter( and_(dust_temperature_column == dust_temperature, GridPars.density_at_reference == gas_density, GridFiles.quantity == quantity_name, GridPars.run_id == run_id)).order_by(GridPars.created_on.desc()).first() with fits.open(os.path.join('prs', 'fits', 'grids', results[1].fits_grid_name)) as fitsfile: data = fitsfile[0].data return data def main(run_id: str, is_isothermal: bool, engine: sqla_engine = None): Loading @@ -107,7 +131,6 @@ def main(run_id: str, engine = get_pg_engine(logger=logger) config = load_config_file(os.path.join('config', 'config.yml')) with closing(Session(engine)) as session: # grid definition dust_temperatures = parse_grid_overrides(par_name='dust_temperature', config=config) Loading @@ -121,14 +144,11 @@ def main(run_id: str, 'dust_temperature': dust_temperatures, 'gas_density': central_densities }) density_correlation = xr.DataArray(np.empty(shape=[len(central_densities)]), dims=('gas_density'), coords={ 'gas_density': central_densities }) scatter_data = {} results_dict = {} with closing(Session(engine)) as session: for lines in line_pairs: scatter_data[str(lines)] = {str(density): list() for density in central_densities} results_dict[f'{"-".join(lines)}'] = {} for (tdust, nh2) in product(dust_temperatures, central_densities): aggregated_ratio = get_aggregated_ratio_from_db(dust_temperature=tdust, gas_density=nh2, Loading @@ -137,8 +157,8 @@ def main(run_id: str, run_id=run_id) results.loc[tdust, nh2] = aggregated_ratio logger.debug(f'The aggregated ratio for lines {lines}, using {nh2}, {tdust} is: {aggregated_ratio}') ratio, ratio_fitsname, grid_fitsname, zoom_ratios, density_grid, ratio_values = \ get_density_distribution( ratio, ratio_fitsname, grid_fitsname, zoom_ratios, ratio_values = \ get_results( dust_temperature=tdust, gas_density=nh2, lines=lines, Loading @@ -146,13 +166,33 @@ def main(run_id: str, run_id=run_id, is_isothermal=is_isothermal ) density_correlation.loc[nh2] = np.nanmean(np.where(density_grid == 0, np.nan, density_grid)) density_grid = get_grid_values( quantity_name='gas_number_density', dust_temperature=tdust, gas_density=nh2, session=session, run_id=run_id, is_isothermal=is_isothermal ) temperature_grid = get_grid_values( quantity_name='dust_temperature', dust_temperature=tdust, gas_density=nh2, session=session, run_id=run_id, is_isothermal=is_isothermal ) # density_correlation.loc[nh2] = np.nanmean(np.where(density_grid == 0, np.nan, density_grid)) avg_density_map = np.nansum(np.where(density_grid == 0, np.nan, density_grid ** 2), axis=2) / \ np.nansum(np.where(density_grid == 0, np.nan, density_grid), axis=2) test = np.array([avg_density_map.flatten(), ratio_values[::zoom_ratios[0], ::zoom_ratios[1]].flatten()]) scatter_data[str(lines)][str(nh2)].append(test) plt.scatter(test[0], test[1]) avg_temperature_map = np.nansum(np.where(density_grid == 0, np.nan, temperature_grid * density_grid), axis=2) / \ np.nansum(np.where(density_grid == 0, np.nan, density_grid), axis=2) correlation_data = np.array([avg_density_map.flatten(), avg_temperature_map.flatten(), ratio_values[::zoom_ratios[0], ::zoom_ratios[1]].flatten()]) results_dict[f'{"-".join(lines)}'][f'{"_".join([str(tdust), str(nh2)])}'] = correlation_data plt.scatter(correlation_data[0], correlation_data[2]) plt.semilogx() plt.xlabel('avg(gas_density)') Loading @@ -162,8 +202,24 @@ def main(run_id: str, results.plot(x='dust_temperature', y='gas_density', yscale='log') plt.savefig(os.path.join('prs', 'output', f'ratio_grid_lines_{"-".join(lines)}.png')) plt.clf() # df_list_concat = [] for (tdust, nh2) in product(dust_temperatures, central_densities): df_list = [] for lines in line_pairs: df = pd.DataFrame(data=results_dict[f'{"-".join(lines)}'][f'{"_".join([str(tdust), str(nh2)])}'].transpose(), columns=['avg_nh2', 'avg_tdust', f'ratio_{"-".join(lines)}']) df_list.append(df.dropna()) df_tmp = pd.concat(df_list, axis=1) # df_tmp = df_tmp.loc[~df_tmp.index.duplicated(), :].copy() df_tmp = df_tmp.loc[:, ~df_tmp.columns.duplicated()].copy() df_tmp['nh2'] = nh2 df_tmp['tdust'] = tdust df_list_concat.append(df_tmp) df_all = pd.concat(df_list_concat) df_all.to_csv('full_dataset.csv') if __name__ == '__main__': main(run_id='97000860-29ae-4510-b7d3-5daaf57eb4e0', is_isothermal=False) main(run_id='0d0761f8-fcf3-4293-9d9d-ba1a37f79a19', is_isothermal=False) # main(run_id='ba7fd3ed-5947-4dc6-bcef-38f151f19b77', is_isothermal=False)
etl/stg/stg_radmc_input_generator.py +10 −4 Original line number Diff line number Diff line Loading @@ -288,7 +288,10 @@ def save_fits_grid_profile(quantity: np.array, """ _path = validate_parameter(path, default=os.path.join('prs', 'fits', 'grids')) if not os.path.isfile(os.path.join(_path, filename)): try: fits.writeto(os.path.join(_path, filename), quantity) except OSError: pass else: logger.info('Skipping saving of fits grid. File already present!') Loading Loading @@ -515,7 +518,7 @@ def main(run_id: str, config_mdl = load_config_file(os.path.join('mdl', 'config', 'config.yml'), override_config=_override_config['model']) if engine is None: engine = get_pg_engine(logger=logger, engine_kwargs={'pool_size': 3}) engine = get_pg_engine(logger=logger) input_files_dir = validate_parameter(path_radmc_files, default=os.path.join('mdl', 'radmc_files')) cleanup_directory(directory=input_files_dir, Loading Loading @@ -588,7 +591,6 @@ def main(run_id: str, logger.info('Using cached dust temperature distribution') shutil.copy(os.path.join(execution_dir, 'model', 'data', 'dust_temperature.dat'), os.path.join('.', 'dust_temperature.dat')) engine.dispose() os.chdir(execution_dir) for quantity_name in ('gas_number_density', 'dust_temperature'): Loading @@ -597,6 +599,7 @@ def main(run_id: str, run_id=run_id, zip_filename=zip_filename, quantity_name=quantity_name) engine.dispose() make_archive(output_filename=zip_filename, source_dir=input_files_dir, Loading @@ -616,7 +619,10 @@ def save_and_persist_grid(engine: sqla_engine, quantity_name=quantity_name) save_fits_grid_profile(quantity=profiles[quantity_name], filename=grid_file_name) populate_grid_files(quantity_name=quantity_name, engine=engine, zip_filename=zip_filename, filename=grid_file_name, populate_grid_files(quantity_name=quantity_name, engine=engine, zip_filename=zip_filename, filename=grid_file_name, run_id=run_id) Loading