def get_credentials(logger: logging.Logger,
                    credentials_filename: Union[None, str] = None,
                    set_envs: bool = False) -> Union[None, dict]:
    """
    Retrieves credentials, if available, for logging in to the requested service.

    :param logger: the logger to use.
    :param credentials_filename: the name of the credentials file to be used. If None, uses the default.
    :param set_envs: whether to set the credentials as environment variables.
    :return: a dictionary with username and password, as retrieved from the credentials file, or None
        if the credentials file does not exist.
    """
    _credentials_filename = credentials_filename if credentials_filename is not None \
        else os.path.join('credentials', 'credentials.yml')
    try:
        with open(_credentials_filename) as credentials_file:
            credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)
        # DB credentials are stored base64-obfuscated; decode only the keys that
        # are present so a credentials file without DB entries does not raise
        # KeyError (the original decoded them unconditionally).
        for key in ('DB_USER', 'DB_PASS'):
            if key in credentials:
                credentials[key] = base64.b64decode(credentials[key].encode()).decode()
        if set_envs is True:
            # Environment variables must be strings; coerce non-string values (e.g. ports).
            for key in credentials:
                os.environ[key] = str(credentials[key])
        return credentials
    except FileNotFoundError:
        logger.info('Credentials not found!')
        return None


def compute_unique_hash_filename(config: dict) -> str:
    """
    Compute a unique and reproducible filename given a configuration dictionary.

    :param config: the configuration dictionary to hash
    :return: a unique hash of the dictionary, to use as a key and filename
    """
    # sort_keys makes the JSON serialization (and therefore the hash)
    # independent of dictionary insertion order.
    return sha1(json.dumps(config, sort_keys=True).encode()).hexdigest()


def make_tarfile(output_filename: str,
                 source_dir: str,
                 archive_path: Union[str, None] = None):
    """
    Compresses all files in a directory into a .tgz file.

    :param output_filename: the output filename
    :param source_dir: the directory to compress
    :param archive_path: path used as the prefix of the in-archive name
    """
    _archive_path = validate_parameter(archive_path,
                                       default=os.path.join('stg', 'archive'))
    # NOTE(review): _archive_path is embedded in the member name inside the
    # archive (arcname), not used as an output location — confirm intended.
    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir,
                arcname=os.path.join(_archive_path, os.path.basename(source_dir)))


def cleanup_directory(directory: str, logger: logging.Logger):
    """
    Remove all regular files directly inside ``directory``; anything that is
    not a regular file (e.g. subdirectories) is kept.

    :param directory: the directory to clean up; silently ignored if missing
    :param logger: the logger to use
    """
    if os.path.isdir(directory):
        for filename in glob.glob(f'{directory}/*'):
            # BUG FIX: the original tested the function object `os.path.isfile`
            # (always truthy) instead of calling it, so directories were passed
            # to os.remove and the skip branch was unreachable.
            if os.path.isfile(filename):
                logger.debug(f'Removing file {filename}')
                os.remove(filename)
            else:
                logger.debug(f'Skipping {filename}')
def upsert(table_object: sqla.orm.decl_api.DeclarativeMeta,
           row_dict: dict,
           conflict_keys: List[sqla.Column],
           engine: sqla.engine):
    """
    Insert ``row_dict`` into the table mapped by ``table_object``; on conflict
    over ``conflict_keys``, update the existing row with the same values
    (PostgreSQL ``INSERT ... ON CONFLICT DO UPDATE``).

    :param table_object: the declarative ORM class mapping the target table
    :param row_dict: column-name -> value mapping for the row to upsert
    :param conflict_keys: columns forming the conflict target (unique/primary key)
    :param engine: the engine to connect with
    """
    statement = insert(table_object).values(row_dict)
    statement = statement.on_conflict_do_update(
        index_elements=conflict_keys,
        set_=row_dict)
    with Session(engine) as session:
        session.execute(statement)
        session.commit()


def convert_frequency_to_wavelength(frequency: astropy.units.Quantity,
                                    output_units: astropy.units.Unit):
    """
    Convert a frequency Quantity into the requested units (typically a
    wavelength), using astropy's spectral equivalencies.

    :param frequency: the frequency to convert
    :param output_units: the target units
    :return: the converted Quantity
    """
    return frequency.to(output_units, equivalencies=u.spectral())


def get_pg_engine(logger: logging.Logger) -> sqlalchemy.engine.Engine:
    """
    Build a PostgreSQL engine from the DB credentials file.

    :param logger: the logger to use
    :return: a SQLAlchemy engine for the configured PostgreSQL database
    :raises FileNotFoundError: if the DB credentials file is missing
    """
    credentials = get_credentials(logger=logger,
                                  credentials_filename=os.path.join('credentials',
                                                                    'db_credentials.yml'))
    if credentials is None:
        # BUG FIX: get_credentials returns None when the file is missing; the
        # original then crashed with an opaque TypeError on subscripting.
        raise FileNotFoundError('DB credentials file not found; cannot create engine')
    # URL.create escapes special characters in the credentials, unlike the
    # original f-string-built DSN which broke on e.g. '@' or '/' in passwords.
    url = sqlalchemy.engine.URL.create(
        drivername='postgresql',
        username=credentials['DB_USER'],
        password=credentials['DB_PASS'],
        host=credentials['DB_HOST'],
        port=credentials['DB_PORT'],
        database=credentials['DB_NAME'],
    )
    return sqlalchemy.create_engine(url)
.gitignore +1 −0 Original line number Diff line number Diff line Loading @@ -4,3 +4,4 @@ scratches_and_co/ /etl/prs/fits/cubes/* /etl/prs/fits/moments/* /etl/prs/fits/ratios/* /etl/credentials/*
def get_credentials(logger: logging.Logger,
                    credentials_filename: Union[None, str] = None,
                    set_envs: bool = False) -> Union[None, dict]:
    """
    Retrieves credentials, if available, for logging in to the requested service.

    :param logger: the logger to use.
    :param credentials_filename: the name of the credentials file to be used. If None, uses the default.
    :param set_envs: whether to set the credentials as environment variables.
    :return: a dictionary with username and password, as retrieved from the credentials file, or None
        if the credentials file does not exist.
    """
    _credentials_filename = credentials_filename if credentials_filename is not None \
        else os.path.join('credentials', 'credentials.yml')
    try:
        with open(_credentials_filename) as credentials_file:
            credentials = yaml.load(credentials_file, Loader=yaml.FullLoader)
        # DB credentials are stored base64-obfuscated; decode only the keys that
        # are present so a credentials file without DB entries does not raise
        # KeyError (the original decoded them unconditionally).
        for key in ('DB_USER', 'DB_PASS'):
            if key in credentials:
                credentials[key] = base64.b64decode(credentials[key].encode()).decode()
        if set_envs is True:
            # Environment variables must be strings; coerce non-string values (e.g. ports).
            for key in credentials:
                os.environ[key] = str(credentials[key])
        return credentials
    except FileNotFoundError:
        logger.info('Credentials not found!')
        return None


def compute_unique_hash_filename(config: dict) -> str:
    """
    Compute a unique and reproducible filename given a configuration dictionary.

    :param config: the configuration dictionary to hash
    :return: a unique hash of the dictionary, to use as a key and filename
    """
    # sort_keys makes the JSON serialization (and therefore the hash)
    # independent of dictionary insertion order.
    return sha1(json.dumps(config, sort_keys=True).encode()).hexdigest()


def make_tarfile(output_filename: str,
                 source_dir: str,
                 archive_path: Union[str, None] = None):
    """
    Compresses all files in a directory into a .tgz file.

    :param output_filename: the output filename
    :param source_dir: the directory to compress
    :param archive_path: path used as the prefix of the in-archive name
    """
    _archive_path = validate_parameter(archive_path,
                                       default=os.path.join('stg', 'archive'))
    # NOTE(review): _archive_path is embedded in the member name inside the
    # archive (arcname), not used as an output location — confirm intended.
    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir,
                arcname=os.path.join(_archive_path, os.path.basename(source_dir)))


def cleanup_directory(directory: str, logger: logging.Logger):
    """
    Remove all regular files directly inside ``directory``; anything that is
    not a regular file (e.g. subdirectories) is kept.

    :param directory: the directory to clean up; silently ignored if missing
    :param logger: the logger to use
    """
    if os.path.isdir(directory):
        for filename in glob.glob(f'{directory}/*'):
            # BUG FIX: the original tested the function object `os.path.isfile`
            # (always truthy) instead of calling it, so directories were passed
            # to os.remove and the skip branch was unreachable.
            if os.path.isfile(filename):
                logger.debug(f'Removing file {filename}')
                os.remove(filename)
            else:
                logger.debug(f'Skipping {filename}')
def upsert(table_object: sqla.orm.decl_api.DeclarativeMeta,
           row_dict: dict,
           conflict_keys: List[sqla.Column],
           engine: sqla.engine):
    """
    Insert ``row_dict`` into the table mapped by ``table_object``; on conflict
    over ``conflict_keys``, update the existing row with the same values
    (PostgreSQL ``INSERT ... ON CONFLICT DO UPDATE``).

    :param table_object: the declarative ORM class mapping the target table
    :param row_dict: column-name -> value mapping for the row to upsert
    :param conflict_keys: columns forming the conflict target (unique/primary key)
    :param engine: the engine to connect with
    """
    statement = insert(table_object).values(row_dict)
    statement = statement.on_conflict_do_update(
        index_elements=conflict_keys,
        set_=row_dict)
    with Session(engine) as session:
        session.execute(statement)
        session.commit()


def convert_frequency_to_wavelength(frequency: astropy.units.Quantity,
                                    output_units: astropy.units.Unit):
    """
    Convert a frequency Quantity into the requested units (typically a
    wavelength), using astropy's spectral equivalencies.

    :param frequency: the frequency to convert
    :param output_units: the target units
    :return: the converted Quantity
    """
    return frequency.to(output_units, equivalencies=u.spectral())


def get_pg_engine(logger: logging.Logger) -> sqlalchemy.engine.Engine:
    """
    Build a PostgreSQL engine from the DB credentials file.

    :param logger: the logger to use
    :return: a SQLAlchemy engine for the configured PostgreSQL database
    :raises FileNotFoundError: if the DB credentials file is missing
    """
    credentials = get_credentials(logger=logger,
                                  credentials_filename=os.path.join('credentials',
                                                                    'db_credentials.yml'))
    if credentials is None:
        # BUG FIX: get_credentials returns None when the file is missing; the
        # original then crashed with an opaque TypeError on subscripting.
        raise FileNotFoundError('DB credentials file not found; cannot create engine')
    # URL.create escapes special characters in the credentials, unlike the
    # original f-string-built DSN which broke on e.g. '@' or '/' in passwords.
    url = sqlalchemy.engine.URL.create(
        drivername='postgresql',
        username=credentials['DB_USER'],
        password=credentials['DB_PASS'],
        host=credentials['DB_HOST'],
        port=credentials['DB_PORT'],
        database=credentials['DB_NAME'],
    )
    return sqlalchemy.create_engine(url)
etl/assets/constants/__init__.py +1 −0 Original line number Diff line number Diff line Loading @@ -56,3 +56,4 @@ radmc_options_mapping = { 'nchannels': 'linenlam', 'npix': 'npix' }
etl/docker-compose.yaml 0 → 100644 +16 −0 Original line number Diff line number Diff line version: '3.8' services: db: image: postgres:14.1-alpine restart: always environment: - POSTGRES_DB=$DB_NAME - POSTGRES_USER=$DB_USER - POSTGRES_PASSWORD=$DB_PASS ports: - '5432:5432' volumes: - db:/var/lib/postgresql/data volumes: db: driver: local No newline at end of file
etl/mdl/config/config.yml +7 −3 Original line number Diff line number Diff line radmc: radmc_postprocessing: nphotons: 1000000 scattering_mode_max: 0 iranfreqmode: 1 tgas_eq_tdust: 1 radmc_observation: inclination: 0 position_angle: 0 iline: 2 central_frequency: 230.538 frequency_units: GHz width_kms: 10 nchannels: 100 npix: 200