Loading pyproject.toml +3 −0 Original line number Diff line number Diff line Loading @@ -102,3 +102,6 @@ lsst_inaf_agile = ["py.typed"] [tool.coverage.run] omit=["src/lsst_inaf_agile/_version.py"] [tool.mypy] exclude = "src/scripts/butler/create_truth_summary.py" src/scripts/butler/create_truth_summary.py +95 −64 Original line number Diff line number Diff line Loading @@ -6,31 +6,55 @@ import os import numpy as np import pandas import util from catalog_galaxy_agn import get_catalog_from_config from lsst.daf import butler from lsst.daf.butler import Butler from lsst.geom import Angle, SpherePoint import util dirname = "data/catalog/dr1_new_new" ### # initializations # c = get_catalog_from_config("etc/config_dr1.ini") c = get_catalog_from_config("etc/config_dr1_new_new.ini") my_butler = butler.Butler(f"{dirname}/repo", collections="skymaps") my_butler = Butler(f"{dirname}/repo", collections="skymaps") # type: ignore my_skymap = my_butler.get("skyMap") # Columns to write to file COLUMNS = [ "id", "ra", "dec", "flux_u", "flux_g", "flux_r", "flux_i", "flux_z", "flux_y", "truth_type", "is_unique_truth_entry", "is_variable", "is_pointsource", "tract", "patch", "skymap", ] # Set nan values to some sufficiently small magnitude def get_mag(b): ret = util.flux_to_mag(c[f"lsst-{b}_total"]) return np.where(np.isfinite(ret), ret, 99.0) umag, gmag, rmag, imag, zmag, ymag = [get_mag(b) for b in "ugrizy"] umag, gmag, rmag, imag, zmag, ymag = (get_mag(b) for b in "ugrizy") def get_flux(b): return c[f"lsst-{b}_total"] * 1000.0 uflux, gflux, rflux, iflux, zflux, yflux = [get_flux(b) for b in "ugrizy"] uflux, gflux, rflux, iflux, zflux, yflux = (get_flux(b) for b in "ugrizy") # get truth_type # 0: agn type 1 Loading @@ -42,23 +66,16 @@ is_agn = c["is_agn"] is_agn_type1 = is_agn * ~c["is_optical_type2"] is_agn_type2 = is_agn * c["is_optical_type2"] is_galaxy = ~is_star * ~is_agn truth_type = ( 0 * is_agn_type1 + 1 * is_agn_type2 + 2 * is_galaxy + 3 * is_star ) truth_type = 0 * is_agn_type1 + 1 * is_agn_type2 + 2 * is_galaxy + 3 * is_star files = {} filenames = [] for i in range(c["RA"].size): print(i, end='\r') print(i, end="\r") longitude = Angle(c["RA"][i] * np.pi / 180.0) latitude = Angle(c["DEC"][i] * np.pi / 180.0) coords = SpherePoint(longitude, latitude) for ret in my_skymap.findTractPatchList([coords]): # Gets the tract/patch tract = ret[0]._id patch = ret[1][0]._sequentialIndex Loading @@ -67,14 +84,18 @@ for i in range(c["RA"].size): # filename = f"data/dr1/extra/truth/{tract}/{patch}/truth_catalog.csv" # filename = f"data/dr1/extra/truth/{tract}/truth_catalog.csv" filename = f"{dirname}/extra/truth/{tract}/truth_catalog.csv" if not os.path.exists(os.path.dirname(filename)): os.makedirs(os.path.dirname(filename)) if not filename in files: files[filename] = open(filename, 'w') print("id,ra,dec,flux_u,flux_g,flux_r,flux_i,flux_z,flux_y,truth_type,is_unique_truth_entry,is_variable,is_pointsource,tract,patch,skymap", file=files[filename]) if filename not in filenames: filenames.append(filename) with open(filename, "w") as ff: print(",".join(COLUMNS), file=ff) print( ','.join([ ",".join( [ str(c["ID"][i]), # id str(c["RA"][i]), # ra str(c["DEC"][i]), # dec Loading @@ -91,45 +112,55 @@ for i in range(c["RA"].size): str(tract), # tract str(patch), # patch "DC2", # skymap ]), file=files[filename] ] ), file=ff, ) ### # Creates the truth summary cwd = os.getcwd() + '/' cwd = os.getcwd() + "/" # filename = "data/dr1/extra/truth/truth_summary.csv" filename = f"{dirname}/extra/truth/truth_summary.csv" print("Writing", filename) with open(filename, 'w') as f: print("filename,tract,skymap", file=f) for filename_csv, v in files.items(): with open(filename, "w") as ff: print("filename,tract,skymap", file=ff) for filename_csv in filenames: ### # Converts .csv to .parq v.close() filename_parq = filename_csv.replace(".csv", ".parq") pandas.read_csv(filename_csv).to_parquet(filename_parq) print(filename_parq) tract = filename_parq.split('/')[-2] patch = filename_parq.split('/')[-1] tract = filename_parq.split("/")[-2] patch = filename_parq.split("/")[-1] # print(','.join([cwd + filename_csv, tract, patch, "DC2"]), file=f) print(','.join([cwd + filename_parq, tract, "DC2"]), file=f) print(",".join([cwd + filename_parq, tract, "DC2"]), file=ff) print(f"===============================================================================") print(f"Run the following commands next") print(f"===============================================================================") print("===============================================================================") print("Run the following commands next") print("===============================================================================") print(f"butler remove-runs {dirname}/repo truth_summary") print(f"butler ingest-files --transfer direct {dirname}/repo truth_summary truth_summary {dirname}/extra/truth/truth_summary.csv") print( f""" butler ingest-files --transfer direct \ {dirname}/repo truth_summary truth_summary \ {dirname}/extra/truth/truth_summary.csv """ ) print(f"butler collection-chain --mode=extend {dirname}/repo u/viitanen/output truth_summary") print(f"===============================================================================") print("===============================================================================") # https://rtn-029.lsst.io/#creating-and-populating-the-repository # butler register-dataset-type $REPO 'truth_summary' DataFrame skymap tract os.system(f"butler remove-runs {dirname}/repo truth_summary") os.system(f"butler ingest-files --transfer direct {dirname}/repo truth_summary truth_summary {dirname}/extra/truth/truth_summary.csv") os.system( f""" butler ingest-files --transfer direct \ {dirname}/repo truth_summary truth_summary \ {dirname}/extra/truth/truth_summary.csv """ ) os.system(f"butler collection-chain --mode=extend {dirname}/repo u/viitanen/output truth_summary") Loading
pyproject.toml +3 −0 Original line number Diff line number Diff line Loading @@ -102,3 +102,6 @@ lsst_inaf_agile = ["py.typed"] [tool.coverage.run] omit=["src/lsst_inaf_agile/_version.py"] [tool.mypy] exclude = "src/scripts/butler/create_truth_summary.py"
src/scripts/butler/create_truth_summary.py +95 −64 Original line number Diff line number Diff line Loading @@ -6,31 +6,55 @@ import os import numpy as np import pandas import util from catalog_galaxy_agn import get_catalog_from_config from lsst.daf import butler from lsst.daf.butler import Butler from lsst.geom import Angle, SpherePoint import util dirname = "data/catalog/dr1_new_new" ### # initializations # c = get_catalog_from_config("etc/config_dr1.ini") c = get_catalog_from_config("etc/config_dr1_new_new.ini") my_butler = butler.Butler(f"{dirname}/repo", collections="skymaps") my_butler = Butler(f"{dirname}/repo", collections="skymaps") # type: ignore my_skymap = my_butler.get("skyMap") # Columns to write to file COLUMNS = [ "id", "ra", "dec", "flux_u", "flux_g", "flux_r", "flux_i", "flux_z", "flux_y", "truth_type", "is_unique_truth_entry", "is_variable", "is_pointsource", "tract", "patch", "skymap", ] # Set nan values to some sufficiently small magnitude def get_mag(b): ret = util.flux_to_mag(c[f"lsst-{b}_total"]) return np.where(np.isfinite(ret), ret, 99.0) umag, gmag, rmag, imag, zmag, ymag = [get_mag(b) for b in "ugrizy"] umag, gmag, rmag, imag, zmag, ymag = (get_mag(b) for b in "ugrizy") def get_flux(b): return c[f"lsst-{b}_total"] * 1000.0 uflux, gflux, rflux, iflux, zflux, yflux = [get_flux(b) for b in "ugrizy"] uflux, gflux, rflux, iflux, zflux, yflux = (get_flux(b) for b in "ugrizy") # get truth_type # 0: agn type 1 Loading @@ -42,23 +66,16 @@ is_agn = c["is_agn"] is_agn_type1 = is_agn * ~c["is_optical_type2"] is_agn_type2 = is_agn * c["is_optical_type2"] is_galaxy = ~is_star * ~is_agn truth_type = ( 0 * is_agn_type1 + 1 * is_agn_type2 + 2 * is_galaxy + 3 * is_star ) truth_type = 0 * is_agn_type1 + 1 * is_agn_type2 + 2 * is_galaxy + 3 * is_star files = {} filenames = [] for i in range(c["RA"].size): print(i, end='\r') print(i, end="\r") longitude = Angle(c["RA"][i] * np.pi / 180.0) latitude = Angle(c["DEC"][i] * np.pi / 180.0) coords = SpherePoint(longitude, latitude) for ret in my_skymap.findTractPatchList([coords]): # Gets the tract/patch tract = ret[0]._id patch = ret[1][0]._sequentialIndex Loading @@ -67,14 +84,18 @@ for i in range(c["RA"].size): # filename = f"data/dr1/extra/truth/{tract}/{patch}/truth_catalog.csv" # filename = f"data/dr1/extra/truth/{tract}/truth_catalog.csv" filename = f"{dirname}/extra/truth/{tract}/truth_catalog.csv" if not os.path.exists(os.path.dirname(filename)): os.makedirs(os.path.dirname(filename)) if not filename in files: files[filename] = open(filename, 'w') print("id,ra,dec,flux_u,flux_g,flux_r,flux_i,flux_z,flux_y,truth_type,is_unique_truth_entry,is_variable,is_pointsource,tract,patch,skymap", file=files[filename]) if filename not in filenames: filenames.append(filename) with open(filename, "w") as ff: print(",".join(COLUMNS), file=ff) print( ','.join([ ",".join( [ str(c["ID"][i]), # id str(c["RA"][i]), # ra str(c["DEC"][i]), # dec Loading @@ -91,45 +112,55 @@ for i in range(c["RA"].size): str(tract), # tract str(patch), # patch "DC2", # skymap ]), file=files[filename] ] ), file=ff, ) ### # Creates the truth summary cwd = os.getcwd() + '/' cwd = os.getcwd() + "/" # filename = "data/dr1/extra/truth/truth_summary.csv" filename = f"{dirname}/extra/truth/truth_summary.csv" print("Writing", filename) with open(filename, 'w') as f: print("filename,tract,skymap", file=f) for filename_csv, v in files.items(): with open(filename, "w") as ff: print("filename,tract,skymap", file=ff) for filename_csv in filenames: ### # Converts .csv to .parq v.close() filename_parq = filename_csv.replace(".csv", ".parq") pandas.read_csv(filename_csv).to_parquet(filename_parq) print(filename_parq) tract = filename_parq.split('/')[-2] patch = filename_parq.split('/')[-1] tract = filename_parq.split("/")[-2] patch = filename_parq.split("/")[-1] # print(','.join([cwd + filename_csv, tract, patch, "DC2"]), file=f) print(','.join([cwd + filename_parq, tract, "DC2"]), file=f) print(",".join([cwd + filename_parq, tract, "DC2"]), file=ff) print(f"===============================================================================") print(f"Run the following commands next") print(f"===============================================================================") print("===============================================================================") print("Run the following commands next") print("===============================================================================") print(f"butler remove-runs {dirname}/repo truth_summary") print(f"butler ingest-files --transfer direct {dirname}/repo truth_summary truth_summary {dirname}/extra/truth/truth_summary.csv") print( f""" butler ingest-files --transfer direct \ {dirname}/repo truth_summary truth_summary \ {dirname}/extra/truth/truth_summary.csv """ ) print(f"butler collection-chain --mode=extend {dirname}/repo u/viitanen/output truth_summary") print(f"===============================================================================") print("===============================================================================") # https://rtn-029.lsst.io/#creating-and-populating-the-repository # butler register-dataset-type $REPO 'truth_summary' DataFrame skymap tract os.system(f"butler remove-runs {dirname}/repo truth_summary") os.system(f"butler ingest-files --transfer direct {dirname}/repo truth_summary truth_summary {dirname}/extra/truth/truth_summary.csv") os.system( f""" butler ingest-files --transfer direct \ {dirname}/repo truth_summary truth_summary \ {dirname}/extra/truth/truth_summary.csv """ ) os.system(f"butler collection-chain --mode=extend {dirname}/repo u/viitanen/output truth_summary")