Commit 330eb125 authored by Gianalfredo Nicolini's avatar Gianalfredo Nicolini
Browse files

first sync

parent 12597660
Loading
Loading
Loading
Loading

createTOC.py

0 → 100644
+165 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

# Owned
__author__ = "Giana Nicolini (INAF - OATo)"
__copyright__ = "TBD"
__credits__ = [""]
__license__ = "GPL"
__maintainer__ = "Giana Nicolini"
__email__ = "gianalfredo.nicolini@inaf.it"
__status__ = "beta"

# Change Log
# Ver       Date       Description
# --------- ---------- -----------------------------------------------
#     1.0.0 2022-01-11 First release
__version__ = "1.1.0_20250909"

# Generic/Built-in
# import datetime
from genericpath import isfile
import os
import sys
import yaml
import pandas as pd
import requests

# Other Libs
from astropy.io import fits


_MISSING = object()  # sentinel: the keyword was not found in any HDU


def _firstHeaderValue(hul, kw):
    """Return the value of `kw` from the first HDU defining it, else `_MISSING`."""
    for hdu in hul:
        try:
            return hdu.header[kw]
        except KeyError:
            continue
    return _MISSING


def _mergedDateTime(hul):
    """Return DATE-OBS and TIME-OBS joined as '<date>T<time>', else `_MISSING`.

    The first HDU that carries both keywords with non-empty values is used;
    '/' separators in the date are replaced by '-'.
    """
    for hdu in hul:
        try:
            date_obs = str(hdu.header['DATE-OBS'])
            time_obs = str(hdu.header['TIME-OBS'])
        except KeyError:
            continue
        if date_obs != '' and time_obs != '':
            return date_obs.replace('/', '-') + 'T' + time_obs
    return _MISSING


def createTocLine(fitsf, hdr, merge_dt=None):
    """Build the comma-separated TOC entry of one FITS file.

    Parameters
    ----------
    fitsf : str
        Path of the FITS file; it is always the first field of the result.
    hdr : list of str or None
        Column names of the TOC. 'RELPATH' and 'FILENAME' are skipped here
        (they produce no field); every other name is looked up as a header
        keyword in the HDUs of the file. With ``None`` the file is not opened
        and only `fitsf` is returned.
    merge_dt : bool, optional
        When true, a 'DATE_TIME' column that is not a real header keyword is
        filled with DATE-OBS and TIME-OBS merged as '<date>T<time>'. When
        omitted, the module-level ``mergeDT`` is used if it exists, otherwise
        False.

    Returns
    -------
    str
        `fitsf` followed by one field per looked-up keyword, comma separated,
        without a trailing newline. A keyword found in no HDU yields an empty
        field.
    """
    if hdr is None:
        # Nothing to look up: previously this path hit an unbound variable.
        return fitsf
    if merge_dt is None:
        # Backward compatibility: the script keeps this option in a global.
        merge_dt = bool(globals().get('mergeDT', False))

    fields = [fitsf]
    # The context manager closes the file even if a header lookup fails.
    with fits.open(fitsf) as hul:
        for kw in hdr:
            if kw in ('RELPATH', 'FILENAME'):
                continue
            # Exactly one field per keyword, so the columns stay aligned with
            # the header line even when several HDUs define the same keyword.
            value = _firstHeaderValue(hul, kw)
            if value is _MISSING and merge_dt and kw == 'DATE_TIME':
                value = _mergedDateTime(hul)
            fields.append('' if value is _MISSING else str(value))
    return ','.join(fields)


# Columns used when no configuration file is available.
DEFAULT_HEADER = ['RELPATH', 'FILENAME', 'INSTRUME', 'DETECTOR',
                  'DATE-OBS', 'TIME-OBS', 'FILTER', 'EXPTIME']


def normalizeRootPath(path):
    """Return `path` with a trailing separator; an empty path means the cwd."""
    if not path:
        path = os.getcwd()
    if not path.endswith(os.sep):
        path = path + os.sep
    return path


def formatTocRow(rootPath, root, fn, tocL):
    """Turn a `createTocLine` result into a TOC row (newline terminated).

    The leading full file path of `tocL` is replaced by two fields: the
    directory of the file relative to `rootPath` (prefixed by '.' + os.sep)
    and the bare file name `fn`.
    """
    relPath = os.path.relpath(root, rootPath)
    if relPath == os.curdir:
        # Files sitting directly in the root directory get './' as RELPATH.
        relPath = ''
    # tocL starts with os.path.join(root, fn); keep only what follows it.
    values = tocL[len(os.path.join(root, fn)):]
    return '.' + os.sep + relPath + ',' + fn + values + '\n'


def loadTocConfig(cfgFile='createTOC.yaml'):
    """Read the YAML configuration and return (header, rootPath, mergeDT).

    A missing configuration file yields the default header, the current
    working directory and mergeDT=False.
    """
    try:
        with open(cfgFile, 'r', encoding='utf-8') as cf:
            cfg = yaml.safe_load(cf) or {}
    except FileNotFoundError:
        return list(DEFAULT_HEADER), normalizeRootPath(''), False

    header = [str(kw).upper() for kw in (cfg.get('Header') or DEFAULT_HEADER)]
    rootPath = normalizeRootPath(cfg.get('RootPath') or '')
    mergeDT = bool(cfg.get('MergeDateTime', False))
    # DATE_TIME is a synthetic column built from DATE-OBS and TIME-OBS.
    if mergeDT and 'DATE-OBS' in header and 'TIME-OBS' in header and 'DATE_TIME' not in header:
        header.append('DATE_TIME')
    if cfg.get('getHaloList', False) and 'HALO_FLAG' not in header:
        header.append('HALO_FLAG')
    return header, rootPath, mergeDT


if __name__ == '__main__':
    # mergeDT stays a module-level name because createTocLine reads it.
    header, rootPath, mergeDT = loadTocConfig()

    # A root path given on the command line overrides the configured one.
    fromArgv = len(sys.argv) > 1
    if fromArgv:
        rootPath = normalizeRootPath(sys.argv[1])
    if not os.path.isdir(rootPath):
        print('The specified RootPath does not exist!')
        sys.exit(1)
    if fromArgv:
        print('Using rootPath from command line argument: ' + rootPath)
    print('Creating TOC file in ' + rootPath)

    # Collect one row per FITS file found below rootPath.
    tocLines = [','.join(header) + '\n']
    for root, _dirs, files_list in os.walk(rootPath):
        for fn in files_list:
            if os.path.splitext(fn)[1].lower() not in ('.fits', '.fts'):
                continue
            fitsf = os.path.join(root, fn)
            try:
                tocL = createTocLine(fitsf, header)
            except OSError as err:
                # An unreadable file must not abort the whole scan.
                print('Warning! Cannot read ' + fitsf + ': ' + str(err))
                continue
            tocLines.append(formatTocRow(rootPath, root, fn, tocL))

    tocPath = os.path.join(rootPath, 'TOC.csv')
    backupPath = os.path.join(rootPath, 'TOC_backup.csv')
    with open(tocPath, 'w') as tocFile:
        tocFile.writelines(tocLines)

    try:
        df = pd.read_csv(tocPath)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        print('Error: TOC file could not be created/updated.')
        print(err)
        sys.exit(1)

    # Report, save and drop duplicated FILENAMEs (the first occurrence is kept).
    if 'FILENAME' in df.columns:
        dups = df['FILENAME'][df['FILENAME'].duplicated(keep=False)]
        if not dups.empty:
            print('Warning! The following FILENAMEs are duplicated in the TOC file:')
            print(dups.to_string(index=False))
            print('Please check the TOC file and rename the duplicated files to avoid issues in the analysis.')
            dups.to_csv(os.path.join(rootPath, 'TOC_duplicatedFilenames.csv'), index=False)
            print('The duplicated FILENAMEs have been saved to TOC_duplicatedFilenames.csv')
            nBefore = len(df)
            df = df.drop_duplicates(subset=['FILENAME'], keep='first')
            print('Duplicated FILENAMEs have been removed from the TOC file.')
            print(f'The TOC file has been reduced from {nBefore} to {len(df)} entries.')
    else:
        print('FILENAME column not found in TOC file, cannot check for duplicates!')

    # Sort chronologically when both columns are available.
    if 'DATE-OBS' in df.columns and 'TIME-OBS' in df.columns:
        df = df.sort_values(by=['DATE-OBS', 'TIME-OBS']).reset_index(drop=True)
        print('The TOC file has been sorted for DATE-OBS and TIME-OBS.')
    else:
        print('DATE-OBS and/or TIME-OBS columns not found in TOC file, cannot sort the TOC file!')

    # Keep the raw (unsorted, not de-duplicated) listing as backup;
    # os.replace overwrites a backup left by a previous run on every platform.
    os.replace(tocPath, backupPath)
    print('A backup of the TOC file has been saved as TOC_backup.csv')
    df.to_csv(tocPath, index=False)
    print('The TOC file has been created/updated successfully.')
    sys.exit()

createTOC.yaml

0 → 100644
+49 −0
Original line number Diff line number Diff line
# __author__ = "Giana Nicolini (INAF - OATo)"
# __copyright__ = "TBD"
# __credits__ = [""]
# __license__ = "GPL"
# __maintainer__ = "Giana Nicolini"
# __email__ = "gianalfredo.nicolini@inaf.it"
# __status__ = "beta"
# Change Log

Description: 'Example of yaml file to create a TOC csv of a set of LASCO-C2 fits files'
HowToUseIt: 'In the Header section below, list all fits header keywords to be included in the csv line associated to each fits file.'
RootPath: '/archive/SolarData/LASCO/'  ## Root path where to search for the fits files.
MergeDateTime: True  ## If True (and both DATE-OBS and TIME-OBS are listed in Header), a DATE_TIME column with DATE-OBS and TIME-OBS merged in ISO format is added to the output csv file.
## If RootPath is not empty, it must exist, otherwise the program will stop.
## If RootPath is empty, the current working directory will be used.
## A root path given as the first command-line argument overrides RootPath.
Header:
  RELPATH: 'relative path of the file wrt the TOC file location'
  FILENAME: 'Filename of the fits file'
  INSTRUME: 'it should be LASCO'
  DETECTOR: 'it should be C2'
  DATE-OBS: 'Date'
  TIME-OBS: 'Time'
  OBT_TIME: ''
  NAXIS1: ''
  NAXIS2: ''
  FILTER: ''
  POLAR: ''
  LP_NUM: ''
  EXPTIME: ''
  EXP0: ''
  EXP1: ''
  EXP2: ''
  EXP3: ''
  OS_NUM: ''
  IMGCTR: ''
  IMGSEQ: ''
  CRPIX1: 'WCS'
  CRPIX2: 'WCS'
  CRVAL1: 'WCS'
  CRVAL2: 'WCS'
  CROTA1: 'WCS'
  CROTA2: 'WCS'
  CTYPE1: 'WCS'
  CTYPE2: 'WCS'
  CUNIT1: 'WCS'
  CUNIT2: 'WCS'
  CDELT1: 'WCS'
  CDELT2: 'WCS'

getLasco.py

0 → 100644
+53 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

# Owned
__author__ = "Giana Nicolini (INAF - OATo)"
__copyright__ = "TBD"
__credits__ = [""]
__license__ = "GPL"
__maintainer__ = "Giana Nicolini"
__email__ = "gianalfredo.nicolini@inaf.it"
__status__ = "beta"

# Change Log
# Ver       Date       Description
# --------- ---------- -----------------------------------------------
#     1.0.0 2022-01-11 First release
__version__ = "1.0.0_20250909"

# Generic/Built-in
# import datetime
import os
import sys
import shutil
from datetime import datetime, timedelta
from pathlib import Path
import urllib.request, urllib.error
import requests
import yaml
from bs4 import BeautifulSoup
import pandas as pd
import time
from Lasco import get_lasco_data, get_halo_list


def resolveInstruments(cfg):
    """Return the lower-case instrument list selected by the configuration.

    An empty or missing 'Instrument' entry selects both 'c2' and 'c3'.
    """
    instrument = cfg.get('Instrument') or ''
    if instrument == '':
        return ['c2', 'c3']
    # .lower() must be called: passing the bound method would hand a
    # function object to the downloader instead of the instrument name.
    return [str(instrument).lower()]


def resolveArchive(cfg):
    """Return the archive directory with a trailing separator, or ''.

    '' is returned when 'moveToArchive' is missing, empty, or does not point
    to an existing directory.
    """
    archive = cfg.get('moveToArchive') or ''
    if archive == '' or not os.path.isdir(archive):
        return ''
    if not archive.endswith(os.sep):
        archive = archive + os.sep
    return archive


if __name__ == '__main__':
    with open('getLasco.yaml', 'r', encoding='utf-8') as file:
        cfg = yaml.safe_load(file) or {}

    if cfg.get('getData', False):
        # 'Date' is mandatory when data are requested (see getLasco.yaml).
        get_lasco_data(dateList=cfg['Date'],
                       CI=resolveInstruments(cfg),
                       archive=resolveArchive(cfg))
    if cfg.get('getHaloList', False):
        get_halo_list()
 No newline at end of file

getLasco.yaml

0 → 100644
+20 −0
Original line number Diff line number Diff line
# __author__ = "Giana Nicolini (INAF - OATo)"
# __copyright__ = "TBD"
# __credits__ = [""]
# __license__ = "GPL"
# __maintainer__ = "Giana Nicolini"
# __email__ = "gianalfredo.nicolini@inaf.it"
# __status__ = "beta"
# Change Log

Description: 'Example of yaml file to download a set of LASCO-C2 fits files'
HowToUseIt: 'Specify a Date (cannot be void), optionally the instrument "C2", "C3" or "" for both'
getData: True  ## If True, the data of the dates specified below are downloaded.
# List in Date all the dates to download (here from 20150717 to 20150731)
Date: ['20150717','20150718','20150719','20150720',
       '20150721','20150722','20150723','20150724','20150725','20150726','20150727','20150728','20150729','20150730','20150731']
       ## Format YYYY-MM-DD (separator can be "-" "/" " " or no separator)
Instrument: ''
Filter: ''
moveToArchive: '/archive/SolarData/LASCO/'
getHaloList: False  ## If True, the HaloList is downloaded from https://cdaw.gsfc.nasa.gov/CME_list/halo/halo.html and the column HALO_FLAG is added to the output csv file.

img_stat.py

0 → 100644
+636 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading