Commit 12597660 authored by Gianalfredo Nicolini's avatar Gianalfredo Nicolini
Browse files

first sync

parents
Loading
Loading
Loading
Loading

Lasco.py

0 → 100644
+387 −0
Original line number Diff line number Diff line
from astropy.io import fits
import numpy as np
import cupy as cp
import os               # <-- change import
import urllib.request, urllib.error
import time
from pathlib import Path
import shutil
import pandas as pd
import sys
import requests
from datetime import datetime, timedelta
from bs4 import BeautifulSoup

def download_file(url: str, dest_path: Path, retry_delay: int = 15):
    """Download *url* to *dest_path*, creating parent directories as needed.

    Transient network failures (connection refused, DNS, OS-level I/O
    errors) are retried forever, waiting *retry_delay* seconds between
    attempts.  A definitive HTTP error response (e.g. 404) is raised to
    the caller, since retrying it can never succeed.

    Parameters
    ----------
    url : str
        Source URL (any scheme urllib supports, including file://).
    dest_path : Path
        Destination file path; parent directories are created.
    retry_delay : int, optional
        Seconds to sleep between retries (default 15).

    Raises
    ------
    urllib.error.HTTPError
        When the server answers with an error status such as 404.
    """
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    while True:
        try:
            urllib.request.urlretrieve(url, dest_path)
        except urllib.error.HTTPError:
            # The server responded definitively (404, 403, ...) -- retrying
            # is pointless, so let the caller decide to skip the file.
            raise
        except (urllib.error.URLError, OSError):
            # Transient failure: keep retrying (original best-effort intent).
            print(f"Network down, retrying in {retry_delay} seconds…")
            time.sleep(retry_delay)
        else:
            return

def get_lasco_data(dateList, CI=('c2', 'c3'), archive=None):
    """Download LASCO level-0.5 images for the given dates.

    For each date and camera, the per-day ``img_hdr.txt`` index is
    fetched, filtered the same way as the original IDL code, and the
    selected images are downloaded into ``data/lasco/<CAMERA>/``.

    Parameters
    ----------
    dateList : list of str
        Dates in one of the formats YYYY-MM-DD, YYYY/MM/DD, YYYYMMDD
        or "YYYY MM DD".
    CI : sequence of str, optional
        Instruments to download: 'c2', 'c3' or both.
    archive : str or None, optional
        Local archive directory.  When given, files already present in
        the archive root are reused instead of re-downloaded, and the
        day's directory is moved into ``<archive>/YYYY/MM/DD/``.

    Raises
    ------
    ValueError
        If a date string cannot be parsed with any known format.
    """
    for date in dateList:
        # Try a few common date formats -- the first one that parses wins.
        for fmt in ("%Y-%m-%d", "%Y/%m/%d", "%Y%m%d", "%Y %m %d"):
            try:
                dt = datetime.strptime(date, fmt)
                break
            except ValueError:
                continue
        else:
            raise ValueError(f"Unable to parse date string: {date}")
        year, month, day = dt.year, dt.month, dt.day

        # Format components exactly as the original IDL code does.
        y = f"{year:04d}"[2:]   # last two digits of the year
        m = f"{month:02d}"
        d = f"{day:02d}"

        for camera in CI:
            base_url = f"https://umbra.nascom.nasa.gov/pub/lasco_level05/{y}{m}{d}/{camera}/"

            # ------------------------------------------------------------------
            # Download the header file (img_hdr.txt) listing the day's images
            # ------------------------------------------------------------------
            hdr_local_dir = Path("data/lasco") / camera.upper()
            hdr_local_path = hdr_local_dir / "img_hdr.txt"
            hdr_url = base_url + "img_hdr.txt"
            print(f"Downloading header file from {hdr_url}")
            try:
                download_file(hdr_url, dest_path=hdr_local_path)
            except Exception:
                # Header missing for this day/camera: skip this camera.
                print('Error: ' + hdr_url + ' not found, skipping')
                continue

            # ------------------------------------------------------------------
            # Parse the header file.
            # Expected whitespace-separated columns (14 fields):
            # filename, date, time, c, exposure, n1, n2, a, b,
            # filter, polarizer, program, z, os-number
            # ------------------------------------------------------------------
            selected_files = []
            with open(hdr_local_path, "r") as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) != 14:
                        continue  # skip malformed lines
                    filename = parts[0]
                    f_str, p_str, pr_str = parts[9], parts[10], parts[11]
                    # Same filter as IDL: (filter is Orange or Clear)
                    # and polarizer == Clear and program == Normal.
                    if f_str in ("Orange", "Clear") and p_str == "Clear" and pr_str == "Normal":
                        selected_files.append(filename)

            # ------------------------------------------------------------------
            # Download each selected image (or reuse an archived copy)
            # ------------------------------------------------------------------
            for fname in selected_files:
                img_url = base_url + fname
                img_dest = hdr_local_dir / fname
                # Guard on archive: the old code crashed here when
                # archive was None (the default).
                if archive is not None and (Path(archive) / fname).is_file():
                    print(f"{fname} already exists, skipping download.")
                    # Move the archived copy into the working directory.
                    shutil.move(str(Path(archive) / fname), str(img_dest))
                else:
                    print(f"Downloading {fname}")
                    download_file(img_url, img_dest)

            # Move the day's directory into <archive>/YYYY/MM/DD.
            # (Path joins fix the old string concatenation, which omitted
            # the separator between archive and year.)
            if archive is not None and Path(archive).is_dir():
                day_dir = Path(archive) / f"{year:04d}" / m / d
                day_dir.mkdir(parents=True, exist_ok=True)
                shutil.move(str(hdr_local_dir), str(day_dir))

            print(camera + " files have been downloaded.")
        print(date + " files have been downloaded.")
    print("All selected files have been downloaded.")


def get_halo_list(archive=""):
    """Download the CDAW halo-CME list and save it as HaloList.csv.

    Parameters
    ----------
    archive : str, optional
        Directory where ``HaloList.csv`` is written (default: current
        working directory).  The old code referenced an undefined global
        ``archive`` here and always crashed with NameError.

    Raises
    ------
    RuntimeError
        If the page cannot be downloaded or the halo table is missing.
    """
    # ------------------------------------------------------------------
    # 1. Fetch the HTML page
    # ------------------------------------------------------------------
    URL = "https://cdaw.gsfc.nasa.gov/CME_list/halo/halo.html"
    try:
        resp = requests.get(URL, timeout=10)
        resp.raise_for_status()
    except Exception as exc:
        # Chain the original cause for easier debugging.
        raise RuntimeError(f"Failed to download page: {exc}") from exc

    # ------------------------------------------------------------------
    # 2. Parse with BeautifulSoup (lxml parser is fast & tolerant)
    # ------------------------------------------------------------------
    soup = BeautifulSoup(resp.text, "lxml")
    raw_tables = [str(tbl) for tbl in soup.find_all("table")]
    if not raw_tables:
        raise RuntimeError("No <table> elements could be located on the page.")

    # ------------------------------------------------------------------
    # 3. Convert each raw table HTML into a pandas DataFrame
    # ------------------------------------------------------------------
    dfs = []
    for idx, tbl_html in enumerate(raw_tables):
        # Save each table to a file for debugging.
        with open(f"halo_table_{idx+1}.html", "w") as fh:
            fh.write(tbl_html)
        # read_html returns a list of DataFrames; header=0 uses the
        # first row as column names.
        df_list = pd.read_html(tbl_html, header=0)
        if df_list:
            dfs.append(df_list[0])
        else:
            print(f"⚠️  Table #{idx} could not be parsed by pandas.")

    # dfs[0] is the page header table; dfs[1] is the halo-CME list.
    if len(dfs) < 2:
        raise RuntimeError("Halo CME table not found on the page.")
    halo_df = dfs[1]

    # Rename the first-C2-appearance columns to DATE_OBS / TIME_OBS.
    halo_df = halo_df.rename(columns={
        'First C2 Appearance Date Time [UT]': 'DATE_OBS',
        'First C2 Appearance Date Time [UT].1': 'TIME_OBS',
    })
    # Merge DATE_OBS and TIME_OBS into an ISO-format DATE_TIME column
    # (YYYY-MM-DDTHH:MM:SS).
    halo_df['DATE_TIME'] = pd.to_datetime(
        halo_df['DATE_OBS'] + ' ' + halo_df['TIME_OBS'],
        format='%Y/%m/%d %H:%M:%S',
    ).dt.strftime('%Y-%m-%dT%H:%M:%S')

    print("Halo CME list has been downloaded.")
    halo_df.to_csv(os.path.join(archive, 'HaloList.csv'), index=False)
    print('HaloList.csv file has been created.')


# Let's create a class for LASCO images
class LascoImage:
    """Thin wrapper around a LASCO level-0.5 FITS image.

    Subclasses (C2, C3) must set the geometric attributes ``DcornersR``,
    ``NcornersR``, ``occulterR``, ``fovR`` and the flag ``cuda`` before
    calling :meth:`get_masks`.  Instances keep the FITS file open until
    :meth:`close` is called; the class also works as a context manager.
    """

    def __init__(self, filepath):
        # Keep the HDU list open for repeated header/data access.
        self.filepath = filepath
        self.hdul = fits.open(filepath)
        self.header = self.hdul[0].header

    def __enter__(self):
        # Context-manager support so files are always closed.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

    def get_keyword(self, keyword):
        """Return a primary-header keyword value, or None if absent."""
        return self.header.get(keyword, None)

    def get_data(self):
        """Return the primary HDU image data array."""
        return self.hdul[0].data

    def close(self):
        """Close the underlying FITS file."""
        self.hdul.close()

    def get_masks(self, cuda=None):
        """Build boolean masks for the image regions.

        Creates ``mask_DC`` (dark corners), ``mask_NC`` (normal corners),
        ``mask_OC`` (occulter) and ``mask_FOV`` (field of view) from the
        radii attributes set by the subclass.

        Parameters
        ----------
        cuda : bool or None, optional
            Build the masks with cupy (GPU) when True.  When None
            (default) fall back to ``self.cuda`` -- bug fix: subclasses
            call ``get_masks()`` with no argument, so the old
            ``cuda=False`` default always produced numpy masks even for
            GPU instances.
        """
        if cuda is None:
            cuda = getattr(self, 'cuda', False)
        xp = cp if cuda else np

        # LASCO level-0.5 frames are 1024x1024 centred on (512, 512).
        cy, cx = 512, 512
        ny, nx = 1024, 1024

        # Radial distance of every pixel from the centre, built once
        # from cheap broadcasting grids.
        y_idx, x_idx = xp.ogrid[:ny, :nx]
        r = xp.sqrt((y_idx - cy) ** 2 + (x_idx - cx) ** 2)

        self.mask_DC = r > self.DcornersR                          # dark corners
        self.mask_NC = r > self.NcornersR                          # normal corners
        self.mask_OC = r < self.occulterR                          # occulter
        self.mask_FOV = (r >= self.fovR[0]) & (r <= self.fovR[1])  # field of view

    def compute_stat(self, DC, NC, OC, FOV, bias, nsigma):
        """Fill ``self.stat`` with per-region statistics.

        Parameters
        ----------
        DC, NC, OC, FOV, bias : array
            1-D pixel samples (numpy or cupy) of each region.
        nsigma : int
            Number of sigma levels for the outlier counts: for each
            region and each n in 1..nsigma, the count of pixels whose
            absolute deviation from the region mean exceeds n*std.

        Notes
        -----
        Empty regions get ``np.nan`` for mean/std and the sigma counts.
        ``self.stat_header`` lists the keys in a stable order suitable
        for CSV output.
        """
        # Single dispatch point replaces the old duplicated cuda/numpy
        # branches; the `cp` name is only evaluated when cuda is True.
        xp = cp if self.cuda else np
        regions = {'DC': DC, 'NC': NC, 'OC': OC, 'FOV': FOV, 'bias': bias}

        self.stat = {}
        self.stat_header = ['FILENAME']
        for name in regions:
            self.stat_header += [f'{name}_mean', f'{name}_count', f'{name}_std']
        for nx in range(1, nsigma + 1):
            for name in regions:
                self.stat_header.append(f'{name}_{nx}sigma_count')

        self.stat['FILENAME'] = os.path.basename(self.filepath)

        means, stds = {}, {}
        for name, arr in regions.items():
            means[name] = xp.mean(arr) if arr.size > 0 else np.nan
            stds[name] = xp.std(arr) if arr.size > 0 else np.nan
            self.stat[f'{name}_mean'] = means[name]
            self.stat[f'{name}_count'] = arr.size
            self.stat[f'{name}_std'] = stds[name]

        # Count pixels deviating more than n*sigma from the region mean.
        for nx in range(1, nsigma + 1):
            for name, arr in regions.items():
                if arr.size > 0:
                    count = xp.sum(xp.abs(arr - means[name]) > nx * stds[name])
                else:
                    count = np.nan
                self.stat[f'{name}_{nx}sigma_count'] = count

    
    # Let's create a child class for C2 images
class C2(LascoImage):
    """LASCO C2 coronagraph image with the C2-specific geometry."""

    def __init__(self, filepath, cuda=False):
        super().__init__(filepath)
        if self.get_keyword('DETECTOR') != 'C2':
            raise ValueError("The provided file is not a LASCO C2 image.")
        # C2 geometric radii in pixels.
        self.DcornersR = 703    # beyond this radius: dark corners
        self.NcornersR = 653    # beyond this radius: normal corners
        self.occulterR = 60     # below this radius: occulter disk
        self.fovR = [150, 555]  # usable field-of-view [min, max] radii
        self.cuda = cuda
        self.get_masks()

    def get_stat(self, nsigma=3, previous=None):
        """Compute region statistics for this frame.

        When *previous* is None the statistics are computed on this
        image's data; otherwise on the running difference
        self - previous.  Results are stored via compute_stat().
        """
        if self.cuda:
            frame = cp.asarray(self.get_data())  # transfer data to GPU
            if previous is not None:
                frame = frame - cp.asarray(previous.get_data())
        else:
            frame = self.get_data()
            if previous is not None:
                frame = frame - previous.get_data()

        rows, cols = frame.shape
        if cols != 1024 or rows != 1024:
            raise ValueError("Unexpected image dimensions ("+str(cols)+","+str(rows)+") for LASCO C2.")

        dark = frame[self.mask_DC]
        normal = frame[self.mask_NC]
        occ = frame[self.mask_OC]
        fov = frame[self.mask_FOV]
        # Bias region = dark corners + occulter pixels.
        concat = cp.concatenate if self.cuda else np.concatenate
        bias = concat([dark, occ])
        self.compute_stat(dark, normal, occ, fov, bias, nsigma)

    # Let's create a child class for C3 images
class C3(LascoImage):
    """LASCO C3 coronagraph image with the C3-specific geometry."""

    def __init__(self, filepath, cuda=False):
        super().__init__(filepath)
        if self.get_keyword('DETECTOR') != 'C3':
            raise ValueError("The provided file is not a LASCO C3 image.")
        # LASCO C3 specific geometric parameters (pixels).
        self.DcornersR = 618   # everything above this radius is a dark corner
        self.NcornersR = 582   # everything above this radius is a normal corner
        self.occulterR = 30    # everything below this radius is the occulter
        self.fovR = [90, 554]  # field-of-view min and max radii
        self.cuda = cuda
        self.get_masks()

    def get_stat(self, nsigma=3, previous=None):
        """Compute region statistics for this frame.

        Parameters
        ----------
        nsigma : int, optional
            Sigma levels for the outlier counts.  Bug fix: the old
            default of 3.0 (a float) made ``range(1, nsigma + 1)`` in
            compute_stat raise TypeError.
        previous : C3 or None, optional
            When given, statistics are computed on the running
            difference self - previous instead of on this image alone.
        """
        if previous is None:
            if self.cuda:
                data = cp.asarray(self.get_data())  # transfer data to GPU
            else:
                data = self.get_data()
        else:
            if self.cuda:
                data = cp.asarray(self.get_data()) - cp.asarray(previous.get_data())
            else:
                data = self.get_data() - previous.get_data()
        ny, nx = data.shape
        if nx != 1024 or ny != 1024:
            # Bug fix: the message previously said "LASCO C2".
            raise ValueError("Unexpected image dimensions ("+str(nx)+","+str(ny)+") for LASCO C3.")
        DC = data[self.mask_DC]
        NC = data[self.mask_NC]
        OC = data[self.mask_OC]
        FOV = data[self.mask_FOV]
        # Bias region = dark corners + occulter pixels.
        if self.cuda:
            bias = cp.concatenate([DC, OC])
        else:
            bias = np.concatenate([DC, OC])
        self.compute_stat(DC, NC, OC, FOV, bias, nsigma)