Commit 761bf9d4 authored by Claudio Gheller's avatar Claudio Gheller
Browse files

added scripts to create syntetic observations

parent 1db60710
Loading
Loading
Loading
Loading

scripts/copycolumn.py

0 → 100644
+25 −0
Original line number Diff line number Diff line
import sys
import pyrap
from pyrap.tables import table, maketabdesc, makecoldesc
from pyrap.tables import tablecolumn

print(sys.argv[1])

#inputMS = '/local/work/gheller/mock/test1hr_t201806301100_SBL150.MS'
#inputMS = 'gauss_t201806301100_SBL180.MS'
#inputMS = 'gauss_t201806301100_SBH300.MS'
#inputMS = 'C_RADIO30MHz500_t201806301100_SBL180.MS'
#inputMS = 'C_sky802_t201806301100_SBL180.MS'
#inputMS = 'onehour_10asec_t201806301100_SBL180.MS'
#oinputMS = 'sixhours_t201806301100_SBL180.MS'

inputMS = sys.argv[1]
t=table(inputMS,readonly=False)

print("     ")
print("Columns in the MS")
print(t.colnames())
print("     ")

t.putcol('DATA', t.getcol('MODEL_DATA'))
t.flush()
+113 −0
Original line number Diff line number Diff line
import subprocess
import sys
import time

# input data
datadir = "cgheller@login.m100.cineca.it://m100_scratch/userexternal/cgheller/Lofar/data_model"
cleandir = "cgheller@login.m100.cineca.it://m100_scratch/userexternal/cgheller/Lofar/data_wsclean"
dirtydir = "cgheller@login.m100.cineca.it://m100_scratch/userexternal/cgheller/Lofar/data_wsdirty"
filename_prefix = "RADIO30MHz"
file_ext = ".fits"

# measurement set
workdir = "."
ms_name = "hba-10hrs_t201806301100_SBH255.MS"    #"visibilities_aux.MS"
ms_tar = workdir+"/"+ms_name+"-template.tar"

# general parameters
resolution = "2.0asec"
size = "2000"

# command for creating visibilities
vis_command = "wsclean"

# parameters for the noise
noise_command = "../useful_scripts/noise.py"
noise_factor = "0.0001"

# command for imaging
img_command = "wsclean"
img_parallel = "4"
img_iter = "10000"
#img_options = "-weight briggs 0 -taper-gaussian 60 -apply-primary-beam -reorder -niter 10000 -mgain 0.8 -auto-threshold 5"
#img_options = "-apply-primary-beam -reorder -niter 10000 -mgain 0.8 -auto-threshold 5"

## create empty MS
## LBA 30 MHz
#/opt/losito/bin/synthms --name ms_name --tobs 1 --ra 1.0 --dec 1.57 --lofarversion 1 --station LBA -minfreq 30e6 --maxfreq 30e6
## HBA 150 MHz
#/opt/losito/bin/synthms --name dataset --tobs 1 --ra 1.0 --dec 1.57 --lofarversion 1 --station HBA --minfreq 150e6 --maxfreq 150e6
#
#tar cvf ms_tar ms_name

tinverse = 0.0
tcopy = 0.0
tnoise = 0.0
timaging = 0.0
tget = 0.0
tput = 0.0

for id in range(1,2):
    imgname = filename_prefix+str(id)
    #fitsfilename = datadir+'/'+imgname+file_ext
    fitsfilename = datadir+'/'+filename_prefix+file_ext
    commandline = "scp "+datadir+"/"+imgname+file_ext+" ."
    t0 = time.time()
    error = subprocess.call(commandline,shell=True)
    error = subprocess.run(["cp",imgname+file_ext,"test-model.fits"])
    tget = tget+(time.time()-t0)

    # untar the template MS
    error = subprocess.run(["tar","xvf",ms_tar])

    # create the visibilities
    t0 = time.time()
    error = subprocess.run([vis_command,"-j",img_parallel,"-scale",resolution,"--predict","-use-idg","-grid-with-beam","-name", "test", ms_name])
    tinverse = tinverse+(time.time()-t0)

    # Copy the MODEL_DATA column int the DATA column
    t0 = time.time()
    error = subprocess.run(["python","../useful_scripts/copycolumn.py",ms_name])
    tcopy = tcopy+(time.time()-t0)

    # Add noise
    t0 = time.time()
    error = subprocess.run([noise_command,"--msin",ms_name,"--factor",noise_factor])
    tnoise = tnoise+(time.time()-t0)

    # Imaging
    t0 = time.time()
    error = subprocess.run([img_command,"-j",img_parallel,"-apply-primary-beam","-reorder","-niter",img_iter,\
                            "-mgain", "0.8", "-auto-threshold", "5", "-size",size,size,"-scale",resolution,\
                            "-name",imgname,ms_name])
    timaging = timaging+(time.time()-t0)

    # remove at the end the measurement set
    error = subprocess.run(["rm","-fr",workdir+"/"+ms_name])
    error = subprocess.run(["rm","test-model.fits"])
    error = subprocess.run(["mkdir",imgname])
    #commandline = "rm *beam*.fits"
    #error = subprocess.Popen(commandline, shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    t0 = time.time()
    commandline = "scp "+imgname+"-dirty.fits"+" "+dirtydir
    error = subprocess.call(commandline, shell=True)
    commandline = "scp "+imgname+"-image-pb.fits"+" "+cleandir
    error = subprocess.call(commandline, shell=True)
    tput = tput+(time.time()-t0)
    commandline = "mv *.fits "+imgname
    error = subprocess.call(commandline, shell=True)
##    error = subprocess.run(["tar","cvf",imgname+".tar",imgname])
    error = subprocess.run(["rm","-fr",imgname])
    #commandline = "tar cvf "+imgname+".tar *.fits"
    #error = subprocess.Popen(commandline, shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    #commandline = "rm *.fits"
    #error = subprocess.Popen(commandline, shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)


print("Summary of timings: ")
print("Get data (sec)     = ",tget)
print("Put data (sec)     = ",tput)
print("Inverse time (sec) = ",tinverse)
print("Copy time (sec)    = ",tcopy)
print("Noise time (sec)   = ",tnoise)
print("Imaging time (sec) = ",timaging)

scripts/noise.py

0 → 100755
+157 −0
Original line number Diff line number Diff line
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Noise operation for losito: adds Gaussian noise to a data column
"""
import os, argparse
import pkg_resources
import numpy as np
from scipy.interpolate import interp1d
from losito.lib_io import progress, logger
from losito.lib_observation import MS

logger.debug('Loading NOISE module.')


def _run_parser(obs, parser, step):
    column = parser.getstr(step, 'outputColumn', 'DATA')
    factor = parser.getfloat(step, 'factor', 1.0)
    parser.checkSpelling( step, ['outputColumn','factor'])
    return run(obs, column, factor)

def SEFD(ms, station1, station2, freq):
    '''
    Return the system equivalent flux density (SEFD) for all rows and a
    single fequency channel.
    The values for the SEFD were derived from van Haarlem
    et al. (2013).

    Parameters
    ----------
    ms : MS-object
    station1 : (n,) ndarray, dtype = int
        ANTENNA1 indices.
    station2 : (n,) ndarray, dtpe = int
        ANTENNA2 indices.
    freq : float
        Channel frequency in Hz.
    Returns
    -------
    SEFD : (n,) ndarray
        SEFD in Jansky.
    '''

    def interp_sefd(freq, antennamode):
        # Load antennatype datapoints and interpolate, then reevaluate at freq.
        sefd_pth = pkg_resources.resource_filename('losito', 'data/noise/SEFD_{}.csv'.format(antennamode))
        points = np.loadtxt(sefd_pth, dtype=float, delimiter=',')
        # Lin. extrapolation, so edge band noise is not very accurate.
        fun = interp1d(points[:, 0], points[:, 1], fill_value='extrapolate',
                        kind='linear')
        return fun(freq)

    if ms.antennatype in ['LBA_INNER', 'LBA_OUTER', 'LBA_ALL']:
        SEFD = interp_sefd(freq, ms.antennatype)
        return np.repeat(SEFD, len(station1))  # SEFD same for all BL
    elif 'HBA' in ms.antennatype:
        # For HBA, the SEFD differs between core and remote stations
        names = np.array([_n[0:2] for _n in ms.stations])
        CSids = ms.stationids[np.where(names =='CS')]
        lim = np.max(CSids)  # this id separates the core/remote stations

        # The SEFD for 1 BL is the sqrt of the products of the SEFD per station
        SEFD_cs = interp_sefd(freq, 'HBA_CS')
        SEFD_rs = interp_sefd(freq, 'HBA_RS')
        SEFD_s1 = np.where(station1 <= lim, SEFD_cs, SEFD_rs)
        SEFD_s2 = np.where(station2 <= lim, SEFD_cs, SEFD_rs)
        return np.sqrt(SEFD_s1 * SEFD_s2)
    else:
        logger.error('Stationtype "{}" unknown.'.format(ms.stationtype))
        return 1

def add_noise_to_ms(ms, error, ntot, column='DATA', factor=1.0):
    # TODO: ensure eta = 1 is appropriate
    tab = ms.table(readonly=False)
    eta = 0.95  # system efficiency. Roughly 1.0

    chan_width = ms.channelwidth
    freq = ms.get_frequencies()
    ant1 = tab.getcol('ANTENNA1')
    ant2 = tab.getcol('ANTENNA2')
    exposure = ms.timepersample

    # std = eta * SEFD(ms, ant1, ant2, freq) #TODO
    # Iterate over frequency channels to save memory.
    for i, nu in enumerate(freq):
        # find correct standard deviation from SEFD
        std = factor * eta * SEFD(ms, ant1, ant2, nu)
        std /= np.sqrt(2 * exposure * chan_width[i])
        # draw complex valued samples of shape (row, corr_pol)
        noise = np.random.normal(loc=0, scale=std, size=[4, *np.shape(std)]).T
        noise = noise + 1.j * np.random.normal(loc=0, scale=std, size=[4, *np.shape(std)]).T
        noise = noise[:, np.newaxis, :]
        noisereal = noise.real
        noiseflat = noisereal.flatten()
        ntot = ntot + noiseflat.size
        noiseflat = noiseflat*noiseflat
        error = error + noiseflat.sum()
        noisereal = noise.imag
        noiseflat = noisereal.flatten()
        noiseflat = noiseflat*noiseflat
        error = error + noiseflat.sum()

        prediction = tab.getcolslice(column, blc=[i, -1], trc=[i, -1])
        tab.putcolslice(column, prediction + noise, blc=[i, -1], trc=[i, -1])
    tab.close()

    return error,ntot

def run(obs, column='DATA', factor=1.0):
    """
    Adds Gaussian noise to a data column. Scale of the noise, frequency-
    and station-dependency are calculated according to 'Synthesis Imaging
    in Radio Astronomy II' (1999) by Taylor et al., page 175.

    Parameters
    ----------
    obs : Observation object
        Input obs object.
    column : str, optional. Default = DATA
        Name of column to which noise is added
    factor : float, optional. Default = 1.0
        Scaling factor to change the noise level.
    """
    s = obs.scheduler
    if s.qsub: # add noise in parallel on multiple nodes
        for i, ms in enumerate(obs):
            progress(i, len(obs), status='Estimating noise')  # progress bar
            thisfile = os.path.abspath(__file__)
            cmd = 'python {} --msin={} --start={} --end={} --column={} --factor={}'.format(thisfile,
                                 ms.ms_filename, ms.starttime, ms.endtime, column, factor)
            s.add(cmd, commandType='python', log='losito_add_noise', processors=1)
        s.run(check=True)
        return 0
    else: # add noise linear on one cpu
        results = []
        error = 0
        ntot = 0
        for i, ms in enumerate(obs):
            progress(i, len(obs), status='Estimating noise')  # progress bar
            error, ntot = results.append(add_noise_to_ms(ms, error, ntot, column, factor))
        return sum(results)

if __name__ == '__main__':
    # This file can also be executed directly for a single MS.
    parser = argparse.ArgumentParser(description='Executable of the LoSiTo-noise operation')
    parser.add_argument('--msin', help='MS file prefix', type=str)
    parser.add_argument('--starttime', help='MJDs starttime', type=float)
    parser.add_argument('--endtime', help='MJDs endtime', type=float)
    parser.add_argument('--column', help='Column', default='DATA', type=str)
    parser.add_argument('--factor', help='Factor', default=1.0, type=float)
    # Parse parset
    args = parser.parse_args()
    ms = MS(args.msin, args.starttime, args.endtime)
    error = 0
    ntot = 0 
    error, ntot = add_noise_to_ms(ms, error, ntot, args.column, args.factor)
    print("STANDARD DEVIATION = ",np.sqrt(error/ntot), " Jy/beam")