Commit c67f2cc5 authored by Jay, committed by Jason R Laura
Browse files

Removing all spectral items (now in PySAT) and updating environment.yml to build (hopefully).

parent 53ed63af
Loading
Loading
Loading
Loading

autocnet/fileio/header_parser.py

deleted 100644 → 0
+0 −22
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 24 13:35:56 2015

@author: rbanderson
"""

def header_parser(row, delim):
    """Parse one header line into a single-entry ``{label: data}`` dict.

    Strips comment/markup characters (``#``, ``^``, ``'``, ``*``), splits the
    line on the first occurrence of *delim*, lower-cases the label and turns
    internal spaces into underscores.  Returns an empty dict when *delim*
    does not occur in the line.
    """
    # Get rid of various unwanted characters before parsing.
    badlist = ['#', '^', "'", '*']
    for ch in badlist:
        row = row.replace(ch, '')
    # BUG FIX: str.strip() returns a new string; the original discarded
    # the result, leaving the statement a no-op.
    row = row.strip()

    if delim in row:
        tmp = row.split(delim)
        label = tmp[0].strip().lower().replace('  ', '_').replace(' ', '_')
        data = row.split(tmp[0] + delim)[1].strip()
        headinfo = {label: data}
    else:
        headinfo = {}
    return headinfo
 No newline at end of file

autocnet/fileio/io_ccs.py

deleted 100644 → 0
+0 −139
Original line number Diff line number Diff line
# This code is used to read individual ChemCam CCS .csv files
# Header data is stored as attributes of the data frame
# White space is stripped from the column names
import os
import numpy as np
import pandas as pd
import scipy
from autocnet.fileio.header_parser import header_parser
from autocnet.fileio.utils import file_search
import copy

def CCS(input_data):
    """Read a single ChemCam CCS *.csv* file into a pandas DataFrame.

    The returned frame has one row per shot; spectral columns are keyed by
    ('wvl', wavelength) MultiIndex tuples, and the 14-line file header plus
    filename-derived metadata (sclock, seqid, Pversion) are attached as
    additional columns.
    """
    # BUG FIX: pd.DataFrame.from_csv was deprecated in pandas 0.21 and
    # removed in 1.0; read_csv with index_col=0 is the documented equivalent.
    # (from_csv's parse_dates=True default is dropped deliberately — the
    # index here holds wavelengths, not dates.)
    df = pd.read_csv(input_data, header=14, index_col=0)
    df.rename(columns=lambda x: x.strip(), inplace=True)  # strip whitespace from column names
    df = df.transpose()

    # Re-key the (now wavelength-valued) columns as ('wvl', value) tuples so
    # the spectra can later be isolated from the metadata via df['wvl'].
    cols = df.columns.tolist()
    for i, x in enumerate(cols):
        cols[i] = ('wvl', round(float(x), 5))
    df.columns = pd.MultiIndex.from_tuples(cols)

    # Extract info encoded at fixed positions in the file name.
    # NOTE(review): the slices assume the standard CCS naming convention —
    # sclock at chars 4-12, seqid at 25-33, version at 34-35.
    fname = os.path.basename(input_data)
    df['sclock'] = fname[4:13]
    df['sclock'] = pd.to_numeric(df['sclock'])
    df['seqid'] = fname[25:34].upper()
    df['Pversion'] = fname[34:36]

    # Read the file header and put its information into the dataframe as new
    # columns (inefficient to store this data many times, but much easier to
    # concatenate data from multiple files).
    with open(input_data, 'r') as f:
        header = {}
        for i, row in enumerate(f):
            if i < 14:
                row = row.split(',')[0]
                header.update(header_parser(row, '='))
            else:
                break  # header is confined to the first 14 lines; stop early

    for label, data in header.items():
        if '_float' in label:
            label = label.replace('_float', '')
        if label == 'dark':
            label = 'darkspec'
        df[label] = data

    df.index.rename('shotnum', inplace=True)
    df.reset_index(level=0, inplace=True)
    return df
        
def CCS_SAV(input_data):
    """Read a ChemCam CCS IDL *.SAV* file into a pandas DataFrame.

    Combines the UV/VIS/VNIR spectrometer arrays (per-shot spectra plus the
    averaged and median spectra), keys the spectral columns with
    ('wvl', wavelength) MultiIndex tuples, and copies the remaining SAV
    variables and filename-derived metadata in as columns.
    """
    # BUG FIX: the module-level ``import scipy`` does not make ``scipy.io``
    # available; import the submodule explicitly so readsav always resolves.
    from scipy.io import readsav

    d = readsav(input_data, python_dict=True)
    # Combine the three spectrometers (rows = wavelength channels).
    spectra = np.vstack([d['uv'], d['vis'], d['vnir']])
    aspectra = np.array([np.hstack([d['auv'], d['avis'], d['avnir']])]).T  # averaged spectrum
    mspectra = np.array([np.hstack([d['muv'], d['mvis'], d['mvnir']])]).T  # median spectrum

    # Create tuples for the spectral columns to use as a MultiIndex.
    wvls = list(np.hstack([d['defuv'], d['defvis'], d['defvnir']]))
    for i, x in enumerate(wvls):
        wvls[i] = ('wvl', round(x, 5))

    # Column names: one per shot, plus the average and median spectra.
    shotnums = list(range(1, d['nshots'] + 1))
    shots = ['shot' + str(i) for i in shotnums]
    shots.extend(['ave', 'median'])

    # Build the spectral frame, then transpose so rows are shots/aggregates
    # and columns are wavelengths.
    df = pd.DataFrame(np.hstack([spectra, aspectra, mspectra]),
                      columns=shots, index=pd.MultiIndex.from_tuples(wvls))
    df = df.transpose()

    # Remove the already-consumed spectral arrays from the dict so only
    # metadata remains.
    to_remove = ['uv', 'vis', 'vnir', 'auv', 'avis', 'avnir',
                 'muv', 'mvis', 'mvnir', 'defuv', 'defvis', 'defvnir',
                 'label_info']
    for x in to_remove:
        del d[x]

    # Extract info encoded at fixed positions in the file name.
    fname = os.path.basename(input_data)
    d['sclock'] = fname[4:13]
    d['seqid'] = fname[25:34].upper()
    d['Pversion'] = fname[34:36]

    # Add the remaining SAV variables to the frame as metadata columns.
    for label, data in d.items():
        if type(data) is bytes:
            data = data.decode()  # IDL strings come back as bytes in py3
        df[label] = data

    df['sclock'] = pd.to_numeric(df['sclock'])
    df.index.rename('shotnum', inplace=True)
    df.reset_index(level=0, inplace=True)

    return df

def ccs_batch(directory, searchstring='*CCS*.csv', is_sav=False):
    """Read every matching CCS file under *directory* into one DataFrame.

    For each unique sclock only the file with the highest processing version
    is read.  ``is_sav`` selects the IDL SAV reader; it is forced on when the
    search string targets SAV files.  Frames whose wavelength columns do not
    match the first file's are reported and skipped.
    """
    # BUG FIX: the original unconditionally overwrote the ``is_sav`` argument
    # (``else: is_sav = False``), making the parameter dead.  Only force it
    # on when the search string clearly targets SAV files.
    if 'SAV' in searchstring:
        is_sav = True
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')

    # Extract the sclock and version for each file and ensure that only one
    # file per sclock is read -- the one with the highest version number.
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]
        P_version[i] = basenames[i][-5:-4]
    sclocks_unique = np.unique(sclocks)
    filelist_new = np.array([], dtype='str')
    for sclock in sclocks_unique:
        match = (sclocks == sclock)
        maxP = P_version[match] == max(P_version[match])
        filelist_new = np.append(filelist_new, filelist[match][maxP])

    filelist = filelist_new
    # TODO: speed this up for large numbers of files and add a progress bar.
    combined = None
    for fname in filelist:
        tmp = CCS_SAV(fname) if is_sav else CCS(fname)

        if combined is None:
            combined = tmp
        else:
            # BUG FIX: the original used a bare ``except:`` around a NameError
            # as first-iteration control flow, which also silently swallowed
            # real errors (e.g. a missing 'wvl' level).  Comparing the column
            # sets guards against rounding mismatches between files.
            cols1 = list(combined['wvl'].columns)
            cols2 = list(tmp['wvl'].columns)
            if set(cols1) == set(cols2):
                combined = pd.concat([combined, tmp])
            else:
                print("Wavelengths don't match!")
    return combined
    
        
 No newline at end of file

autocnet/fileio/io_edr.py

deleted 100644 → 0
+0 −27
Original line number Diff line number Diff line

import pandas as pd
from autocnet.fileio.header_parser import header_parser

def EDR(input_data):
    """Read a ChemCam EDR text file into a pandas DataFrame.

    Lines 3-28 of the file hold ``key: value`` header pairs and line 30 is
    the first data row; the number of whitespace-separated fields on that row
    determines the shot count.  Header values are attached as columns of the
    returned (transposed) frame.
    """
    shots = None
    with open(input_data, 'r') as f:
        header = {}
        for i, row in enumerate(f):
            if i < 2 or i == 28:
                pass  # skip the leading title lines and the separator row
            elif i < 28:
                header.update(header_parser(row, ':'))  # read the header values into a dict
            elif i == 29:
                row = row.split()
                shotnums = list(range(1, len(row) + 1))
                shots = ['shot' + str(n) for n in shotnums]
                break  # nothing past line 29 is needed in this pass

    # BUG FIX: guard against short files, which previously surfaced as an
    # opaque UnboundLocalError on ``shots`` below.
    if shots is None:
        raise ValueError('%s is too short to be an EDR file' % input_data)

    # The data block is four-space delimited; a multi-character separator
    # requires pandas' python engine, named explicitly to avoid the warning.
    df = pd.read_csv(input_data, sep='    ', skiprows=29, names=shots,
                     engine='python')
    df = df.transpose()
    # Insert the header metadata as columns.
    for label, data in header.items():
        df[label] = data
    return df
          
          
                    

autocnet/fileio/io_jsc.py

deleted 100644 → 0
+0 −120
Original line number Diff line number Diff line

import os
import numpy as np
import pandas as pd
from autocnet.fileio.utils import file_search
#This function reads the lookup tables used to expand metadata from the file names
#This is separated from parsing the filenames so that for large lists of files the 
#lookup tables don't need to be read over and over
#
#Info in the tables is stored in a dict of dataframes so that only one variable 
#(the dict) needs to be passed between functions
def read_refdata(LUT_files):
    """Load the filename-metadata lookup tables.

    Parameters
    ----------
    LUT_files : dict
        Maps the keys 'spect', 'laser', 'exp' and 'sample' to CSV paths.

    Returns
    -------
    dict
        The same keys mapped to DataFrames indexed on each CSV's first
        column.  Bundling the tables in one dict keeps a single variable
        moving between the JSC reader functions, so for large file lists
        the tables are read only once.
    """
    table_names = ('spect', 'laser', 'exp', 'sample')
    return {name: pd.read_csv(LUT_files[name], index_col=0)
            for name in table_names}

#This function parses the file names to record metadata related to the observation
def jsc_filename_parse(filename, refdata):
    """Expand a JSC LIBS file name into a one-row metadata DataFrame.

    The underscore-separated name encodes, in order: LIBS sample ID,
    location, lab, gas+pressure, laser ID+power, experiment ID and
    spectrometer ID.  Sample, laser and experiment IDs are expanded through
    the *refdata* lookup tables produced by ``read_refdata``; sample IDs not
    found in the table fall back to the 'Unknown' row.
    """
    parts = os.path.basename(filename).split('_')
    libs_ID = parts[0]
    laserID = parts[4][0]
    expID = parts[5]
    spectID = parts[6]

    def lut_row(table, key, index_name):
        # One row of a lookup table, reshaped to a single-row frame whose
        # index has been pulled out as a named column, so it can be
        # concatenated column-wise with the rest of the metadata.
        row = pd.DataFrame(table.loc[key]).T
        row.index.name = index_name
        row.reset_index(level=0, inplace=True)
        return row

    sample_key = libs_ID if libs_ID in refdata['sample'].index else 'Unknown'
    file_info = lut_row(refdata['sample'], sample_key, 'LIBS ID')
    file_info['loc'] = int(parts[1])
    file_info['lab'] = parts[2]
    file_info['gas'] = parts[3][0]
    file_info['pressure'] = float(parts[3][1:])

    if laserID in refdata['laser'].index:
        file_info = pd.concat(
            [file_info, lut_row(refdata['laser'], laserID, 'Laser Identifier')],
            axis=1)

    file_info['laser_power'] = float(parts[4][1:])
    if expID in refdata['exp'].index:
        file_info = pd.concat(
            [file_info, lut_row(refdata['exp'], expID, 'Exp Identifier')],
            axis=1)

    # Spectrometer expansion (keyed on spectID) was disabled in the original
    # implementation and is intentionally left out here as well.
    return file_info
    

def JSC(input_file, refdata):
    """Read one JSC LIBS spectrum file into a DataFrame with metadata columns.

    The file is tab-delimited with 14 preamble rows; its first two columns
    are time stamps and the remainder are intensities keyed by wavelength.
    Spectral columns get ('wvl', wavelength) MultiIndex keys so they can be
    separated from metadata later, and the filename-derived metadata is
    replicated onto every row.
    """
    raw = pd.read_csv(input_file, skiprows=14, sep='\t')
    raw = raw.rename(columns={raw.columns[0]: 'time1', raw.columns[1]: 'time2'})
    times = raw[['time1', 'time2']]   # split the two time columns off
    spectra = raw[raw.columns[2:]]    # trim the frame down to just the spectra

    # Make a MultiIndex for each wavelength column so spectra can be easily
    # isolated from the metadata later.
    wvl_cols = [('wvl', round(float(name), 5))
                for name in spectra.columns.tolist()]
    spectra.columns = pd.MultiIndex.from_tuples(wvl_cols)

    # Build a metadata frame (one identical row per spectrum) and add times.
    meta = pd.concat([jsc_filename_parse(input_file, refdata)] * len(spectra.index))
    meta.index = spectra.index
    meta = pd.concat([meta, times], axis=1)

    # Copy the metadata columns onto the spectral frame.
    for col in meta.columns.tolist():
        spectra[col] = meta[col]

    return spectra
   
        


def jsc_batch(directory, LUT_files, searchstring='*.txt'):
    """Read every matching JSC LIBS file under *directory* into one frame.

    Files are grouped by spectrometer ID (the 7th underscore-separated field
    of the file name); each group is concatenated row-wise, then the
    per-spectrometer frames are merged into a single combined frame.
    """
    # Read in the lookup tables used to expand filename metadata; doing this
    # once up front avoids re-reading them for every file.
    refdata = read_refdata(LUT_files)
    # Get the list of files matching the search string in the directory.
    filelist = file_search(directory, searchstring)

    # Spectrometer ID for every file in the list.
    spectIDs = [os.path.basename(path).split('_')[6] for path in filelist]

    # One concatenated frame per unique spectrometer.
    # NOTE(review): the boolean indexing below assumes file_search returns a
    # numpy array rather than a plain list — confirm against its definition.
    frames = []
    for spect in np.unique(spectIDs):
        sublist = filelist[np.in1d(spectIDs, spect)]
        frames.append(pd.concat([JSC(path, refdata) for path in sublist]))

    # Merge the per-spectrometer frames into one frame with all the data.
    combined = frames[0]
    for frame in frames[1:]:
        combined = combined.merge(frame)

    return combined
                    
+0 −24
Original line number Diff line number Diff line
from osgeo import gdal
import numpy as np

def openm3(input_data):
    """Open an M3 image with GDAL.

    Returns a tuple of (wavelength array, band-1 reflectance array, the open
    GDAL dataset).  Accepts either the .img file or its .hdr sidecar; GDAL
    wants the .img, so a .hdr path is rewritten before opening.
    """
    parts = input_data.split('.')
    if parts[-1] == 'hdr':
        # GDAL wants the img, but many users aim at the .hdr.
        input_data = parts[0] + '.img'
    ds = gdal.Open(input_data)
    ref_array = ds.GetRasterBand(1).ReadAsArray()
    wv_array = metadatatoband(ds.GetMetadata())
    return wv_array, ref_array, ds

def metadatatoband(metadata):
    """Convert a GDAL metadata dict into a sorted array of band wavelengths.

    Each metadata value is either a plain number or a string whose last
    space-separated token is a parenthesised wavelength, e.g.
    "Band 1 (450.0)"; both forms are parsed to float.
    """
    wv2band = []
    # BUG FIX: the original used the Python-2-only ``iteritems`` and
    # referenced an undefined name ``value`` (a NameError silently swallowed
    # by a bare except, which forced every value down the fallback path).
    for v in metadata.values():
        try:
            wv2band.append(float(v))
        except ValueError:
            # Fall back to the parenthesised form: "... (450.0)" -> 450.0.
            wv2band.append(float(v.split(" ")[-1].split("(")[1].split(")")[0]))
    # BUG FIX: ``sort(key=int)`` truncates to whole numbers, leaving
    # wavelengths that share an integer part in arbitrary relative order;
    # sort numerically instead.
    wv2band.sort()
    return np.asarray(wv2band)
Loading