Commit cb8452b8 authored by Ryan Anderson's avatar Ryan Anderson
Browse files

Make JSC files import with a multiindex similar to CCS files

Using a multiindex makes it easy to select just the spectral data
without the metadata which will be useful later on.
parent f4dcd86a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ Created on Tue Nov 24 13:35:56 2015

def header_parser(row,delim):
    #get rid of various unwanted characters
    badlist=['#','^',"'",'*','/']
    badlist=['#','^',"'",'*']
    for i in badlist:
        row=row.replace(i,'')
    row.strip()   
+14 −28
Original line number Diff line number Diff line
@@ -25,7 +25,7 @@ def CCS(input_data):
    df['Pversion']=fname[34:36]        
    #transpose the data frame
    
    #read the file header and put information into the dataframe as new columns (inefficient, but much easier to concatenate data from multiple files)
    #read the file header and put information into the dataframe as new columns (inefficient to store this data many times, but much easier to concatenate data from multiple files)
    with open(input_data,'r') as f:
        header={}
        for i,row in enumerate(f.readlines()):
@@ -36,6 +36,8 @@ def CCS(input_data):
    for label,data in header.items(): 
        if '_float' in label:
            label=label.replace('_float','')
        if label=='dark':
            label='darkspec'
        df[label]=data 

    return df
@@ -48,48 +50,32 @@ def CCS_SAV(input_data):
    aspectra=np.array([np.hstack([d['auv'],d['avis'],d['avnir']])]).T
    mspectra=np.array([np.hstack([d['muv'],d['mvis'],d['mvnir']])]).T
    
    #create tuples for the spectral columns to use as multiindex
    wvls=list(np.hstack([d['defuv'],d['defvis'],d['defvnir']]))
    for i,x in enumerate(wvls):
        wvls[i]=('wvl',round(x,5))
    
    #remove the above elements from the dict
    del d['uv']
    del d['vis']
    del d['vnir']
    del d['auv']
    del d['avis']
    del d['avnir']
    del d['muv']
    del d['mvis']
    del d['mvnir']
    del d['defuv']
    del d['defvis']
    del d['defvnir']
    
    #define column names
    shotnums=list(range(1,d['nshots']+1))
    shots=['shot'+str(i) for i in shotnums]
    shots.extend(['ave','median'])
    
    #create the data frame to hold the spectral data
    df = pd.DataFrame(np.hstack([spectra,aspectra,mspectra]),columns=shots,index=pd.MultiIndex.from_tuples(wvls))        
    df=df.transpose()
    
        #        #extract data from the PDS label info
#        pdslabel={}
#        for i in d['label_info']:
#            print(str(i.decode()))
#            if type(i) is bytes:
#                pdslabel.update(io_header_parser(i.decode(),'='))
#            elif len(i)>0:
#                pdslabel.update(io_header_parser(i,'='))
        
        
    del d['label_info']  #not currently using PDS label info        
    #remove the above elements from the dict
    to_remove=['uv','vis','vnir','auv','avis','avnir','muv','mvis','mvnir','defuv','defvis','defvnir','label_info']
    for x in to_remove:
        del d[x]
           
    #extract info from the file name
    fname=os.path.basename(input_data)
    d['sclock']=fname[4:13]
    d['seqid']=fname[25:34].upper()
    d['Pversion']=fname[34:36]
    
    #Add metadata to the data frame by stepping through the d dict
    for label,data in d.items(): 
        if type(data) is bytes: data=data.decode()
        df[label]=data
+15 −2
Original line number Diff line number Diff line
@@ -70,12 +70,25 @@ def jsc_filename_parse(filename,refdata):
def JSC(input_file,refdata):
    """Read one JSC spectra file into a data frame with multiindexed wavelength columns.

    The file is parsed as tab-separated text after skipping its first 14 rows.
    The first two columns are renamed 'time1' and 'time2'; every remaining
    column label is converted to a float, rounded to 5 decimal places and
    placed under a top-level 'wvl' key so the spectral columns can be selected
    separately from the metadata.

    Parameters
    ----------
    input_file : path of the file to read; also passed to jsc_filename_parse.
    refdata : reference data handed through unchanged to jsc_filename_parse.

    Returns
    -------
    pandas.DataFrame containing the 'wvl' columns plus one column for each
    metadata field and for the two time columns.
    """
    data=pd.read_csv(input_file,skiprows=14,sep='\t')
    data=data.rename(columns={data.columns[0]:'time1',data.columns[1]:'time2'})
    times=data[['time1','time2']] #split the two time columns from the data frame
    data=data[data.columns[2:]] #trim the data frame so it is just the spectra
    
    #make a multiindex for each wavelength column so they can be easily isolated from metadata later
    data.columns=pd.MultiIndex.from_tuples([('wvl',round(float(x),5)) for x in data.columns])
    
    #create a metadata frame (the parsed file name info repeated once per row of data) and add the times to it
    #NOTE(review): presumably jsc_filename_parse returns a one-row data frame - confirm
    metadata=pd.concat([jsc_filename_parse(input_file,refdata)]*len(data.index))
    metadata.index=data.index #align with the spectra so the column assignment below matches row for row
    metadata=pd.concat([metadata,times],axis=1)
    
    #add the metadata columns to the data frame
    for col in metadata.columns.tolist():
        data[col]=metadata[col]
        
    return data
   
        

+33 −35
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ Created on Mon Nov 23 11:55:46 2015
"""
import autocnet
from autocnet.fileio.io_ccs import CCS,CCS_SAV,ccs_batch
from autocnet.fileio.io_jsc import JSC,jsc_batch
from autocnet.fileio.io_jsc import JSC,jsc_batch,read_refdata
from autocnet.fileio.io_edr import EDR
from autocnet.fileio.io_csv import CSV
from autocnet.fileio.lookup import lookup
@@ -16,64 +16,62 @@ from autocnet.spectral.spectra import Spectra
from autocnet.spectral.spectral_data import spectral_data
import pandas as pd

#
#df1=pd.DataFrame(['a','b',1.0,5.6,7.6,6.8],index=['A','B','C',1.0,2.0,3.0]).T
#df2=pd.DataFrame(['a','b',1.0,5.6,7.6,6.8],index=['A','B','C',4.0,5.0,6.0]).T
#df3=pd.DataFrame(['a','b',1.0,5.6,7.6,6.8],index=['A','B','C',7.0,8.0,9.0]).T
#df4=pd.DataFrame(['a1','b',1.0,5.6,7.6,6.8],index=['A','B','C',1.0,2.0,3.0]).T
#df5=pd.DataFrame(['a1','b',1.0,5.6,7.6,6.8],index=['A','B','C',4.0,5.0,6.0]).T
#df6=pd.DataFrame(['a1','b',1.0,5.6,7.6,6.8],index=['A','B','C',7.0,8.0,9.0]).T
#




print("Test reading Chemcam CCS data")
data_file=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\CL5_398645626CCS_F0030004CCAM02013P1.csv"
ccs_result=CCS(data_file)
ccs=spectral_data(ccs_result)

data_file=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\CL5_398645626CCS_F0030004CCAM02013P1.SAV"
ccs_sav_result=CCS_SAV(data_file)
#ccs_sav_result=spectral_data(ccs_sav_result)

data_dir=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM"
ccs_batch_csv=ccs_batch(data_dir,searchstring='*CCS*.csv')
ccs_batch_SAV=ccs_batch(data_dir,searchstring='*CCS*.SAV')

print("Test reading JSC data")
spect_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Spectrometer_Table.csv"
experiment_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Experiment_Setup_Table.csv"
laser_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Laser_Setup_Table.csv"
sample_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Sample_Table.csv"
LUT_files={'spect':spect_table,'exp':experiment_table,'laser':laser_table,'sample':sample_table}
refdata=read_refdata(LUT_files)

data_dir=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\LIBS USGS\DATA"
foo=jsc_batch(data_dir,LUT_files)
testdata=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\ChemCam\CL5_398645626CCS_F0030004CCAM02013P3.csv"
ccs_result=CCS(testdata)
ccs=spectral_data(ccs_result)
ccs_br=ccs_batch(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\ChemCam",searchstring='*CCS*.csv')
ccs_br2=ccs_batch(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\ChemCam",searchstring='*CCS*.SAV')
ccs_br=spectral_data(ccs_br)
ccs_br.random_folds(nfolds=6,seed=1,groupby='seqid')
JSC_data=jsc_batch(data_dir,LUT_files)
data_file=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\LIBS USGS\DATA\LIB00001_02_J_B7.29_A12.48_A_S594_10-49-43-063.txt"
JSC_single=JSC(data_file,refdata)

foo=jsc_batch(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\LIBS_TEST",searchstring='*.txt')
foo.transpose().sort_index(level=1).to_csv('JSC_output_test.csv')

newx=list(foo.wvl.columns)
blah=ccs.interp(newx)

dbfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\full_db_mars_corrected_dopedTiO2.csv"
print("Test reading data from CSV with lots of spectra and their metadata in one file")
dbfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\full_db_mars_corrected_dopedTiO2.csv"
db=CSV(dbfile,setindex='Name')

print('foo')
#
#
#ccs_br.random_folds(nfolds=6,seed=1,groupby='seqid')


#foo.transpose().sort_index(level=1).to_csv('JSC_output_test.csv')

#newx=list(foo.wvl.columns)
#blah=ccs.interp(newx)




##masterlist=["E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
##blah=lookup(ccs_br,masterlist)
#
#jsctest=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\LIBS_TEST\TestSS_UV_01.txt"
#jsc=JSC(jsctest)

foo=db.interp_spectra(db,newx)
#foo=db.interp_spectra(db,newx)
#
#edrtest=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\ChemCam\CL5_399178818EDR_F0030078CCAM01019M1_spect.TXT"
#edr=EDR(edrtest)
#

savtest=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\ChemCam\CL5_398736801CCS_F0030004CCAM01014P3.SAV"
sav=CCS_SAV(savtest)
maskfile=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\DataProcessing\Working\Input\mask_minors_noise.csv"
masksav=sav.mask(sav,maskfile)

#maskfile=r"C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\DataProcessing\Working\Input\mask_minors_noise.csv"
#masksav=sav.mask(sav,maskfile)

print("foo")
 No newline at end of file