Commit 4ce552d7 authored by Ryan Anderson's avatar Ryan Anderson
Browse files

Getting norm_total working

Got norm_total working and handling multiple wavelength ranges, but it
is a convoluted mess and could probably be done a lot more cleanly.
Minor tweaks to a few other scripts.
parent 3aad4d7e
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -67,3 +67,8 @@ target/

# Vim
*.swp

# Data and output files
# (a bare ".csv" only matches a file literally named ".csv"; the leading
#  "*" is required to ignore every file with that extension)
*.csv
*.png
*.SAV
 No newline at end of file

Ti_spect_compare.py

0 → 100644
+303 −0
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 25 08:25:47 2016

@author: rbanderson
"""
#import sys
#sys.path.append(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART")
from autocnet.fileio.io_ccs import ccs_batch
from autocnet.fileio.io_jsc import JSC,jsc_batch,read_refdata
from autocnet.fileio.lookup import lookup
from autocnet.spectral.interp import interp_spect
from autocnet.spectral.mask import mask
from autocnet.spectral.spectra import Spectra
from autocnet.spectral.spectral_data import spectral_data
from autocnet.spectral.norm_total import norm_total,norm_spect
import pandas as pd
import numpy as np
import matplotlib.pyplot as plot
import time
from sklearn.decomposition import PCA


##Read CCAM data
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\0-250"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt1=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True)  #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_0-250.csv')
#
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\251-500"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt2=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True)  #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_251-500.csv')
#
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\501-750"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt3=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True)  #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_501-750.csv')
#
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\751-1000"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt4=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True)  #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_751-1000.csv')
#
#data_dir=r"E:\ChemCam\ops_ccam_team\sav\1001-1250"
#
#masterlists=[r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv","E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
#t1=time.time()
#ccs=ccs_batch(data_dir,searchstring='*CCS*.SAV')
#dt5=time.time()-t1
#
##work only with average spectra
#ccs=ccs.loc[ccs['shotnum'].isin(['ave'])]
#ccs=ccs.reset_index(drop=True)  #This is important! without it, the lookup is screwed up
#ccs=lookup(ccs,masterlists)
#
##save ccs data
#ccs.to_csv('CCAM_data_aves_1001_1250.csv')

#f1=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_0-250.csv"
#f2=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_251-500.csv"
#f3=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_501-750.csv"
#f4=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_751-1000.csv"
#f5=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_1001_1250.csv"
#
#ccs1=pd.read_csv(f1,header=[0,1])
#ccs2=pd.read_csv(f2,header=[0,1])
#ccs3=pd.read_csv(f3,header=[0,1])
#ccs4=pd.read_csv(f4,header=[0,1])
#ccs5=pd.read_csv(f5,header=[0,1])
#
#ccs=pd.concat([ccs1,ccs2,ccs3,ccs4,ccs5])
####
#ccs.to_csv('CCAM_data_aves.csv')
#Load the previously saved CCAM average spectra.
#header=[0,1] makes pandas read the first two rows as a two-level column index,
#which is what allows the ccs['meta'][...] and ccs['wvl'] lookups used below.
ccs=pd.read_csv(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves.csv",header=[0,1])
pca=PCA(n_components=2)
#Keep only the rows whose 'Distance (mm)' metadata value exceeds 1.7
#NOTE(review): the column is labelled mm but 1.7 looks like a value in metres - confirm the units
ccs_geo=ccs.loc[ccs['meta']['Distance (mm)']>1.7]

##Filter out just Ti targets
#ccs_Ti=ccs.loc[np.squeeze(ccs['meta']['Target'].isin(['Cal Target 10']))]
#ccs_Ti.to_csv('CCAM_data_aves_Ti.csv')

#Load the Ti subset that the commented-out lines above wrote out on an earlier run
ccs_Ti=pd.read_csv(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\CCAM_data_aves_Ti.csv",header=[0,1])


#Use the Ti data's own 'wvl' column labels as the common wavelength axis and
#pass both data sets through interp_spect with that axis
#NOTE(review): labels read back from a CSV header may be strings rather than floats -
#confirm interp_spect copes with that
xnew=np.array(ccs_Ti['wvl'].columns)
ccs_Ti=interp_spect(ccs_Ti,xnew)
ccs_geo=interp_spect(ccs_geo,xnew)

#Plot the first 'Rocknest3' spectrum after each processing stage (as loaded and
#interpolated, masked, normalized) as three stacked panels, then save the figure.
#
#The rows are selected with ccs_geo's own 'Target' metadata. The mask used to be
#built from the unprocessed ccs frame, which only picks the right rows if pandas
#can align ccs's index with ccs_geo's index after every processing step.
plot.figure(figsize=(10,8))
plot.subplot(311)
plot.xlim([200,900])
rocknest3=ccs_geo.loc[ccs_geo['meta']['Target'].isin(['Rocknest3'])]
plot.plot(rocknest3['wvl'].columns.values,rocknest3['wvl'].iloc[0,:],label='Raw',c='b')
plot.legend()

#Mask spectra
maskfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\mask_minors_noise.csv"
ccs_Ti=mask(ccs_Ti,maskfile)
ccs_geo=mask(ccs_geo,maskfile)
plot.subplot(312)
plot.xlim([200,900])
rocknest3=ccs_geo.loc[ccs_geo['meta']['Target'].isin(['Rocknest3'])]
plot.plot(rocknest3['wvl'].columns.values,rocknest3['wvl'].iloc[0,:],label='Masked',c='r')
plot.legend()

#Normalize Spectra
#Each tuple is a (low, high) wavelength range handed to norm_spect
ranges=[(0,350),(350,460),(460,1000)]
ccs_Ti=norm_spect(ccs_Ti,ranges)
ccs_geo=norm_spect(ccs_geo,ranges)
plot.subplot(313)
plot.xlim([200,900])
rocknest3=ccs_geo.loc[ccs_geo['meta']['Target'].isin(['Rocknest3'])]
plot.plot(rocknest3['wvl'].columns.values,rocknest3['wvl'].iloc[0,:],label='Normalized',c='g')

plot.legend()
plot.savefig('Rocknest_example.png',dpi=600)
plot.show()

#Fit the 2-component PCA to the processed Mars spectra and plot the scores,
#one colour per unique 'Sequence' metadata value.
do_pca=pca.fit(ccs_geo['wvl'])
seqs=ccs_geo['meta']['Sequence']
seqs_uniq=np.unique(seqs)
plot.figure(figsize=(8,8))
plot.title('PCA of Mars Targets')
#explained_variance_ratio_ holds fractions between 0 and 1, so scale by 100
#to make the number agree with the '%' sign in the axis label
plot.xlabel('PC1 ('+str(round(100*do_pca.explained_variance_ratio_[0],2))+'%)')
plot.ylabel('PC2 ('+str(round(100*do_pca.explained_variance_ratio_[1],2))+'%)')

#One jet-colormap colour per sequence
colors=plot.cm.jet(np.linspace(0,1,len(seqs_uniq)))
for t,i in enumerate(seqs_uniq):
    #Project only the spectra belonging to this sequence
    scores=do_pca.transform(ccs_geo['wvl'].loc[seqs.isin([i])])
    plot.scatter(scores[:,0],scores[:,1],c=colors[t,:],label=i)
plot.savefig('Full_CCS_PCA.png',dpi=600)
plot.show()

#Fit a fresh 2-component PCA to the Ti spectra alone and plot the scores
pca=PCA(n_components=2)
do_pca=pca.fit(ccs_Ti['wvl'])
scores_ccs_Ti=do_pca.transform(ccs_Ti['wvl'])


plot.figure()
plot.scatter(scores_ccs_Ti[:,0],scores_ccs_Ti[:,1],c='r')
plot.show()

#Keep only the rows whose first-component score is below 0.06, then refit and replot
#NOTE(review): 0.06 is a hard-coded threshold, presumably chosen from the plot above
#to drop outliers - confirm it is still appropriate if the input data changes
ccs_Ti=ccs_Ti.iloc[scores_ccs_Ti[:,0]<0.06,:]
do_pca=pca.fit(ccs_Ti['wvl'])
scores_ccs_Ti=do_pca.transform(ccs_Ti['wvl'])


plot.figure()
plot.scatter(scores_ccs_Ti[:,0],scores_ccs_Ti[:,1],c='r')
plot.show()


#get average mars spectra
#(column-wise sum over the remaining rows divided by the number of rows)
ccs_Ti_ave=ccs_Ti['wvl'].sum(axis=0)/len(ccs_Ti.index)


#Read JSC data
#spect_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Spectrometer_Table.csv"
#experiment_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Experiment_Setup_Table.csv"
#laser_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Laser_Setup_Table.csv"
#sample_table=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Input\Sample_Table.csv"
#LUT_files={'spect':spect_table,'exp':experiment_table,'laser':laser_table,'sample':sample_table}
#data_dir=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\LIBS USGS\DATA"
#JSC_data=jsc_batch(data_dir,LUT_files)
#JSC_data.to_csv('JSC_data.csv')
##Filter out just the Ti targets
#JSC_Ti=JSC_data.loc[np.squeeze(JSC_data['Sample ID'].isin(['TISDT01']))]

#Load the saved JSC Ti spectra; header=[0,1] reads two header rows as a two-level column index
JSC_Ti=pd.read_csv(r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\autocnet\JSC_Ti_data.csv",header=[0,1])

#Interpolate JSC data to CCAM data
#(xnew is the wavelength axis taken from the CCAM Ti data earlier in the script)
JSC_Ti=interp_spect(JSC_Ti,xnew)

##Combine JSC and CCAM Ti data
#data=pd.concat([JSC_Ti_interp,ccs_Ti])
#data.to_csv('JSC_CCS_Ti_data.csv')
#Mask spectra
#(same mask file as was applied to the CCAM data)
JSC_Ti=mask(JSC_Ti,maskfile)
#Normalize Spectra
#(same wavelength ranges as were used for the CCAM data)

JSC_Ti=norm_spect(JSC_Ti,ranges)
#data_masked['wvl']=norm_total(data_masked['wvl'])
#
#data_masked['wvl']=data_masked['wvl'].div(data_masked['wvl'].sum(axis=1),axis=0)
#
#data_mask_norm=data_masked['wvl'].copy()
#for row in data_mask_norm.index.values:
#    data_mask_norm.iloc[row]/=sum(data_mask_norm.iloc[row])
#data_masked['wvl']=data_mask_norm
#data_masked_norm.to_csv('JSC_CCS_Ti_data_masked_norm.csv')    
#data_mask_norm=norm_total(data_masked)
#data_mask_norm.to_csv('JSC_CCS_Ti_data_mask_norm.csv')
#print('foo')

#get average of JSC spectra
#(column-wise sum over all rows divided by the number of rows)
JSC_ave=JSC_Ti['wvl'].sum(axis=0)/len(JSC_Ti.index)

#Per-wavelength ratio of the average Mars Ti spectrum to the average JSC Ti spectrum
ratio=ccs_Ti_ave/JSC_ave
#Replace ratios whose magnitude exceeds 100 with 1.0 (i.e. no scaling at that wavelength)
#NOTE(review): NaN ratios (e.g. from 0/0) do not satisfy this comparison and are left as NaN - confirm that is intended
ratio[abs(ratio)>100]=1.0
plot.plot(ratio)
plot.show()

#Scale every JSC spectrum by the per-wavelength Mars/JSC ratio
JSC_Ti_r=JSC_Ti['wvl'].mul(ratio,axis=1)

#Split the (unscaled) JSC spectra by the 'laser_power' metadata value.
#The 11.96 subset used to be filtered on 11.98, which disagrees with the variable
#name, with the 11.96 filter used further down in the script and with the
#'11.96 mJ' plot label.
JSC_Ti_1248=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([12.48])]
JSC_Ti_1196=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([11.96])]
JSC_Ti_1498=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([14.98])]
JSC_Ti_1723=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([17.23])]

#Average spectrum of each subset (column-wise sum divided by the number of rows)
JSC_Ti_1248_ave=JSC_Ti_1248['wvl'].sum(axis=0)/len(JSC_Ti_1248.index)
JSC_Ti_1196_ave=JSC_Ti_1196['wvl'].sum(axis=0)/len(JSC_Ti_1196.index)
JSC_Ti_1498_ave=JSC_Ti_1498['wvl'].sum(axis=0)/len(JSC_Ti_1498.index)
JSC_Ti_1723_ave=JSC_Ti_1723['wvl'].sum(axis=0)/len(JSC_Ti_1723.index)

#Euclidean distance between each subset's average spectrum and the average Mars Ti spectrum
dist_1248=np.linalg.norm(JSC_Ti_1248_ave-ccs_Ti_ave)
dist_1196=np.linalg.norm(JSC_Ti_1196_ave-ccs_Ti_ave)
dist_1498=np.linalg.norm(JSC_Ti_1498_ave-ccs_Ti_ave)
dist_1723=np.linalg.norm(JSC_Ti_1723_ave-ccs_Ti_ave)

#combine mars and JSC data
#(ratio-scaled JSC spectra stacked on top of the Mars Ti spectra)
data=pd.concat([JSC_Ti_r,ccs_Ti['wvl']])


#Run PCA on spectra
#The model is fitted once on the combined data; the subsets below are only projected onto it
pca=PCA(n_components=2)
do_pca=pca.fit(data)
scores_all=do_pca.transform(data)



#Extract different laser energies
#Mars spectra are grouped by the 'Laser Energy' metadata string
mars_40A=ccs_Ti.loc[ccs_Ti['meta']['Laser Energy'].isin(['100A/40A/40A'])]['wvl']
mars_60A=ccs_Ti.loc[ccs_Ti['meta']['Laser Energy'].isin(['100A/60A/60A'])]['wvl']
mars_95A=ccs_Ti.loc[ccs_Ti['meta']['Laser Energy'].isin(['100A/95A/95A'])]['wvl']


#JSC spectra are grouped by the 'laser_power' metadata value and scaled by the Mars/JSC ratio
JSC_1248=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([12.48])]['wvl'].mul(ratio,axis=1)
JSC_1196=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([11.96])]['wvl'].mul(ratio,axis=1)
JSC_1498=JSC_Ti.loc[JSC_Ti['meta']['laser_power'].isin([14.98])]['wvl'].mul(ratio,axis=1)


#Project each group onto the two components fitted above
scores_40A=do_pca.transform(mars_40A)
scores_60A=do_pca.transform(mars_60A)
scores_95A=do_pca.transform(mars_95A)

scores_1248=do_pca.transform(JSC_1248)
scores_1196=do_pca.transform(JSC_1196)
scores_1498=do_pca.transform(JSC_1498)

#Scatter plot of PC1 vs PC2, one colour per group
plot.figure(figsize=(5,5))
plot.scatter(scores_40A[:,0],scores_40A[:,1],label='Mars (40A)',c='r')
plot.scatter(scores_60A[:,0],scores_60A[:,1],label='Mars (60A)',c='g')
plot.scatter(scores_95A[:,0],scores_95A[:,1],label='Mars (95A)',c='b')

plot.scatter(scores_1248[:,0],scores_1248[:,1],label='JSC (12.48 mJ)',c='c')
plot.scatter(scores_1196[:,0],scores_1196[:,1],label='JSC (11.96 mJ)',c='m')
plot.scatter(scores_1498[:,0],scores_1498[:,1],label='JSC (14.98 mJ)',c='y')
plot.legend()

plot.savefig('PCA_Ti_JSC_CCS.png',dpi=600)
plot.show()
#NOTE(review): leftover debugging output
print('foo')
+30 −7
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ import pandas as pd
import scipy
from autocnet.fileio.header_parser import header_parser
from autocnet.fileio.utils import file_search
import copy

def CCS(input_data):
    df = pd.DataFrame.from_csv(input_data, header=14)
@@ -25,7 +26,8 @@ def CCS(input_data):
    df['Pversion']=fname[34:36]        
    #transpose the data frame
    
    #read the file header and put information into the dataframe as new columns (inneficient to store this data many times, but much easier to concatenate data from multiple files)
    #read the file header and put information into the dataframe as new columns
    #(inefficient to store this data many times, but much easier to concatenate data from multiple files)
    with open(input_data,'r') as f:
        header={}
        for i,row in enumerate(f.readlines()):
@@ -77,7 +79,7 @@ def CCS_SAV(input_data):
    d['seqid']=fname[25:34].upper()
    d['Pversion']=fname[34:36]
    
    #Add metadata to the data frame by stepping through the d dict
    #Add metadata to the data frame by stepping through the dict
    for label,data in d.items(): 
        if type(data) is bytes: data=data.decode()
        df[label]=data
@@ -89,26 +91,47 @@ def CCS_SAV(input_data):
    return df    

def ccs_batch(directory,searchstring='*CCS*.csv',is_sav=False):
    """Read every matching CCS file in a directory into one combined data frame.

    Only one file per sclock is read: when several files share an sclock, the
    one(s) with the highest version number are kept.

    Parameters
    ----------
    directory : directory handed to file_search
    searchstring : file name pattern handed to file_search
    is_sav : if True, files are read with CCS_SAV instead of CCS. SAV reading is
        also switched on automatically when 'SAV' appears in searchstring.

    Returns
    -------
    The data frames of all files concatenated with pd.concat. A file whose 'wvl'
    column labels differ from those already collected is reported and skipped.

    Raises
    ------
    ValueError
        If no file matches searchstring in directory.
    """
    #Honor an explicit is_sav=True and still infer SAV input from the search string
    is_sav=is_sav or 'SAV' in searchstring
    filelist=np.asarray(file_search(directory,searchstring))
    if len(filelist)==0:
        raise ValueError('No files matching '+str(searchstring)+' found in '+str(directory))

    #Extract the sclock and version for each file and ensure that only one
    #file per sclock is being read, and that it is the one with the highest version number
    basenames=[os.path.basename(name) for name in filelist]
    sclocks=np.array([name[4:13] for name in basenames])
    #The single character just before the 4-character extension is the version number
    P_version=np.array([int(name[-5:-4]) for name in basenames])

    newest=[]
    for sclock in np.unique(sclocks):
        match=(sclocks==sclock)
        is_newest=P_version[match]==P_version[match].max()
        newest.extend(filelist[match][is_newest])

    #any way to speed this up for large numbers of files?
    #Should add a progress bar for importing large numbers of files
    combined=None
    for filename in newest:
        tmp=CCS_SAV(filename) if is_sav else CCS(filename)

        #The first file read simply becomes the starting data frame
        if combined is None:
            combined=tmp
            continue

        #This ensures that rounding errors are not causing mismatches in columns
        if set(combined['wvl'].columns)==set(tmp['wvl'].columns):
            combined=pd.concat([combined,tmp])
        else:
            print("Wavelengths don't match!")
    return combined

autocnet/fileio/io_csv.py

deleted100644 → 0
+0 −36
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 30 08:58:07 2015

@author: rbanderson
This is a simple function to read in CSV data.
If setindex is specified, then it uses the column of the CSV with the
specified name as the row index of the data frame
"""
import pandas as pd
def CSV(filename,sep=',',setindex=None):
    """Read a delimited text file into a data frame with spectra grouped under 'wvl'.

    Columns whose name parses as a number are treated as wavelengths: the name
    is rounded to 5 decimal places and the column is placed under the top-level
    label 'wvl' of a two-level column index. All remaining columns are appended
    after the wavelength columns under their original names.

    Parameters
    ----------
    filename : str, path of the file to read
    sep : str, field separator passed to pandas.read_csv
    setindex : optional name of a column to use as the row index

    Returns
    -------
    pandas.DataFrame
    """
    print('Reading '+filename)
    df = pd.read_csv(filename, sep=sep)
    wvlindex=[]
    cols_wvl=[]
    nonwvlindex=[]
    for i,x in enumerate(df.columns):
        try:
            wvl=round(float(x),5)
        except (TypeError,ValueError):
            #Not a number, so this is a metadata column
            nonwvlindex.append(i)
        else:
            cols_wvl.append(('wvl',wvl))
            wvlindex.append(i)

    #wvlindex/nonwvlindex hold column positions, so select with iloc
    #(plain df[...] would look the integers up as column labels).
    #Copy so that the column assignments below do not write into a slice of df.
    df_spectra=df.iloc[:,wvlindex].copy()
    df_data=df.iloc[:,nonwvlindex]
    #A two-level index cannot be built from an empty list, so only relabel
    #when at least one wavelength column was found
    if cols_wvl:
        df_spectra.columns=pd.MultiIndex.from_tuples(cols_wvl)
    for x in df_data.columns:
        df_spectra[x]=df_data[x]
    df=df_spectra

    if setindex:
        df=df.set_index([setindex])


    return df
 No newline at end of file
+8 −10
Original line number Diff line number Diff line
import datetime
import os
import re

import os
import numpy as np
import pandas as pd

from autocnet.spectral.spectra import Spectra
from autocnet.fileio.header_parser import header_parser
from autocnet.fileio.utils import file_search

#This function reads the lookup tables used to expand metadata from the file names
#This is separated from parsing the filenames so that for large lists of files the 
#lookup tables don't need to be read over and over
@@ -17,9 +11,13 @@ from autocnet.fileio.utils import file_search
#(the dict) needs to be passed between functions
def read_refdata(LUT_files):
    """Load the four lookup tables named in LUT_files.

    LUT_files is a dict with the keys 'spect', 'laser', 'exp' and 'sample',
    each giving a CSV file to read. The result is a dict with the same four
    keys whose values are the corresponding data frames, each using the first
    CSV column as its row index.
    """
    table_keys=('spect','laser','exp','sample')
    refdata={}
    for key in table_keys:
        #index_col=0 turns the first CSV column into the row index
        refdata[key]=pd.read_csv(LUT_files[key],index_col=0)
    return refdata

@@ -31,7 +29,7 @@ def jsc_filename_parse(filename,refdata):
    laserID=filename[4][0]
    expID=filename[5]
    spectID=filename[6]
    if libs_ID in refdata['sample']:
    if libs_ID in refdata['sample'].index:
        file_info=pd.DataFrame(refdata['sample'].loc[libs_ID]).T
    else:
        file_info=pd.DataFrame(refdata['sample'].loc['Unknown']).T
Loading