Commit 3aad4d7e authored by Ryan Anderson
Browse files

Fiddling with generating random folds and looking up CCAM data in the master list file

parent cb8452b8
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -40,6 +40,8 @@ def CCS(input_data):
            label='darkspec'
        df[label]=data 
    
    df.index.rename('shotnum',inplace=True)
    df.reset_index(level=0,inplace=True)
    return df
        
def CCS_SAV(input_data):
@@ -81,7 +83,8 @@ def CCS_SAV(input_data):
        df[label]=data
    
    df['sclock']=pd.to_numeric(df['sclock'])

    df.index.rename('shotnum',inplace=True)
    df.reset_index(level=0,inplace=True)
    
    return df    

+9 −5
Original line number Diff line number Diff line
@@ -10,13 +10,17 @@ The default settings are for looking up ChemCam CCS csv data in the ChemCam mast
"""
import pandas as pd
def lookup(df,lookupfile,sep=',',skiprows=1,left_on='sclock',right_on='Spacecraft Clock'):
    """Attach metadata from one or more lookup tables to the rows of ``df``.

    Every lookup file is read as delimited text, the resulting tables are
    stacked into a single lookup table, and that table is inner-merged with
    ``df``. The defaults are set up for matching ChemCam CCS csv data against
    the ChemCam master list files.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to annotate; must contain the ``left_on`` column.
    lookupfile : str or iterable of str
        Path of a lookup file, or several paths whose contents are
        concatenated (e.g. several master list files).
    sep : str
        Field delimiter passed to ``pandas.read_csv``.
    skiprows : int
        Number of leading rows to skip in each lookup file.
    left_on : str
        Key column in ``df``.
    right_on : str
        Key column in the lookup table.

    Returns
    -------
    pandas.DataFrame
        Inner merge of ``df`` with the combined lookup table; rows of ``df``
        without a match in the lookup table are dropped.

    Raises
    ------
    ValueError
        If no lookup file is supplied.
    """
    # A bare string would otherwise be iterated one character at a time.
    if isinstance(lookupfile,str):
        lookupfile=[lookupfile]

    # Read each file exactly once; read errors propagate to the caller
    # instead of being swallowed by a catch-all handler.
    tables=[_read_lookup_csv(x,sep,skiprows) for x in lookupfile]
    if not tables:
        raise ValueError('lookup requires at least one lookup file')

    # A single table is used as-is; several are stacked row-wise.
    lookupdf=tables[0] if len(tables)==1 else pd.concat(tables)

    combined=pd.merge(df,lookupdf,left_on=left_on,right_on=right_on,how='inner')
    return combined


def _read_lookup_csv(path,sep,skiprows):
    """Read one lookup file, skipping malformed rows instead of failing."""
    try:
        return pd.read_csv(path,sep=sep,skiprows=skiprows,on_bad_lines='skip')
    except TypeError:
        # Older pandas releases do not know on_bad_lines; fall back to the
        # keyword they provide for the same behavior.
        return pd.read_csv(path,sep=sep,skiprows=skiprows,error_bad_lines=False)
+7 −4
Original line number Diff line number Diff line
@@ -7,9 +7,9 @@ Created on Fri Dec 4 12:51:34 2015
from sklearn import cross_validation
import numpy as np
def random(df,nfolds=5,seed=10,groupby=None):
    df['Folds']='None'
    df['Folds']='None' #Create an entry in the data frame that holds the folds
    foldslist=np.array(df['Folds'])
    if groupby==None:
    if groupby==None: #if no column name is listed to group on, just create random folds
        n=len(df.index)
        folds=cross_validation.KFold(n,nfolds,shuffle=True,random_state=seed)
        i=1        
@@ -18,6 +18,9 @@ def random(df,nfolds=5,seed=10,groupby=None):
            i=i+1
    
    else: 
        #if a column name is provided, get all the unique values and define folds
        #so that all rows of a given value fall in the same fold 
        #(this is useful to ensure that training and test data are truly independent)
        unique_inds=np.unique(df[groupby]) 
        folds=cross_validation.KFold(len(unique_inds),nfolds,shuffle=True,random_state=seed)
        foldslist=np.array(df['Folds'])
+8 −10
Original line number Diff line number Diff line
@@ -47,19 +47,17 @@ print("Test reading data from CSV with lots of spectra and their metadata in one
dbfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\full_db_mars_corrected_dopedTiO2.csv"
db=CSV(dbfile,setindex='Name')

#ccs_br.random_folds(nfolds=6,seed=1,groupby='seqid')
print("Test assigning random folds")
ccs=spectral_data(ccs_batch_csv)
ccs.random_folds(nfolds=6,seed=1,groupby='seqid')

print("Test looking up ChemCam metadata")

#foo.transpose().sort_index(level=1).to_csv('JSC_output_test.csv')
masterlist_files=[r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST.csv",
                  r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST_SOL_0010_0801.csv",
                  r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST_SOL_0805_0980.csv"]

#newx=list(foo.wvl.columns)
#blah=ccs.interp(newx)




##masterlist=["E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
##blah=lookup(ccs_br,masterlist)
ccs=lookup(ccs.df,masterlist_files)
#
#jsctest=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\LIBS_TEST\TestSS_UV_01.txt"
#jsc=JSC(jsctest)