Loading autocnet/fileio/io_ccs.py +5 −2 Original line number Diff line number Diff line Loading @@ -40,6 +40,8 @@ def CCS(input_data): label='darkspec' df[label]=data df.index.rename('shotnum',inplace=True) df.reset_index(level=0,inplace=True) return df def CCS_SAV(input_data): Loading Loading @@ -81,7 +83,8 @@ def CCS_SAV(input_data): df[label]=data df['sclock']=pd.to_numeric(df['sclock']) df.index.rename('shotnum',inplace=True) df.reset_index(level=0,inplace=True) return df Loading autocnet/fileio/lookup.py +9 −5 Original line number Diff line number Diff line Loading @@ -10,13 +10,17 @@ The default settings are for looking up ChemCam CCS csv data in the ChemCam mast """ import pandas as pd def lookup(df,lookupfile,sep=',',skiprows=1,left_on='sclock',right_on='Spacecraft Clock'): #this loop concatenates together multiple lookup files if provided (mostly to handle the three different master lists for chemcam) #this loop concatenates together multiple lookup files if provided #(mostly to handle the three different master lists for chemcam) for x in lookupfile: try: tmp=pd.read_csv(x,sep=sep,skiprows=skiprows) tmp=pd.read_csv(x,sep=sep,skiprows=skiprows,error_bad_lines=False) lookupdf=pd.concat([lookupdf,tmp]) except: lookupdf=pd.read_csv(x, sep=sep,skiprows=skiprows) lookupdf=pd.read_csv(x, sep=sep,skiprows=skiprows,error_bad_lines=False) combined=pd.merge(df,lookupdf,left_on=left_on,right_on=right_on,how='inner') return combined temp=pd.DataFrame(df['sclock']) metadata=pd.merge(temp,lookupdf,left_on=left_on,right_on=right_on,how='inner') for col in metadata.columns: df[col]=metadata[col] return df autocnet/utils/folds.py +7 −4 Original line number Diff line number Diff line Loading @@ -7,9 +7,9 @@ Created on Fri Dec 4 12:51:34 2015 from sklearn import cross_validation import numpy as np def random(df,nfolds=5,seed=10,groupby=None): df['Folds']='None' df['Folds']='None' #Create an entry in the data frame that holds the folds foldslist=np.array(df['Folds']) if groupby==None: if groupby==None: #if no column name is listed to group on, just create random folds n=len(df.index) folds=cross_validation.KFold(n,nfolds,shuffle=True,random_state=seed) i=1 Loading @@ -18,6 +18,9 @@ def random(df,nfolds=5,seed=10,groupby=None): i=i+1 else: #if a column name is provided, get all the unique values and define folds #so that all rows of a given value fall in the same fold #(this is useful to ensure that training and test data are truly independent) unique_inds=np.unique(df[groupby]) folds=cross_validation.KFold(len(unique_inds),nfolds,shuffle=True,random_state=seed) foldslist=np.array(df['Folds']) Loading scratch.py +8 −10 Original line number Diff line number Diff line Loading @@ -47,19 +47,17 @@ print("Test reading data from CSV with lots of spectra and their metadata in one dbfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\full_db_mars_corrected_dopedTiO2.csv" db=CSV(dbfile,setindex='Name') #ccs_br.random_folds(nfolds=6,seed=1,groupby='seqid') print("Test assigning random folds") ccs=spectral_data(ccs_batch_csv) ccs.random_folds(nfolds=6,seed=1,groupby='seqid') print("Test looking up ChemCam metadata") #foo.transpose().sort_index(level=1).to_csv('JSC_output_test.csv') masterlist_files=[r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST.csv", r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST_SOL_0010_0801.csv", r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST_SOL_0805_0980.csv"] #newx=list(foo.wvl.columns) #blah=ccs.interp(newx) ##masterlist=["E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"] ##blah=lookup(ccs_br,masterlist) ccs=lookup(ccs.df,masterlist_files) # #jsctest=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\LIBS_TEST\TestSS_UV_01.txt" #jsc=JSC(jsctest) Loading Loading
autocnet/fileio/io_ccs.py +5 −2 Original line number Diff line number Diff line Loading @@ -40,6 +40,8 @@ def CCS(input_data): label='darkspec' df[label]=data df.index.rename('shotnum',inplace=True) df.reset_index(level=0,inplace=True) return df def CCS_SAV(input_data): Loading Loading @@ -81,7 +83,8 @@ def CCS_SAV(input_data): df[label]=data df['sclock']=pd.to_numeric(df['sclock']) df.index.rename('shotnum',inplace=True) df.reset_index(level=0,inplace=True) return df Loading
autocnet/fileio/lookup.py +9 −5 Original line number Diff line number Diff line Loading @@ -10,13 +10,17 @@ The default settings are for looking up ChemCam CCS csv data in the ChemCam mast """ import pandas as pd def lookup(df,lookupfile,sep=',',skiprows=1,left_on='sclock',right_on='Spacecraft Clock'): #this loop concatenates together multiple lookup files if provided (mostly to handle the three different master lists for chemcam) #this loop concatenates together multiple lookup files if provided #(mostly to handle the three different master lists for chemcam) for x in lookupfile: try: tmp=pd.read_csv(x,sep=sep,skiprows=skiprows) tmp=pd.read_csv(x,sep=sep,skiprows=skiprows,error_bad_lines=False) lookupdf=pd.concat([lookupdf,tmp]) except: lookupdf=pd.read_csv(x, sep=sep,skiprows=skiprows) lookupdf=pd.read_csv(x, sep=sep,skiprows=skiprows,error_bad_lines=False) combined=pd.merge(df,lookupdf,left_on=left_on,right_on=right_on,how='inner') return combined temp=pd.DataFrame(df['sclock']) metadata=pd.merge(temp,lookupdf,left_on=left_on,right_on=right_on,how='inner') for col in metadata.columns: df[col]=metadata[col] return df
autocnet/utils/folds.py +7 −4 Original line number Diff line number Diff line Loading @@ -7,9 +7,9 @@ Created on Fri Dec 4 12:51:34 2015 from sklearn import cross_validation import numpy as np def random(df,nfolds=5,seed=10,groupby=None): df['Folds']='None' df['Folds']='None' #Create an entry in the data frame that holds the folds foldslist=np.array(df['Folds']) if groupby==None: if groupby==None: #if no column name is listed to group on, just create random folds n=len(df.index) folds=cross_validation.KFold(n,nfolds,shuffle=True,random_state=seed) i=1 Loading @@ -18,6 +18,9 @@ def random(df,nfolds=5,seed=10,groupby=None): i=i+1 else: #if a column name is provided, get all the unique values and define folds #so that all rows of a given value fall in the same fold #(this is useful to ensure that training and test data are truly independent) unique_inds=np.unique(df[groupby]) folds=cross_validation.KFold(len(unique_inds),nfolds,shuffle=True,random_state=seed) foldslist=np.array(df['Folds']) Loading
scratch.py +8 −10 Original line number Diff line number Diff line Loading @@ -47,19 +47,17 @@ print("Test reading data from CSV with lots of spectra and their metadata in one dbfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\full_db_mars_corrected_dopedTiO2.csv" db=CSV(dbfile,setindex='Name') #ccs_br.random_folds(nfolds=6,seed=1,groupby='seqid') print("Test assigning random folds") ccs=spectral_data(ccs_batch_csv) ccs.random_folds(nfolds=6,seed=1,groupby='seqid') print("Test looking up ChemCam metadata") #foo.transpose().sort_index(level=1).to_csv('JSC_output_test.csv') masterlist_files=[r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST.csv", r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST_SOL_0010_0801.csv", r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST_SOL_0805_0980.csv"] #newx=list(foo.wvl.columns) #blah=ccs.interp(newx) ##masterlist=["E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"] ##blah=lookup(ccs_br,masterlist) ccs=lookup(ccs.df,masterlist_files) # #jsctest=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\LIBS_TEST\TestSS_UV_01.txt" #jsc=JSC(jsctest) Loading