Fiddling with generating random folds and looking up CCAM data in the master list file (3aad4d7e) · Commits · aflab / astrogeology / Autocnet

autocnet/fileio/io_ccs.py

+5 −2

Original line number	Diff line number	Diff line
		@@ -40,6 +40,8 @@ def CCS(input_data):
		label='darkspec'
		df[label]=data

		df.index.rename('shotnum',inplace=True)
		df.reset_index(level=0,inplace=True)
		return df

		def CCS_SAV(input_data):
		@@ -81,7 +83,8 @@ def CCS_SAV(input_data):
		df[label]=data

		df['sclock']=pd.to_numeric(df['sclock'])

		df.index.rename('shotnum',inplace=True)
		df.reset_index(level=0,inplace=True)

		return df

+9 −5

Original line number	Diff line number	Diff line
		@@ -10,13 +10,17 @@ The default settings are for looking up ChemCam CCS csv data in the ChemCam mast
		"""
		import pandas as pd
		def lookup(df,lookupfile,sep=',',skiprows=1,left_on='sclock',right_on='Spacecraft Clock'):
		#this loop concatenates together multiple lookup files if provided (mostly to handle the three different master lists for chemcam)
		#this loop concatenates together multiple lookup files if provided
		#(mostly to handle the three different master lists for chemcam)
		for x in lookupfile:
		try:
		tmp=pd.read_csv(x,sep=sep,skiprows=skiprows)
		tmp=pd.read_csv(x,sep=sep,skiprows=skiprows,error_bad_lines=False)
		lookupdf=pd.concat([lookupdf,tmp])
		except:
		lookupdf=pd.read_csv(x, sep=sep,skiprows=skiprows)
		lookupdf=pd.read_csv(x, sep=sep,skiprows=skiprows,error_bad_lines=False)

		combined=pd.merge(df,lookupdf,left_on=left_on,right_on=right_on,how='inner')
		return combined
		temp=pd.DataFrame(df['sclock'])
		metadata=pd.merge(temp,lookupdf,left_on=left_on,right_on=right_on,how='inner')
		for col in metadata.columns:
		df[col]=metadata[col]
		return df

+7 −4

Original line number	Diff line number	Diff line
		@@ -7,9 +7,9 @@ Created on Fri Dec 4 12:51:34 2015
		from sklearn import cross_validation
		import numpy as np
		def random(df,nfolds=5,seed=10,groupby=None):
		df['Folds']='None'
		df['Folds']='None' #Create an entry in the data frame that holds the folds
		foldslist=np.array(df['Folds'])
		if groupby==None:
		if groupby==None: #if no column name is listed to group on, just create random folds
		n=len(df.index)
		folds=cross_validation.KFold(n,nfolds,shuffle=True,random_state=seed)
		i=1
		@@ -18,6 +18,9 @@ def random(df,nfolds=5,seed=10,groupby=None):
		i=i+1

		else:
		#if a column name is provided, get all the unique values and define folds
		#so that all rows of a given value fall in the same fold
		#(this is useful to ensure that training and test data are truly independent)
		unique_inds=np.unique(df[groupby])
		folds=cross_validation.KFold(len(unique_inds),nfolds,shuffle=True,random_state=seed)
		foldslist=np.array(df['Folds'])

+8 −10

Original line number	Diff line number	Diff line
		@@ -47,19 +47,17 @@ print("Test reading data from CSV with lots of spectra and their metadata in one
		dbfile=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\full_db_mars_corrected_dopedTiO2.csv"
		db=CSV(dbfile,setindex='Name')

		#ccs_br.random_folds(nfolds=6,seed=1,groupby='seqid')
		print("Test assigning random folds")
		ccs=spectral_data(ccs_batch_csv)
		ccs.random_folds(nfolds=6,seed=1,groupby='seqid')

		print("Test looking up ChemCam metadata")

		#foo.transpose().sort_index(level=1).to_csv('JSC_output_test.csv')
		masterlist_files=[r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST.csv",
		r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST_SOL_0010_0801.csv",
		r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\Sample_Data\CCAM\MASTERLIST_SOL_0805_0980.csv"]

		#newx=list(foo.wvl.columns)
		#blah=ccs.interp(newx)




		##masterlist=["E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0805_0980.csv",r"E:\ChemCam\ops_ccam_misc\MASTERLIST.csv"]
		##blah=lookup(ccs_br,masterlist)
		ccs=lookup(ccs.df,masterlist_files)
		#
		#jsctest=r"C:\Users\rbanderson\Documents\Projects\LIBS PDART\pysat\pysat\examples\LIBS_TEST\TestSS_UV_01.txt"
		#jsc=JSC(jsctest)