Adding Ryan's LIBS stuff to autocnet (6c6fbf93) · Commits · aflab / astrogeology / Autocnet

autocnet/fileio/io_ccs.py

0 → 100644

+127 −0

Original line number	Diff line number	Diff line
		# This code is used to read individual ChemCam CCS .csv files
		# Header data is stored as attributes of the data frame
		# White space is stripped from the column names
		import os
		import numpy as np
		import pandas as pd
		import scipy
		from pysat.fileio.header_parser import header_parser
		from pysat.fileio.utils import file_search

		def CCS(input_data):
		    """Read one ChemCam CCS .csv file into a data frame.

		    The first 14 lines of the file are treated as header lines; the line
		    after them supplies the column names of the spectra table.  The table is
		    transposed, so each column of the CSV becomes one row of the result and
		    each wavelength becomes a ``('wvl', wavelength)`` column (wavelengths are
		    rounded to 5 decimal places).

		    Metadata is stored as extra columns, which is inefficient but makes it
		    easy to concatenate data from multiple files:

		    * ``sclock``   - characters 4-12 of the base file name, as a number
		    * ``seqid``    - characters 25-33 of the base file name, upper-cased
		    * ``Pversion`` - characters 34-35 of the base file name
		    * one column per entry returned by ``header_parser`` for the header
		      lines (a ``_float`` suffix is removed from the label)

		    Parameters
		    ----------
		    input_data : str
		        Path to the CCS .csv file.

		    Returns
		    -------
		    pandas.DataFrame
		        Spectra (one row per CSV column) plus metadata columns.
		    """
		    header_rows = 14

		    # DataFrame.from_csv was removed in pandas 1.0; read_csv with
		    # index_col=0 is the documented replacement.  Date parsing of the index
		    # is not requested because the index holds wavelengths.
		    df = pd.read_csv(input_data, header=header_rows, index_col=0)
		    df = df.rename(columns=lambda name: name.strip())  # strip whitespace from column names
		    df = df.transpose()

		    # after the transpose the columns are the wavelengths
		    df.columns = pd.MultiIndex.from_tuples(
		        [('wvl', round(float(wvl), 5)) for wvl in df.columns])

		    # extract info from the file name
		    fname = os.path.basename(input_data)
		    df['sclock'] = fname[4:13]
		    df['sclock'] = pd.to_numeric(df['sclock'])
		    df['seqid'] = fname[25:34].upper()
		    df['Pversion'] = fname[34:36]

		    # read only the header lines; there is no need to load the whole file again
		    header = {}
		    with open(input_data, 'r') as f:
		        for i, row in enumerate(f):
		            if i >= header_rows:
		                break
		            # only the text before the first comma is parsed
		            # (header_parser is assumed to return a dict-like -- it is passed to dict.update)
		            header.update(header_parser(row.split(',')[0], '='))

		    for label, data in header.items():
		        if '_float' in label:
		            label = label.replace('_float', '')
		        df[label] = data

		    return df

		def CCS_SAV(input_data):
		    """Read one ChemCam CCS .SAV file into a data frame.

		    The file is loaded with ``scipy.io.readsav``.  The per-shot spectra of
		    the three spectrometers (``uv``, ``vis``, ``vnir``) are stacked along the
		    wavelength axis, and the ``a*`` and ``m*`` arrays are appended as the
		    ``ave`` and ``median`` rows.  Columns of the result are
		    ``('wvl', wavelength)`` tuples built from ``defuv``/``defvis``/``defvnir``
		    (rounded to 5 decimal places).

		    Every remaining entry of the .SAV dictionary (except ``label_info``,
		    which is not used) is added as a metadata column, together with
		    ``sclock``, ``seqid`` and ``Pversion`` sliced from the base file name.

		    Parameters
		    ----------
		    input_data : str
		        Path to the .SAV file.

		    Returns
		    -------
		    pandas.DataFrame
		        One row per shot plus ``ave`` and ``median`` rows.
		    """
		    # A bare ``import scipy`` does not guarantee that the ``io`` submodule
		    # is loaded, so import it explicitly here.
		    import scipy.io

		    d = scipy.io.readsav(input_data, python_dict=True)

		    # combine the three spectrometers
		    spectra = np.vstack([d['uv'], d['vis'], d['vnir']])
		    aspectra = np.array([np.hstack([d['auv'], d['avis'], d['avnir']])]).T
		    mspectra = np.array([np.hstack([d['muv'], d['mvis'], d['mvnir']])]).T
		    wvls = [('wvl', round(x, 5))
		            for x in np.hstack([d['defuv'], d['defvis'], d['defvnir']])]

		    # remove the arrays used above so they are not added as metadata columns
		    for key in ('uv', 'vis', 'vnir',
		                'auv', 'avis', 'avnir',
		                'muv', 'mvis', 'mvnir',
		                'defuv', 'defvis', 'defvnir'):
		        del d[key]

		    # define column names
		    shots = ['shot' + str(i) for i in range(1, d['nshots'] + 1)]
		    shots.extend(['ave', 'median'])
		    df = pd.DataFrame(np.hstack([spectra, aspectra, mspectra]),
		                      columns=shots,
		                      index=pd.MultiIndex.from_tuples(wvls))
		    df = df.transpose()

		    # not currently using PDS label info; tolerate files that lack it
		    d.pop('label_info', None)

		    # extract info from the file name
		    fname = os.path.basename(input_data)
		    d['sclock'] = fname[4:13]
		    d['seqid'] = fname[25:34].upper()
		    d['Pversion'] = fname[34:36]

		    for label, data in d.items():
		        if isinstance(data, bytes):
		            data = data.decode()
		        df[label] = data

		    df['sclock'] = pd.to_numeric(df['sclock'])

		    return df

		def ccs_batch(directory, searchstring='CCS.csv', is_sav=False):
		    """Read every matching CCS file in a directory and stack the results.

		    Parameters
		    ----------
		    directory : str
		        Directory passed to ``file_search``.
		    searchstring : str
		        Pattern passed to ``file_search``.  If it contains ``'SAV'`` the
		        files are read with ``CCS_SAV``.
		    is_sav : bool
		        Force the ``CCS_SAV`` reader even when ``searchstring`` does not
		        contain ``'SAV'``.

		    Returns
		    -------
		    pandas.DataFrame
		        Concatenation of all files whose float-typed columns match those
		        of the data read so far.  Files that do not match are skipped with
		        a printed message.  An empty data frame is returned when no file
		        matches the search.
		    """
		    # honour the caller's flag instead of unconditionally overwriting it
		    is_sav = is_sav or 'SAV' in searchstring
		    reader = CCS_SAV if is_sav else CCS

		    combined = None
		    for path in file_search(directory, searchstring):
		        tmp = reader(path)

		        if combined is None:
		            # first file: nothing to compare against yet
		            combined = tmp
		            continue

		        # only stack files that share the same set of float columns
		        cols1 = set(combined.columns[combined.dtypes == 'float'])
		        cols2 = set(tmp.columns[tmp.dtypes == 'float'])
		        if cols1 == cols2:
		            combined = pd.concat([combined, tmp])
		        else:
		            print("Wavelengths don't match!")

		    if combined is None:
		        return pd.DataFrame()
		    return combined


		No newline at end of file

autocnet/fileio/io_csv.py

0 → 100644

+36 −0

Original line number	Diff line number	Diff line
		# -*- coding: utf-8 -*-
		"""
		Created on Mon Nov 30 08:58:07 2015

		@author: rbanderson
		This is a simple function to read in CSV data.
		If setindex is specified, then it uses the column of the CSV with the
		specified name as the row index of the data frame
		"""
		import pandas as pd
		def CSV(filename, sep=',', setindex=None):
		    """Read a CSV file, labelling numeric column names as wavelengths.

		    Every column whose name can be converted to ``float`` is treated as a
		    spectral channel and renamed to ``('wvl', wavelength)`` with the
		    wavelength rounded to 5 decimal places.  All other columns are appended
		    after the spectral columns under their original names.

		    Parameters
		    ----------
		    filename : str
		        Path of the CSV file.
		    sep : str
		        Field separator passed to ``pandas.read_csv``.
		    setindex : str, optional
		        Name of a column to use as the row index of the result.

		    Returns
		    -------
		    pandas.DataFrame
		        Spectral columns first, followed by the remaining columns.
		    """
		    print('Reading ' + filename)
		    df = pd.read_csv(filename, sep=sep)

		    wvl_positions = []
		    wvl_columns = []
		    other_positions = []
		    for pos, name in enumerate(df.columns):
		        try:
		            wvl = round(float(name), 5)
		        except (TypeError, ValueError):
		            # not a number: an ordinary (non-spectral) column
		            other_positions.append(pos)
		        else:
		            wvl_columns.append(('wvl', wvl))
		            wvl_positions.append(pos)

		    # The collected values are integer positions, so select with iloc;
		    # plain df[...] is label based and fails on string column names.
		    # Copy so that relabelling and adding columns does not touch a view.
		    spectra = df.iloc[:, wvl_positions].copy()
		    metadata = df.iloc[:, other_positions]

		    if wvl_columns:
		        # from_tuples cannot infer the number of levels from an empty list
		        spectra.columns = pd.MultiIndex.from_tuples(wvl_columns)
		    for name in metadata.columns:
		        spectra[name] = metadata[name]

		    if setindex:
		        spectra = spectra.set_index([setindex])

		    return spectra
		No newline at end of file

autocnet/fileio/io_edr.py

0 → 100644

+27 −0

Original line number	Diff line number	Diff line

		import pandas as pd
		from pysat.fileio.header_parser import header_parser

		def EDR(input_data):
		    """Read one EDR text file into a data frame.

		    Lines are handled by their zero-based position in the file:

		    * lines 0, 1 and 28 are ignored
		    * lines 2-27 are passed to ``header_parser`` with ``':'`` as separator
		    * line 29 is the first data line; its number of whitespace-separated
		      fields gives the number of shots

		    The data (line 29 onwards) are read with a single space as separator,
		    labelled ``shot1`` ... ``shotN`` and transposed so that each shot is a
		    row.  Each header entry is then added as a column.

		    Parameters
		    ----------
		    input_data : str
		        Path to the EDR file.

		    Returns
		    -------
		    pandas.DataFrame
		        One row per shot, with header metadata as additional columns.

		    Raises
		    ------
		    ValueError
		        If the file has fewer than 30 lines, so no data line exists.
		    """
		    header = {}
		    shots = None
		    with open(input_data, 'r') as f:
		        for i, row in enumerate(f):
		            if i < 2 or i == 28:
		                continue
		            if i < 28:
		                # read the header values into a dict
		                header.update(header_parser(row, ':'))
		            else:
		                # i == 29: first data line; nothing further is needed
		                # from this pass, so stop instead of reading the whole file
		                nshots = len(row.split())
		                shots = ['shot' + str(n) for n in range(1, nshots + 1)]
		                break

		    if shots is None:
		        raise ValueError('No data found after the header in ' + str(input_data))

		    df = pd.read_csv(input_data, sep=' ', skiprows=29, names=shots)
		    df = df.transpose()

		    # insert the header metadata as columns
		    for label, data in header.items():
		        df[label] = data
		    return df

autocnet/fileio/io_jsc.py

0 → 100644

+109 −0

Original line number	Diff line number	Diff line
		import datetime
		import os
		import re

		import numpy as np
		import pandas as pd

		from pysat.spectral.spectra import Spectra
		from pysat.fileio.header_parser import header_parser
		from pysat.fileio.utils import file_search

		#This function reads the lookup tables used to expand metadata from the file names
		#This is separated from parsing the filenames so that for large lists of files the
		#lookup tables don't need to be read over and over
		#
		#Info in the tables is stored in a dict of dataframes so that only one variable
		#(the dict) needs to be passed between functions
		def read_refdata(LUT_files):
		    """Load the four lookup tables used to expand file-name metadata.

		    Parameters
		    ----------
		    LUT_files : dict
		        Maps ``'spect'``, ``'laser'``, ``'exp'`` and ``'sample'`` to the
		        CSV file holding the corresponding lookup table.

		    Returns
		    -------
		    dict
		        The same four keys, each mapped to a data frame read with the
		        first CSV column as its index.
		    """
		    table_keys = ('spect', 'laser', 'exp', 'sample')
		    return {key: pd.read_csv(LUT_files[key], index_col=0)
		            for key in table_keys}

		#This function parses the file names to record metadata related to the observation
		def jsc_filename_parse(filename, refdata):
		    """Build a one-row data frame of metadata from a JSC LIBS file name.

		    The base name is split on underscores and the fields are used as:

		    * field 0 - LIBS ID, looked up in ``refdata['sample']`` (the row
		      labelled ``'Unknown'`` is used when the ID is not in the table)
		    * field 1 - ``loc`` (integer)
		    * field 2 - ``lab``
		    * field 3 - first character is ``gas``, the rest is ``pressure`` (float)
		    * field 4 - first character is the laser ID (looked up in
		      ``refdata['laser']``), the rest is ``laser_power`` (float)
		    * field 5 - experiment ID, looked up in ``refdata['exp']``

		    Spectrometer metadata (field 6) is not expanded here.

		    Parameters
		    ----------
		    filename : str
		        File name or path; any directory part is ignored.
		    refdata : dict
		        Lookup tables as returned by ``read_refdata``.

		    Returns
		    -------
		    pandas.DataFrame
		        A single row holding the metadata columns.
		    """
		    parts = os.path.basename(filename).split('_')
		    libs_id = parts[0]
		    laser_id = parts[4][0]
		    exp_id = parts[5]

		    # The sample row is fetched by index label, so membership must be
		    # tested against the index as well (``in`` on a data frame tests the
		    # column labels instead).
		    samples = refdata['sample']
		    sample_key = libs_id if libs_id in samples.index else 'Unknown'
		    file_info = _lut_row(samples, sample_key, 'LIBS ID')

		    file_info['loc'] = int(parts[1])
		    file_info['lab'] = parts[2]
		    file_info['gas'] = parts[3][0]
		    file_info['pressure'] = float(parts[3][1:])

		    if laser_id in refdata['laser'].index:
		        laser_info = _lut_row(refdata['laser'], laser_id, 'Laser Identifier')
		        file_info = pd.concat([file_info, laser_info], axis=1)

		    file_info['laser_power'] = float(parts[4][1:])

		    if exp_id in refdata['exp'].index:
		        exp_info = _lut_row(refdata['exp'], exp_id, 'Exp Identifier')
		        file_info = pd.concat([file_info, exp_info], axis=1)

		    return file_info


		def _lut_row(table, key, index_name):
		    """Return row ``key`` of ``table`` as a one-row frame, with the key in a column named ``index_name``."""
		    row = pd.DataFrame(table.loc[key]).T
		    row.index.name = index_name
		    return row.reset_index(level=0)


		def JSC(input_file, refdata):
		    """Read one JSC LIBS text file and attach file-name metadata to every row.

		    The first 14 lines of the tab-separated file are skipped and the first
		    two columns are renamed to ``time1`` and ``time2``.  The one-row
		    metadata frame from ``jsc_filename_parse`` is repeated once per data
		    row and placed to the left of the data.

		    Parameters
		    ----------
		    input_file : str
		        Path to the JSC data file.
		    refdata : dict
		        Lookup tables as returned by ``read_refdata``.

		    Returns
		    -------
		    pandas.DataFrame
		        Metadata columns followed by the data columns.
		    """
		    table = pd.read_csv(input_file, sep='\t', skiprows=14)
		    first_col = table.columns[0]
		    second_col = table.columns[1]
		    table = table.rename(columns={first_col: 'time1', second_col: 'time2'})

		    # one copy of the metadata row for each row of data
		    info_row = jsc_filename_parse(input_file, refdata)
		    repeated = [info_row for _ in range(len(table.index))]
		    metadata = pd.concat(repeated)
		    metadata.index = table.index

		    return pd.concat([metadata, table], axis=1)




		def jsc_batch(directory, LUT_files, searchstring='*.txt'):
		    """Read every matching JSC file in a directory into one data frame.

		    Files are grouped by spectrometer ID (field 6 of the underscore-split
		    base name).  The files of each spectrometer are read with ``JSC`` and
		    concatenated; the per-spectrometer frames are then merged, in sorted
		    order of spectrometer ID, with ``DataFrame.merge``.

		    Parameters
		    ----------
		    directory : str
		        Directory passed to ``file_search``.
		    LUT_files : dict
		        Lookup-table file names, passed to ``read_refdata``.
		    searchstring : str
		        Pattern passed to ``file_search``.

		    Returns
		    -------
		    pandas.DataFrame
		        The merged data, or an empty data frame when no file matches.
		    """
		    # read in the lookup tables to expand filename metadata
		    refdata = read_refdata(LUT_files)

		    # Materialise the result as a list so the grouping below works whether
		    # file_search returns a list or an array.
		    filelist = list(file_search(directory, searchstring))
		    if not filelist:
		        return pd.DataFrame()

		    # spectrometer ID of each file, in file order
		    spect_ids = [os.path.basename(path).split('_')[6] for path in filelist]

		    # for each spectrometer, read its spectra and combine them into a
		    # single data frame for that spectrometer
		    dfs = []
		    for spect in sorted(set(spect_ids)):
		        frames = [JSC(path, refdata)
		                  for path, sid in zip(filelist, spect_ids)
		                  if sid == spect]
		        dfs.append(pd.concat(frames))

		    # now combine the data frames for the different spectrometers into a
		    # single data frame containing all the data
		    combined = dfs[0]
		    for df in dfs[1:]:
		        combined = combined.merge(df)

		    return combined

autocnet/fileio/io_libs.py

deleted100644 → 0

+0 −39

Original line number	Diff line number	Diff line
		import numpy as np
		import pandas as pd

		from autocnet.spectral.spectra import Spectra

		class LIBS(object):
		    """Reader for a single LIBS text file.

		    Lines are handled by their zero-based position in the file:

		    * lines 0 and 13 are ignored
		    * line 1 is stored as the ``Date`` attribute (everything after the
		      first ``:``, with any further colons replaced by spaces)
		    * lines 2-12 are ``key: value`` pairs stored as attributes; whitespace
		      in the key is replaced by underscores
		    * line 14 holds the wavelengths
		    * every later line is one tab-separated spectrum: time, spectrum id,
		      then the intensities

		    The spectra are collected in ``self.spectra``, a ``Spectra`` object
		    wrapping a data frame indexed by wavelength with one column per
		    spectrum id.  ``self.spectra`` stays ``None`` if the file has no data
		    lines.
		    """

		    def __init__(self, input_data):
		        self.spectra = None
		        # Could easily add regex to the parsing to be more robust reading.
		        with open(input_data, 'r') as f:
		            for i, l in enumerate(f):
		                if i == 14:
		                    wavelengths = np.fromstring(l, sep=' ')
		                elif i > 14:
		                    if not l.strip():
		                        # tolerate blank lines (e.g. at the end of the file)
		                        continue
		                    sl = l.split('\t')
		                    sid = sl[1]
		                    # build a real list: on Python 3 map() is lazy and
		                    # np.asarray would wrap the map object itself
		                    rawsp = np.asarray([float(v) for v in sl[2:]])
		                    if self.spectra is None:
		                        df = pd.DataFrame(rawsp, columns=[sid],
		                                          index=wavelengths)
		                        self.spectra = Spectra(df)
		                    else:
		                        self.spectra.df[sid] = rawsp
		                elif i == 0 or i == 13:
		                    pass
		                elif i == 1:
		                    date = ' '.join(l.rstrip().split(':')[1:])
		                    # date = datetime.datetime(date)  # format needs to be specified
		                    self.Date = date
		                else:
		                    # split on the first colon only so that values which
		                    # themselves contain ':' are kept intact
		                    key, v = l.split(':', 1)
		                    k = '_'.join(key.split())
		                    setattr(self, k, v.rstrip())