Commit c67f2cc5 authored by Jay, committed by Jason R Laura
Browse files

Removing all spectral items (now in PySAT) and updating environment.yml to build (hopefully).

parent 53ed63af
Loading
Loading
Loading
Loading

autocnet/fileio/header_parser.py

deleted 100644 → 0
+0 −22
Original line number Diff line number Diff line
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 24 13:35:56 2015

@author: rbanderson
"""

def header_parser(row, delim):
    """Parse one header line into a single-entry ``{label: data}`` dict.

    Strips comment/markup characters (``#``, ``^``, ``'``, ``*``), splits the
    line on the first occurrence of *delim*, lower-cases the label and turns
    internal spaces into underscores.  Returns an empty dict when *delim*
    does not occur in the line.
    """
    # Get rid of various unwanted characters before parsing.
    badlist = ['#', '^', "'", '*']
    for ch in badlist:
        row = row.replace(ch, '')
    # BUG FIX: str.strip() returns a new string; the original discarded
    # the result, leaving the statement a no-op.
    row = row.strip()

    if delim in row:
        tmp = row.split(delim)
        label = tmp[0].strip().lower().replace('  ', '_').replace(' ', '_')
        data = row.split(tmp[0] + delim)[1].strip()
        headinfo = {label: data}
    else:
        headinfo = {}
    return headinfo
 No newline at end of file

autocnet/fileio/io_ccs.py

deleted 100644 → 0
+0 −139
Original line number Diff line number Diff line
# This code is used to read individual ChemCam CCS .csv files
# Header data is stored as attributes of the data frame
# White space is stripped from the column names
import os
import numpy as np
import pandas as pd
import scipy
from autocnet.fileio.header_parser import header_parser
from autocnet.fileio.utils import file_search
import copy

def CCS(input_data):
    """Read a single ChemCam CCS *.csv* file into a pandas DataFrame.

    The returned frame has one row per shot; spectral columns are keyed by
    ('wvl', wavelength) MultiIndex tuples, and the 14-line file header plus
    filename-derived metadata (sclock, seqid, Pversion) are attached as
    additional columns.
    """
    # BUG FIX: pd.DataFrame.from_csv was deprecated in pandas 0.21 and
    # removed in 1.0; read_csv with index_col=0 is the documented equivalent.
    # (from_csv's parse_dates=True default is dropped deliberately — the
    # index here holds wavelengths, not dates.)
    df = pd.read_csv(input_data, header=14, index_col=0)
    df.rename(columns=lambda x: x.strip(), inplace=True)  # strip whitespace from column names
    df = df.transpose()

    # Re-key the (now wavelength-valued) columns as ('wvl', value) tuples so
    # the spectra can later be isolated from the metadata via df['wvl'].
    cols = df.columns.tolist()
    for i, x in enumerate(cols):
        cols[i] = ('wvl', round(float(x), 5))
    df.columns = pd.MultiIndex.from_tuples(cols)

    # Extract info encoded at fixed positions in the file name.
    # NOTE(review): the slices assume the standard CCS naming convention —
    # sclock at chars 4-12, seqid at 25-33, version at 34-35.
    fname = os.path.basename(input_data)
    df['sclock'] = fname[4:13]
    df['sclock'] = pd.to_numeric(df['sclock'])
    df['seqid'] = fname[25:34].upper()
    df['Pversion'] = fname[34:36]

    # Read the file header and put its information into the dataframe as new
    # columns (inefficient to store this data many times, but much easier to
    # concatenate data from multiple files).
    with open(input_data, 'r') as f:
        header = {}
        for i, row in enumerate(f):
            if i < 14:
                row = row.split(',')[0]
                header.update(header_parser(row, '='))
            else:
                break  # header is confined to the first 14 lines; stop early

    for label, data in header.items():
        if '_float' in label:
            label = label.replace('_float', '')
        if label == 'dark':
            label = 'darkspec'
        df[label] = data

    df.index.rename('shotnum', inplace=True)
    df.reset_index(level=0, inplace=True)
    return df
        
def CCS_SAV(input_data):
    """Read a ChemCam CCS IDL *.SAV* file into a pandas DataFrame.

    Combines the UV/VIS/VNIR spectrometer arrays (per-shot spectra plus the
    averaged and median spectra), keys the spectral columns with
    ('wvl', wavelength) MultiIndex tuples, and copies the remaining SAV
    variables and filename-derived metadata in as columns.
    """
    # BUG FIX: the module-level ``import scipy`` does not make ``scipy.io``
    # available; import the submodule explicitly so readsav always resolves.
    from scipy.io import readsav

    d = readsav(input_data, python_dict=True)
    # Combine the three spectrometers (rows = wavelength channels).
    spectra = np.vstack([d['uv'], d['vis'], d['vnir']])
    aspectra = np.array([np.hstack([d['auv'], d['avis'], d['avnir']])]).T  # averaged spectrum
    mspectra = np.array([np.hstack([d['muv'], d['mvis'], d['mvnir']])]).T  # median spectrum

    # Create tuples for the spectral columns to use as a MultiIndex.
    wvls = list(np.hstack([d['defuv'], d['defvis'], d['defvnir']]))
    for i, x in enumerate(wvls):
        wvls[i] = ('wvl', round(x, 5))

    # Column names: one per shot, plus the average and median spectra.
    shotnums = list(range(1, d['nshots'] + 1))
    shots = ['shot' + str(i) for i in shotnums]
    shots.extend(['ave', 'median'])

    # Build the spectral frame, then transpose so rows are shots/aggregates
    # and columns are wavelengths.
    df = pd.DataFrame(np.hstack([spectra, aspectra, mspectra]),
                      columns=shots, index=pd.MultiIndex.from_tuples(wvls))
    df = df.transpose()

    # Remove the already-consumed spectral arrays from the dict so only
    # metadata remains.
    to_remove = ['uv', 'vis', 'vnir', 'auv', 'avis', 'avnir',
                 'muv', 'mvis', 'mvnir', 'defuv', 'defvis', 'defvnir',
                 'label_info']
    for x in to_remove:
        del d[x]

    # Extract info encoded at fixed positions in the file name.
    fname = os.path.basename(input_data)
    d['sclock'] = fname[4:13]
    d['seqid'] = fname[25:34].upper()
    d['Pversion'] = fname[34:36]

    # Add the remaining SAV variables to the frame as metadata columns.
    for label, data in d.items():
        if type(data) is bytes:
            data = data.decode()  # IDL strings come back as bytes in py3
        df[label] = data

    df['sclock'] = pd.to_numeric(df['sclock'])
    df.index.rename('shotnum', inplace=True)
    df.reset_index(level=0, inplace=True)

    return df

def ccs_batch(directory, searchstring='*CCS*.csv', is_sav=False):
    """Read every matching CCS file under *directory* into one DataFrame.

    For each unique sclock only the file with the highest processing version
    is read.  ``is_sav`` selects the IDL SAV reader; it is forced on when the
    search string targets SAV files.  Frames whose wavelength columns do not
    match the first file's are reported and skipped.
    """
    # BUG FIX: the original unconditionally overwrote the ``is_sav`` argument
    # (``else: is_sav = False``), making the parameter dead.  Only force it
    # on when the search string clearly targets SAV files.
    if 'SAV' in searchstring:
        is_sav = True
    filelist = file_search(directory, searchstring)
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')

    # Extract the sclock and version for each file and ensure that only one
    # file per sclock is read -- the one with the highest version number.
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]
        P_version[i] = basenames[i][-5:-4]
    sclocks_unique = np.unique(sclocks)
    filelist_new = np.array([], dtype='str')
    for sclock in sclocks_unique:
        match = (sclocks == sclock)
        maxP = P_version[match] == max(P_version[match])
        filelist_new = np.append(filelist_new, filelist[match][maxP])

    filelist = filelist_new
    # TODO: speed this up for large numbers of files and add a progress bar.
    combined = None
    for fname in filelist:
        tmp = CCS_SAV(fname) if is_sav else CCS(fname)

        if combined is None:
            combined = tmp
        else:
            # BUG FIX: the original used a bare ``except:`` around a NameError
            # as first-iteration control flow, which also silently swallowed
            # real errors (e.g. a missing 'wvl' level).  Comparing the column
            # sets guards against rounding mismatches between files.
            cols1 = list(combined['wvl'].columns)
            cols2 = list(tmp['wvl'].columns)
            if set(cols1) == set(cols2):
                combined = pd.concat([combined, tmp])
            else:
                print("Wavelengths don't match!")
    return combined
    
        
 No newline at end of file

autocnet/fileio/io_edr.py

deleted 100644 → 0
+0 −27
Original line number Diff line number Diff line

import pandas as pd
from autocnet.fileio.header_parser import header_parser

def EDR(input_data):
    """Read a ChemCam EDR text file into a pandas DataFrame.

    Lines 3-28 of the file hold ``key: value`` header pairs and line 30 is
    the first data row; the number of whitespace-separated fields on that row
    determines the shot count.  Header values are attached as columns of the
    returned (transposed) frame.
    """
    shots = None
    with open(input_data, 'r') as f:
        header = {}
        for i, row in enumerate(f):
            if i < 2 or i == 28:
                pass  # skip the leading title lines and the separator row
            elif i < 28:
                header.update(header_parser(row, ':'))  # read the header values into a dict
            elif i == 29:
                row = row.split()
                shotnums = list(range(1, len(row) + 1))
                shots = ['shot' + str(n) for n in shotnums]
                break  # nothing past line 29 is needed in this pass

    # BUG FIX: guard against short files, which previously surfaced as an
    # opaque UnboundLocalError on ``shots`` below.
    if shots is None:
        raise ValueError('%s is too short to be an EDR file' % input_data)

    # The data block is four-space delimited; a multi-character separator
    # requires pandas' python engine, named explicitly to avoid the warning.
    df = pd.read_csv(input_data, sep='    ', skiprows=29, names=shots,
                     engine='python')
    df = df.transpose()
    # Insert the header metadata as columns.
    for label, data in header.items():
        df[label] = data
    return df
          
          
                    

autocnet/fileio/io_jsc.py

deleted 100644 → 0
+0 −120
Original line number Diff line number Diff line

import os
import numpy as np
import pandas as pd
from autocnet.fileio.utils import file_search
#This function reads the lookup tables used to expand metadata from the file names
#This is separated from parsing the filenames so that for large lists of files the 
#lookup tables don't need to be read over and over
#
#Info in the tables is stored in a dict of dataframes so that only one variable 
#(the dict) needs to be passed between functions
def read_refdata(LUT_files):
    """Load the filename-metadata lookup tables.

    Parameters
    ----------
    LUT_files : dict
        Maps the keys 'spect', 'laser', 'exp' and 'sample' to CSV paths.

    Returns
    -------
    dict
        The same keys mapped to DataFrames indexed on each CSV's first
        column.  Bundling the tables in one dict keeps a single variable
        moving between the JSC reader functions, so for large file lists
        the tables are read only once.
    """
    table_names = ('spect', 'laser', 'exp', 'sample')
    return {name: pd.read_csv(LUT_files[name], index_col=0)
            for name in table_names}

#This function parses the file names to record metadata related to the observation
def jsc_filename_parse(filename, refdata):
    """Expand a JSC LIBS file name into a one-row metadata DataFrame.

    The underscore-separated name encodes, in order: LIBS sample ID,
    location, lab, gas+pressure, laser ID+power, experiment ID and
    spectrometer ID.  Sample, laser and experiment IDs are expanded through
    the *refdata* lookup tables produced by ``read_refdata``; sample IDs not
    found in the table fall back to the 'Unknown' row.
    """
    parts = os.path.basename(filename).split('_')
    libs_ID = parts[0]
    laserID = parts[4][0]
    expID = parts[5]
    spectID = parts[6]

    def lut_row(table, key, index_name):
        # One row of a lookup table, reshaped to a single-row frame whose
        # index has been pulled out as a named column, so it can be
        # concatenated column-wise with the rest of the metadata.
        row = pd.DataFrame(table.loc[key]).T
        row.index.name = index_name
        row.reset_index(level=0, inplace=True)
        return row

    sample_key = libs_ID if libs_ID in refdata['sample'].index else 'Unknown'
    file_info = lut_row(refdata['sample'], sample_key, 'LIBS ID')
    file_info['loc'] = int(parts[1])
    file_info['lab'] = parts[2]
    file_info['gas'] = parts[3][0]
    file_info['pressure'] = float(parts[3][1:])

    if laserID in refdata['laser'].index:
        file_info = pd.concat(
            [file_info, lut_row(refdata['laser'], laserID, 'Laser Identifier')],
            axis=1)

    file_info['laser_power'] = float(parts[4][1:])
    if expID in refdata['exp'].index:
        file_info = pd.concat(
            [file_info, lut_row(refdata['exp'], expID, 'Exp Identifier')],
            axis=1)

    # Spectrometer expansion (keyed on spectID) was disabled in the original
    # implementation and is intentionally left out here as well.
    return file_info
    

def JSC(input_file, refdata):
    """Read one JSC LIBS spectrum file into a DataFrame with metadata columns.

    The file is tab-delimited with 14 preamble rows; its first two columns
    are time stamps and the remainder are intensities keyed by wavelength.
    Spectral columns get ('wvl', wavelength) MultiIndex keys so they can be
    separated from metadata later, and the filename-derived metadata is
    replicated onto every row.
    """
    raw = pd.read_csv(input_file, skiprows=14, sep='\t')
    raw = raw.rename(columns={raw.columns[0]: 'time1', raw.columns[1]: 'time2'})
    times = raw[['time1', 'time2']]   # split the two time columns off
    spectra = raw[raw.columns[2:]]    # trim the frame down to just the spectra

    # Make a MultiIndex for each wavelength column so spectra can be easily
    # isolated from the metadata later.
    wvl_cols = [('wvl', round(float(name), 5))
                for name in spectra.columns.tolist()]
    spectra.columns = pd.MultiIndex.from_tuples(wvl_cols)

    # Build a metadata frame (one identical row per spectrum) and add times.
    meta = pd.concat([jsc_filename_parse(input_file, refdata)] * len(spectra.index))
    meta.index = spectra.index
    meta = pd.concat([meta, times], axis=1)

    # Copy the metadata columns onto the spectral frame.
    for col in meta.columns.tolist():
        spectra[col] = meta[col]

    return spectra
   
        


def jsc_batch(directory, LUT_files, searchstring='*.txt'):
    """Read every matching JSC LIBS file under *directory* into one frame.

    Files are grouped by spectrometer ID (the 7th underscore-separated field
    of the file name); each group is concatenated row-wise, then the
    per-spectrometer frames are merged into a single combined frame.
    """
    # Read in the lookup tables used to expand filename metadata; doing this
    # once up front avoids re-reading them for every file.
    refdata = read_refdata(LUT_files)
    # Get the list of files matching the search string in the directory.
    filelist = file_search(directory, searchstring)

    # Spectrometer ID for every file in the list.
    spectIDs = [os.path.basename(path).split('_')[6] for path in filelist]

    # One concatenated frame per unique spectrometer.
    # NOTE(review): the boolean indexing below assumes file_search returns a
    # numpy array rather than a plain list — confirm against its definition.
    frames = []
    for spect in np.unique(spectIDs):
        sublist = filelist[np.in1d(spectIDs, spect)]
        frames.append(pd.concat([JSC(path, refdata) for path in sublist]))

    # Merge the per-spectrometer frames into one frame with all the data.
    combined = frames[0]
    for frame in frames[1:]:
        combined = combined.merge(frame)

    return combined
                    
+0 −24
Original line number Diff line number Diff line
from osgeo import gdal
import numpy as np

def openm3(input_data):
    """Open an M3 image with GDAL.

    Returns a tuple of (wavelength array, band-1 reflectance array, the open
    GDAL dataset).  Accepts either the .img file or its .hdr sidecar; GDAL
    wants the .img, so a .hdr path is rewritten before opening.
    """
    parts = input_data.split('.')
    if parts[-1] == 'hdr':
        # GDAL wants the img, but many users aim at the .hdr.
        input_data = parts[0] + '.img'
    ds = gdal.Open(input_data)
    ref_array = ds.GetRasterBand(1).ReadAsArray()
    wv_array = metadatatoband(ds.GetMetadata())
    return wv_array, ref_array, ds

def metadatatoband(metadata):
    """Convert a GDAL metadata dict into a sorted array of band wavelengths.

    Each metadata value is either a plain number or a string whose last
    space-separated token is a parenthesised wavelength, e.g.
    "Band 1 (450.0)"; both forms are parsed to float.
    """
    wv2band = []
    # BUG FIX: the original used the Python-2-only ``iteritems`` and
    # referenced an undefined name ``value`` (a NameError silently swallowed
    # by a bare except, which forced every value down the fallback path).
    for v in metadata.values():
        try:
            wv2band.append(float(v))
        except ValueError:
            # Fall back to the parenthesised form: "... (450.0)" -> 450.0.
            wv2band.append(float(v.split(" ")[-1].split("(")[1].split(")")[0]))
    # BUG FIX: ``sort(key=int)`` truncates to whole numbers, leaving
    # wavelengths that share an integer part in arbitrary relative order;
    # sort numerically instead.
    wv2band.sort()
    return np.asarray(wv2band)
Loading