Commit 78acf6c2 authored by jay's avatar jay
Browse files
parents 038f6223 30625291
Loading
Loading
Loading
Loading
+10 −11
Original line number Diff line number Diff line
@@ -30,16 +30,15 @@ before_install:
  # Create the env
  - conda create -q -n test python=$PYTHON_VERSION
  - source activate test
  # https://github.com/travis-ci/travis-ci/issues/8982
  - python -c "import fcntl; fcntl.fcntl(1, fcntl.F_SETFL, 0)"

install:
  - conda config --add channels conda-forge
  - conda config --add channels jlaura
  - conda install -c conda-forge gdal h5py 
  - conda install pandas sqlalchemy pyyaml networkx affine protobuf
  - pip install pvl
  - conda install -q gdal h5py pandas sqlalchemy pyyaml networkx affine protobuf scipy pvl

  # Development installation
  - conda install pytest  pytest-cov sh anaconda-client
  - conda install -q pytest pytest-cov sh

script:
  - pytest --cov=plio
@@ -48,9 +47,9 @@ after_success:
  - coveralls
  # Need to do the build in the root
  - source deactivate
  - conda install conda-build anaconda-client
  - conda install -q conda-build anaconda-client
  - conda config --set anaconda_upload yes
  - conda build --token $CONDA_UPLOAD_TOKEN --python $PYTHON_VERSION recipe
  - conda build --token $CONDA_UPLOAD_TOKEN --python $PYTHON_VERSION recipe -q

notifications:
  webhooks:
+34 −64
Original line number Diff line number Diff line
environment:

  CONDA_INSTALL_LOCN: "C:\\conda"

  # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
  # /E:ON and /V:ON options are not enabled in the batch script interpreter
  # See: http://stackoverflow.com/a/13751649/163740
  CMD_IN_ENV: "cmd /E:ON /V:ON /C obvci_appveyor_python_build_env.cmd"
branches:
  only:
  - master

  # We set a default Python version for the miniconda that is to be installed. This can be
  # overridden in the matrix definition where appropriate.
  CONDA_PY: "27"
version: '0.1.0.{build}'

environment:
  matrix:
    - PYTHON: "C:\\Miniconda35-x64\\Scripts\\activate.bat"
      PYTHON_VERSION: 3.5
    - PYTHON: "C:\\Miniconda36-x64\\Scripts\\activate.bat"
      PYTHON_VERSION: 3.6

    - TARGET_ARCH: x64
      CONDA_PY: 35

# We always use a 64-bit machine, but can build x86 distributions
# with the TARGET_ARCH variable.
platform:
  - x64

install:
    # If there is a newer build queued for the same PR, cancel this one.
    # The AppVeyor 'rollout builds' option is supposed to serve the same
    # purpose but it is problematic because it tends to cancel builds pushed
    # directly to master instead of just PR builds (or the converse).
    # credits: JuliaLang developers.
    - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
          throw "There are newer queued builds for this pull request, failing early." }

    # Cygwin's git breaks conda-build. (See https://github.com/conda-forge/conda-smithy-feedstock/pull/2.)
    - cmd: rmdir C:\cygwin /s /q
    - appveyor DownloadFile "https://raw.githubusercontent.com/pelson/Obvious-CI/master/bootstrap-obvious-ci-and-miniconda.py"
    - cmd: python bootstrap-obvious-ci-and-miniconda.py %CONDA_INSTALL_LOCN% %TARGET_ARCH% %CONDA_PY:~0,1% --without-obvci
    - cmd: set PATH=%CONDA_INSTALL_LOCN%;%CONDA_INSTALL_LOCN%\scripts;%PATH%
    - cmd: set PYTHONUNBUFFERED=1

    - cmd: conda config --set show_channel_urls true
    - cmd: conda install --yes python=3.5
    - cmd: conda install -c pelson/channel/development --yes --quiet obvious-ci
    - cmd: conda config --add channels conda-forge
    - cmd: conda info
    - cmd: conda install -n root --quiet --yes conda-build anaconda-client jinja2 setuptools
    # Workaround for Python 3.4 and x64 bug in latest conda-build.
    # FIXME: Remove once there is a release that fixes the upstream issue
    # ( https://github.com/conda/conda-build/issues/895 ).
    - cmd: if "%TARGET_ARCH%" == "x64" if "%CONDA_PY%" == "34" conda install conda-build=1.20.0 --yes
configuration:
  - Release

    # Now install the package dependencies
install:
  - cmd: call %PYTHON%
  - cmd: conda config --set always_yes yes --set changeps1 no
  - cmd: conda update -q conda
  - cmd: conda install conda-build anaconda-client 
  - cmd: conda create -q -n test_env python=%PYTHON_VERSION%
  - cmd: activate test_env
  - cmd: conda config --add channels conda-forge
    - cmd: conda config --add channels jlaura
    - cmd: conda install --yes -c conda-forge gdal h5py
    - cmd: conda install --yes pandas sqlalchemy pyyaml networkx affine
    - cmd: conda install --yes -c jlaura protobuf pvl

    # Development installation
    - cmd: conda install --yes pytest pytest-cov
    - cmd: pip install coveralls
  - cmd: conda install -c conda-forge pvl protobuf gdal numpy pandas sqlalchemy pyyaml networkx affine h5py scipy
  - cmd: conda install pytest-cov
  # https://pythonhosted.org/CodeChat/appveyor.yml.html
  - cmd: python -m pip install -U pip
  - cmd: python -m easy_install -U setuptools

# Skip .NET project specific build phase.
build: off
build_script:
  - cmd: python setup.py install
  
test_script:
    - cmd: pytest --cov=plio --ignore=plio/examples
    - "%CMD_IN_ENV% conda build conda --quiet"

deploy_script:
  - cmd: pytest plio/

    - 'python ci_support\upload_or_check_non_existence.py .\conda jlaura --channel=main'
on_success:
  - cmd: deactivate
  - cmd: conda config --set anaconda_upload yes
  - cmd: conda build --token %CONDA_UPLOAD_TOKEN% .

plio/io/io_ccam_pds.py

0 → 100644
+201 −0
Original line number Diff line number Diff line
# This code is used to read individual ChemCam files
# Header data is stored as attributes of the data frame
# White space is stripped from the column names
import os

import numpy as np
import pandas as pd
import scipy.io as io

from plio.utils.utils import lookup
from plio.utils.utils import file_search


def CCAM_CSV(input_data, ave=True):
    """Read a single ChemCam CCS .csv file into a pandas DataFrame.

    Header data is stored alongside the spectra under the 'meta' column
    level; white space is stripped from the column names.

    Parameters
    ----------
    input_data : str
        Path to the ChemCam CSV file.  The file name is parsed for the
        spacecraft clock (chars 4:13), sequence id (chars 25:34), and
        processing version (chars 34:36).
    ave : bool
        If True, keep only the shot-averaged ('mean') spectrum;
        otherwise keep the individual shots and drop 'mean'/'median'.

    Returns
    -------
    pandas.DataFrame
        One row per spectrum; columns are a MultiIndex of ('meta', ...)
        metadata columns followed by ('wvl', wavelength) columns.
    """
    # ChemCam CSV files have 14, 15, or 16 header rows depending on whether
    # extra rows for temperature and target name are present; try each count
    # in turn and re-raise only if the last attempt also fails.
    # (Replaces a nested cascade of bare `except:` clauses that could also
    # swallow KeyboardInterrupt/SystemExit.)
    for header_rows in (14, 15, 16):
        try:
            df = pd.read_csv(input_data, header=header_rows, engine='c')
            # Strip whitespace and the leading '# ' marker from column names.
            df.columns = [i.strip().replace('# ', '') for i in df.columns.values]
            df.set_index(['wave'], inplace=True)  # use wavelengths as indices
            # Read the file header; it becomes per-file metadata columns.
            metadata = pd.read_csv(input_data, sep='=', nrows=header_rows,
                                   comment=',', engine='c', index_col=0, header=None)
            break
        except Exception:
            if header_rows == 16:
                raise

    if ave:
        df = pd.DataFrame(df['mean'])
    else:
        df = df.drop(['mean', 'median'], axis=1)
    # Create a multiindex so spectra can be easily extracted with a single
    # key; round the wavelength values to a reasonable precision.
    df.index = [['wvl'] * len(df.index), df.index.values.round(4)]
    df = df.T  # transpose so that each spectrum is a row

    # Remove extraneous formatting from the metadata indices.
    metadata.index = [i.strip().strip('# ').replace(' FLOAT', '').lower() for i in metadata.index.values]
    metadata = metadata.T

    # Extract info from the file name.
    fname = os.path.basename(input_data)
    metadata['sclock'] = fname[4:13]
    metadata['seqid'] = fname[25:34].upper()
    metadata['Pversion'] = fname[34:36]

    # Duplicate the metadata for each row in the df.
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0.)
    if not ave:
        metadata = pd.concat([metadata] * len(df.index), ignore_index=True)
    metadata.index = df.index  # make the indices match
    # Make the metadata columns a multiindex under the 'meta' label.
    metadata.columns = [['meta'] * len(metadata.columns), metadata.columns.values]
    df = pd.concat([metadata, df], axis=1)  # combine the spectra with the metadata
    return df


def CCAM_SAV(input_data, ave=True):
    """Read a ChemCam LIBS spectrum from an IDL .SAV file into a DataFrame.

    Parameters
    ----------
    input_data : str
        Path to the IDL .SAV file (read with scipy.io.readsav).
    ave : bool
        If True, return only the shot-averaged spectrum as a single-row
        frame; otherwise return one row per shot plus the 'average' and
        'median' rows.

    Returns
    -------
    pandas.DataFrame
        Rows are spectra; columns are a MultiIndex of ('meta', ...)
        metadata columns followed by ('wvl', wavelength) columns.
    """
    data = io.readsav(input_data, python_dict=True)

    # Per-shot spectra for the three spectrometers (UV, VIS, VNIR),
    # stacked so the index runs over all wavelengths.
    df_UV = pd.DataFrame(data['uv'], index=data['defuv'])
    df_VIS = pd.DataFrame(data['vis'], index=data['defvis'])
    df_VNIR = pd.DataFrame(data['vnir'], index=data['defvnir'])
    df_spect = pd.concat([df_UV, df_VIS, df_VNIR])
    # Add 1 to the column labels so they correspond to 1-based shot numbers.
    df_spect.columns = ['shot' + str(i + 1) for i in df_spect.columns]

    # Shot-averaged spectrum.
    df_aUV = pd.DataFrame(data['auv'], index=data['defuv'], columns=['average'])
    df_aVIS = pd.DataFrame(data['avis'], index=data['defvis'], columns=['average'])
    df_aVNIR = pd.DataFrame(data['avnir'], index=data['defvnir'], columns=['average'])
    df_ave = pd.concat([df_aUV, df_aVIS, df_aVNIR])

    # Shot-median spectrum.
    df_mUV = pd.DataFrame(data['muv'], index=data['defuv'], columns=['median'])
    df_mVIS = pd.DataFrame(data['mvis'], index=data['defvis'], columns=['median'])
    df_mVNIR = pd.DataFrame(data['mvnir'], index=data['defvnir'], columns=['median'])
    df_med = pd.concat([df_mUV, df_mVIS, df_mVNIR])

    df = pd.concat([df_spect, df_ave, df_med], axis=1)
    # Create a multiindex to access wavelength values under 'wvl';
    # also round the wavelength values to a reasonable level of precision.
    df.index = [['wvl'] * len(df.index), df.index.values.round(4)]
    # Transpose so that spectra are rows rather than columns.
    df = df.T

    # Extract metadata from the file name and add it to the data frame,
    # using the multiindex label 'meta' for all metadata.
    fname = os.path.basename(input_data)

    # Some ChemCam files store the dark spectrum under the key 'darkname',
    # others under 'darkspec'; normalize to 'darkname'.  (Replaces a bare
    # try/except that could mask unrelated errors.)
    if 'darkname' not in data:
        data['darkname'] = data['darkspec']

    meta_names = ['file',
                  'sclock',
                  'seqid',
                  'Pversion',
                  'continuumvismin',
                  'continuumvnirmin',
                  'continuumuvmin',
                  'continuumvnirend',
                  'distt',
                  'dark',
                  'nshots',
                  'dnoiseiter',
                  'dnoisesig',
                  'matchedfilter']
    metadata = [fname,
                fname[4:13],           # spacecraft clock from the file name
                fname[25:34].upper(),  # sequence id
                fname[34:36],          # processing version
                data['continuumvismin'],
                data['continuumvnirmin'],
                data['continuumuvmin'],
                data['continuumvnirend'],
                data['distt'],
                data['darkname'],
                data['nshots'],
                data['dnoiseiter'],
                data['dnoisesig'],
                data['matchedfilter']]
    # Duplicate the metadata for every spectrum row.
    metadata = np.tile(metadata, (len(df.index), 1))
    # Pair each metadata name with the 'meta' top-level label.  Sizing the
    # zip by len(meta_names) (not len(df.index)) guarantees the column index
    # is complete even when the frame has fewer rows than metadata columns.
    metadata_cols = list(zip(['meta'] * len(meta_names), meta_names))
    metadata = pd.DataFrame(metadata, columns=pd.MultiIndex.from_tuples(metadata_cols), index=df.index)

    df = pd.concat([metadata, df], axis=1)
    if ave:
        # Keep only the shot-averaged spectrum as a single-row frame.
        df = df.loc['average']
        df = df.to_frame().T

    return df


def ccam_batch(directory, searchstring='*.csv', to_csv=None, lookupfile=None, ave=True, progressbar=None):
    """Read and combine a batch of ChemCam files from a directory.

    For each spacecraft clock value, only the file with the highest
    processing version is read.  Files are parsed with CCAM_SAV when the
    search pattern contains '.sav', otherwise with CCAM_CSV.

    Parameters
    ----------
    directory : str
        Directory to search for ChemCam files.
    searchstring : str
        Pattern passed to file_search.
    to_csv : str or None
        If given, the combined DataFrame is also written to this path.
    lookupfile : str or None
        If given, the string is stripped of brackets/quotes/spaces, split
        on commas, and passed to `lookup` to merge in extra metadata.
    ave : bool
        Forwarded to CCAM_SAV/CCAM_CSV: keep only averaged spectra.
    progressbar : object or None
        Currently unused placeholder (see note below).

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-file DataFrames.
    """
    # Determine if the file is a .csv or .SAV
    if '.sav' in searchstring.lower():
        is_sav = True
    else:
        is_sav = False
    filelist = file_search(directory, searchstring)
    # NOTE(review): the boolean-mask indexing below (filelist[match]) assumes
    # file_search returns a numpy array — confirm against its implementation.
    basenames = np.zeros_like(filelist)
    sclocks = np.zeros_like(filelist)
    P_version = np.zeros_like(filelist, dtype='int')

    # Extract the sclock and version for each file and ensure that only one
    # file per sclock is being read, and that it is the one with the highest version number
    for i, name in enumerate(filelist):
        basenames[i] = os.path.basename(name)
        sclocks[i] = basenames[i][4:13]  # extract the sclock
        P_version[i] = basenames[i][-5:-4]  # extract the version

    sclocks_unique = np.unique(sclocks)  # find unique sclocks
    filelist_new = np.array([], dtype='str')
    for i in sclocks_unique:
        match = (sclocks == i)  # find all instances with matching sclocks
        maxP = P_version[match] == max(P_version[match])  # find the highest version among these files
        filelist_new = np.append(filelist_new, filelist[match][maxP])  # keep only the file with the highest version

    filelist = filelist_new
    # Should add a progress bar for importing large numbers of files
    # NOTE(review): `dt` is never used below; `progressbar` is also unused.
    dt = []

    for i, file in enumerate(filelist):
        # NOTE(review): debug print of each file as it is read.
        print(file)
        if is_sav:
            tmp = CCAM_SAV(file, ave=ave)
        else:
            tmp = CCAM_CSV(file, ave=ave)
        if i == 0:
            combined = tmp
        else:
            # This ensures that rounding errors are not causing mismatches in columns
            cols1 = list(combined['wvl'].columns)
            cols2 = list(tmp['wvl'].columns)
            if set(cols1) == set(cols2):
                combined = pd.concat([combined, tmp])
            else:
                # Mismatched wavelength grids: the file is skipped, not merged.
                print("Wavelengths don't match!")

    # NOTE(review): if filelist is empty, `combined` is unbound and the next
    # line raises NameError — confirm whether that case can occur upstream.
    combined.loc[:, ('meta', 'sclock')] = pd.to_numeric(combined.loc[:, ('meta', 'sclock')])

    if lookupfile is not None:

        combined = lookup(combined, lookupfile=lookupfile.replace('[','').replace(']','').replace("'",'').replace(' ','').split(','))
    if to_csv is not None:
        combined.to_csv(to_csv)
    return combined

plio/io/io_edr.py

0 → 100644
+80 −0
Original line number Diff line number Diff line
import os

import numpy as np
import pandas as pd


def _read_header_line(f):
    """Decode the next line of the binary stream as UTF-8, stripping CR/LF."""
    return str(f.readline(), 'utf-8').replace('\r', '').replace('\n', '')


def EDR(input_file):
    """Read a ChemCam EDR file and return the raw spectra with metadata.

    The PDS-style text label is scanned for the record size, label length,
    spacecraft clock, sequence id, focus distance, instrument temperatures,
    and the location of the LIBS data container.  The binary section is then
    decoded as big-endian unsigned 16-bit integers, 6444 channels per shot.

    Parameters
    ----------
    input_file : str
        Path to the EDR file.

    Returns
    -------
    pandas.DataFrame
        One row per shot; columns are a MultiIndex of ('channel', n)
        spectral columns plus ('meta', ...) metadata columns.
    """
    n_channels = 6444  # spectrometer channels per shot

    # --- Scan the text label until the end of the LIBS table definition. ---
    # Open in binary mode so Python does not complain about the binary part;
    # the context manager guarantees the handle is closed on error.
    with open(input_file, 'rb') as f:
        end_of_libs_table = False
        while not end_of_libs_table:
            line = _read_header_line(f).split('=')  # split on '=' if present
            if 'RECORD_BYTES' in line[0]:
                rbytes = int(line[1])
            if 'LABEL_RECORDS' in line[0]:
                lrecs = int(line[1])
            if 'SPACECRAFT_CLOCK_START_COUNT' in line[0]:
                sclock = int(line[1].replace('"', '').split('.')[0])
            if 'SEQUENCE_ID' in line[0]:
                seqID = line[1].replace('"', '')
            if 'INSTRUMENT_FOCUS_DISTANCE' in line[0]:
                focus_dist = int(line[1])
            if 'INSTRUMENT_TEMPERATURE' in line[0]:
                # The temperature list continues over three more lines.
                temps = line[1]
                for _ in range(3):
                    temps += _read_header_line(f)
                temps = temps.replace('<degC>', '').replace('(', '').replace(')', '').replace(' ', '')
                instrument_temps = [float(i) for i in temps.split(',')]
                # The matching name list starts on the next line and
                # continues over four more lines.
                names = _read_header_line(f).split('=')[1]
                for _ in range(4):
                    names += _read_header_line(f)
                names = names.replace(' ', '').replace('(', '').replace(')', '').replace('"', '')
                instrument_temps_name = names.split(',')
                f.readline()  # skip the line following the name list
            # Lines without '=' have no value part; guard the value lookups
            # explicitly instead of swallowing IndexError with a bare except.
            if len(line) > 1:
                if 'CCAM_LIBS_DATA_CONTAINER' in line[1]:
                    nshots = int(_read_header_line(f).split('=')[1])
                    start_byte = int(_read_header_line(f).split('=')[1])
                if 'END_OBJECT' in line[0] and 'CCAM_LIBS_TABLE' in line[1]:
                    end_of_libs_table = True

    # Number of header bytes to skip to get to the real data.
    header_skip = lrecs * rbytes

    # --- Read the binary spectra: big-endian unsigned 16-bit integers. ---
    with open(input_file, 'rb') as f:
        f.seek(header_skip + start_byte - 1, 0)
        raw = f.read(nshots * n_channels * 2)
        # A short read means a truncated file; frombuffer/reshape then raise
        # instead of silently zero-filling as the old byte loop did.
        spectra = np.frombuffer(raw, dtype='>u2').reshape(nshots, n_channels).astype('int')

    cols = [('channel', i) for i in range(1, n_channels + 1)]
    inds = np.arange(1, nshots + 1)
    sp = pd.DataFrame(spectra, columns=pd.MultiIndex.from_tuples(cols), index=inds)
    sp[('meta', 'EDR_file')] = os.path.basename(input_file)
    sp[('meta', 'Spacecraft_Clock')] = sclock
    sp[('meta', 'Shot')] = sp.index
    sp[('meta', 'SeqID')] = seqID
    sp[('meta', 'Focus_Distance')] = focus_dist
    for ind, name in enumerate(instrument_temps_name):
        sp[('meta', name + '_temp')] = instrument_temps[ind]
    # (A leftover debug side effect, sp.to_csv('test.csv'), was removed.)
    return sp

plio/io/io_jsc.py

0 → 100644
+202 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading