Merge pull request #69 from jlaura/hdf (4540712b) · Commits · aflab / astrogeology / Autocnet

.travis.yml

+2 −2

Original line number	Diff line number	Diff line
		@@ -27,13 +27,13 @@ install:
		- conda info -a

		# Create a virtual env and install dependencies
		- conda create -y -q -n test-env python=$TRAVIS_PYTHON_VERSION nose numpy pillow scipy pandas networkx scikit-image sqlalchemy numexpr dill
		- conda create -y -q -n test-env python=$TRAVIS_PYTHON_VERSION nose numpy pillow scipy pandas networkx scikit-image sqlalchemy numexpr dill cython
		# Activate the env
		- source activate test-env

		# Install the non-conda packages if required; requirements.txt duplicates are ignored
		- conda install -c https://conda.anaconda.org/jlaura opencv3=3.0.0
		- conda install -c https://conda.anaconda.org/anaconda gdal
		- conda install -c https://conda.anaconda.org/jlaura h5py gdal
		- conda install -c osgeo proj4
		- conda upgrade numpy
		- pip install -r requirements.txt

AUTHORS.rst

+1 −0

Original line number	Diff line number	Diff line
		@@ -10,6 +10,7 @@ Development Team
		* Jeannie Backer <jwbacker@usgs.gov>
		* Dyer Lytle <dmlytle@usgs.gov>
		* Kelvin Rodriguez <krodriguez@usgs.gov>
		* Adam Paquette <acpaquette@usgs.gov>

		Contributors
		------------

autocnet/fileio/hdf.py

deleted100644 → 0

+0 −38

Original line number	Diff line number	Diff line
		import h5py as h5
		import numpy as np

		class HDFDataSet(object):
		    """
		    Read / Write an HDF5 dataset using h5py.

		    The underlying ``h5py.File`` handle is opened lazily the first time
		    :attr:`data` is accessed and is then cached on the instance.
		    """

		    # NOTE: the default path is a hard-coded, machine-specific location.  It
		    # is kept only so existing no-argument callers keep working; new code
		    # should always pass ``filename`` explicitly.
		    def __init__(self, filename='/scratch/jlaura/newrun.h5'):
		        """
		        Parameters
		        ----------
		        filename : str
		                   Path of the HDF5 file to open.
		        """
		        self.filename = filename
		        self.groups = None
		        # Lazily populated by the ``data`` property.
		        self._data = None

		    @property
		    def data(self):
		        """
		        The open file handle for ``self.filename``.

		        The file is opened on first access (no mode is passed, so h5py's
		        default mode applies) and the same handle is returned afterwards.
		        """
		        if getattr(self, '_data', None) is None:
		            self._data = h5.File(self.filename)
		        return self._data

		    def getgroups(self):
		        """
		        Get all of the first order neighbors to the root node.

		        The keys are read from the file once and cached in ``self.groups``.

		        Returns
		        -------
		        groups : list
		                 A unicode list of the keys of the file.
		        """
		        if self.groups is None:
		            self.groups = self.data.keys()
		        return self.groups

		    def getattributes(self):
		        """
		        Print the attribute (name, value) pairs of every top-level group.
		        """
		        for k in self.getgroups():
		            # Single-argument call form prints identically on Python 2 and 3.
		            print(self.data[k].attrs.items())

autocnet/fileio/io_hdf.py

0 → 100644

+84 −0

Original line number	Diff line number	Diff line
		import h5py as h5
		import numpy as np
		import pandas as pd


		# Module-level compression defaults; not referenced by the code visible in
		# this diff, so presumably consumed by dataset-writing code elsewhere.
		DEFAULT_COMPRESSION = 'gzip'
		DEFAULT_COMPRESSION_VALUE = 8 # 0 - 9 (presumably the gzip level -- TODO confirm)


		class HDFDataset(h5.File):
		    """
		    Read / Write an HDF5 dataset using h5py. If HDF5 is compiled with
		    parallel support, this class will support parallel I/O of all supported
		    types as well as Pandas dataframes.
		    """

		    def __init__(self, filename, mode='a'):
		        """
		        Open an HDF5 file.

		        Parameters
		        ----------
		        filename : str
		                   Path to the HDF5 file

		        mode : str
		               File mode forwarded to ``h5py.File``. Default: 'a'
		        """
		        super(HDFDataset, self).__init__(filename, mode)

		    def __del__(self):
		        # Release the file handle when the object is garbage collected.
		        self.close()

		    @staticmethod
		    def df_to_sarray(df):
		        """
		        Convert a pandas DataFrame object to a numpy structured array.

		        Each column becomes one field of the result, named after the
		        column and typed from the column's own dtype.  The DataFrame index
		        is not included; reset it into a column first if it must be kept.

		        From: http://stackoverflow.com/questions/30773073/save-pandas-dataframe-using-h5py-for-interoperabilty-with-other-hdf5-readers

		        Parameters
		        ----------
		        df : dataframe
		             the data frame to convert

		        Returns
		        -------
		        z : ndarray
		            a numpy structured array representation of df
		        """
		        cols = df.columns
		        dtype = np.dtype([(k, df[k].dtype.type) for k in cols])
		        z = np.zeros(len(df), dtype)
		        for k in cols:
		            # Copy column by column: ``df.values`` would first upcast a
		            # mixed-dtype frame to one common dtype (e.g. int64 -> float64),
		            # which can silently corrupt large integers.
		            z[k] = df[k].values
		        return z

		    @staticmethod
		    def sarray_to_df(sarray, index_column='index'):
		        """
		        Convert from a structured array back to a Pandas Dataframe

		        Parameters
		        ----------
		        sarray : array
		                 numpy structured array

		        index_column : str
		                       The name of the field to use as the index; that
		                       field is not repeated as a data column.  Pass None
		                       to keep every field as a column and use the default
		                       index.  Default: 'index'

		        Returns
		        -------
		         : dataframe
		           A pandas dataframe
		        """
		        names = list(sarray.dtype.names)
		        index = None
		        if index_column is not None:
		            # Field lookup fails here if ``index_column`` is not in sarray.
		            index = sarray[index_column]
		            # Drop the field actually used as the index, not a hard-coded
		            # 'index', so custom index columns are not duplicated.
		            names.remove(index_column)

		        data = {name: sarray[name] for name in names}
		        return pd.DataFrame(data=data, index=index, columns=names)

autocnet/fileio/tests/test_io_gdal.py

+3 −3

Original line number	Diff line number	Diff line
		@@ -207,12 +207,12 @@ class TestWriter(unittest.TestCase):
		SPHEROID["Moon_2000_IAU_IAG",1737400,0]],
		PRIMEM["Reference_Meridian",0],
		UNIT["Degree",0.017453292519943295]],
		PROJECTION["Mercator_1SP"],
		PROJECTION["Mercator_2SP"],
		PARAMETER["central_meridian",180],
		PARAMETER["false_easting",0],
		PARAMETER["false_northing",0],
		UNIT["Meter",1],
		PARAMETER["latitude_of_origin",0.0]]"""
		PARAMETER["standard_parallel_1",0],
		UNIT["Meter",1]]"""
		dataset = io_gdal.GeoDataset('test.tif')
		test_srs = dataset.spatial_reference.__str__()
		self.assertEqual(test_srs.split(), expected_srs.split())