Commit 7673a2fd authored by Jay's avatar Jay Committed by jay
Browse files

HDF5 datastore in place - need to patch in to the node now

parent f6b6b0c0
Loading
Loading
Loading
Loading

autocnet/fileio/hdf.py

deleted100644 → 0
+0 −38
Original line number Diff line number Diff line
import h5py as h5
import numpy as np

class HDFDataSet(object):
    """
    Read / Write an HDF5 dataset using h5py.

    The underlying file is opened lazily, the first time ``data`` is
    accessed, and the handle is cached on the instance afterwards.
    """

    # TODO: the default path is a hard-coded, machine-specific location; it
    # is kept only so existing callers that rely on it keep working.
    def __init__(self, filename='/scratch/jlaura/newrun.h5'):
        """
        Parameters
        ----------
        filename : str
                   Path of the HDF5 file to open on first access.
        """
        self.filename = filename
        # Cache of the root-level keys; populated by getgroups().
        self.groups = None

    @property
    def data(self):
        """
        The open h5py file handle, created on first access and then reused.
        """
        if getattr(self, '_data', None) is None:
            self._data = h5.File(self.filename)
        return self._data

    def getgroups(self):
        """
        Get all of the first order neighbors to the root node.

        Returns
        -------
        groups : list
                 The keys found at the root of the file. The list is
                 computed once and cached on ``self.groups``.
        """
        if self.groups is None:
            # Materialize the keys so the cached value is a real list and
            # not a view tied to the file handle.
            self.groups = list(self.data.keys())
        return self.groups

    def getattributes(self):
        """
        Print the attribute (name, value) pairs of every root-level key.
        """
        for key in self.getgroups():
            print(list(self.data[key].attrs.items()))
+78 −0
Original line number Diff line number Diff line
import h5py as h5
import numpy as np
import pandas as pd


class HDFDataset(h5.File):
    """
    Read / Write an HDF5 dataset using h5py.  If HDF5 is compiled with
    parallel support, this class will support parallel I/O of all supported
    types as well as Pandas dataframes.
    """

    def __init__(self, filename, mode='a'):
        """
        Open (or create) an HDF5 file.

        Parameters
        ----------
        filename : str
                   Path to the HDF5 file

        mode : str
               File mode forwarded to ``h5py.File``; defaults to ``'a'``
        """
        super(HDFDataset, self).__init__(filename, mode)

    def __del__(self):
        # Close the file handle when the object is garbage collected.
        self.close()

    @staticmethod
    def df_to_sarray(df):
        """
        Convert a pandas DataFrame object to a numpy structured array.
        The result is comparable to ``np.array(df.to_records(index=False))``;
        the DataFrame index is not included in the output.

        From: http://stackoverflow.com/questions/30773073/save-pandas-dataframe-using-h5py-for-interoperabilty-with-other-hdf5-readers

        Parameters
        ----------
        df : dataframe
             the data frame to convert

        Returns
        -------
        z : ndarray
            a numpy structured array representation of df, with one field
            per column (named after the column, typed from its dtype)
        """

        types = [(name, dt.type) for name, dt in zip(df.columns, df.dtypes)]
        z = np.zeros(df.shape[0], np.dtype(types))
        for i, name in enumerate(z.dtype.names):
            # Copy column by column rather than slicing ``df.values``: the
            # 2D values array is upcast to one common dtype, which can lose
            # precision (e.g. large integers routed through float64).
            z[name] = df.iloc[:, i].values
        return z

    @staticmethod
    def sarray_to_df(sarray, index_column='index'):
        """
        Convert from a structured array back to a Pandas Dataframe

        Parameters
        ----------
        sarray : array
                 numpy structured array

        index_column : str or None
                       Name of the field to use as the DataFrame index.
                       That field is removed from the data columns.  If
                       None, all fields become columns and the default
                       integer index is used.

        Returns
        -------
         : dataframe
           A pandas dataframe
        """

        names = sarray.dtype.names
        if index_column is None:
            index = None
            columns = list(names)
        else:
            index = sarray[index_column]
            # Drop the field that was actually requested as the index
            # (not a hard-coded 'index') from the data columns.
            columns = [name for name in names if name != index_column]

        return pd.DataFrame(data=sarray, index=index, columns=columns)
 No newline at end of file
+15 −0
Original line number Diff line number Diff line
import unittest

import numpy as np
import pandas as pd

from .. import io_hdf


class TestHDF(unittest.TestCase):
    """
    Tests for the DataFrame <-> structured array helpers on HDFDataset.
    """

    def test_df_sarray(self):
        # One integer and one float column; each becomes a named field.
        df = pd.DataFrame({'a': [1, 2, 3], 'b': [0.5, 1.5, 2.5]},
                          columns=['a', 'b'])
        sarray = io_hdf.HDFDataset.df_to_sarray(df)

        self.assertEqual(sarray.dtype.names, ('a', 'b'))
        self.assertEqual(sarray.shape, (3,))
        np.testing.assert_array_equal(sarray['a'], [1, 2, 3])
        np.testing.assert_array_almost_equal(sarray['b'], [0.5, 1.5, 2.5])

    def test_sarray_df(self):
        sarray = np.array([(10, 1, 0.5), (11, 2, 1.5)],
                          dtype=[('index', 'i8'), ('a', 'i8'), ('b', 'f8')])

        # Default: the 'index' field becomes the index and is not a column.
        df = io_hdf.HDFDataset.sarray_to_df(sarray)
        self.assertEqual(list(df.columns), ['a', 'b'])
        self.assertEqual(list(df.index), [10, 11])
        np.testing.assert_array_equal(df['a'].values, [1, 2])
        np.testing.assert_array_almost_equal(df['b'].values, [0.5, 1.5])

        # No index column: every field is kept as a column.
        unindexed = io_hdf.HDFDataset.sarray_to_df(sarray, index_column=None)
        self.assertEqual(list(unindexed.columns), ['index', 'a', 'b'])
        self.assertEqual(list(unindexed.index), [0, 1])
+2 −1
Original line number Diff line number Diff line
@@ -10,3 +10,4 @@ pandas
scikit-image
sqlalchemy
dill
h5py
 No newline at end of file