Commit 7353260b authored by jlaura's avatar jlaura Committed by GitHub
Browse files

Moves PySAT data structures into plio (#31)

* Makes gdal an optional dependency

* Refactors all bae I/O into a single namespace.

* typos

* Fixes the gdal dependency test

* Adds tests for the JSON

* Tests fail without test data

* Updates for comments from @thareUSGS

* Adds ISIS3 V2 ControlNetwork Reader

* Adds hcube mixin object
parent d7286705
Loading
Loading
Loading
Loading

plio/io/hcube.py

0 → 100644
+75 −0
Original line number Diff line number Diff line
import numpy as np
import gdal

class HCube(object):
    """
    A Mixin class for use with the io_gdal.GeoDataset class
    to optionally add support for spectral labels, label
    based indexing, and lazy loading for reads. 
    """

    @property
    def wavelengths(self):
        if not hasattr(self, '_wavelengths'):
            try:
                info = gdal.Info(self.file_name, format='json')
                wavelengths = [float(j) for i, j in sorted(info['metadata'][''].items(),
                                 key=lambda x: float(x[0].split('_')[-1]))]
                self._original_wavelengths = wavelengths
                self._wavelengths = np.round(wavelengths, self.tolerance)
            except:
                self._wavelengths = []
        return self._wavelengths
    
    @property
    def tolerance(self):
        return getattr(self, '_tolerance', 2)
    
    @tolerance.setter
    def tolerance(self, val):
        wv = self.wavelengths
        if isinstance(val, int):
            self._tolerance = val
            self._reindex()
        else:
            raise TypeError

    def _reindex(self):
        if self._original_wavelengths is not None:
            self._wavelengths = np.round(self._original_wavelengths, decimals=self.tolerance)

    def __getitem__(self, key):
        return _iLocIndexer(self)
    
    @property
    def loc(self):
        return _LocIndexer(self)
    
    @property
    def iloc(self):
        return _iLocIndexer(self)
    
    def _read(self, key):
        ifnone = lambda a, b: b if a is None else a

        y = key[1]
        x = key[2]
        
        if isinstance(x, slice):
            xstart = ifnone(x.start,0)
            xstop = ifnone(x.stop,self.raster_size[0])
            xstep = xstop - xstart
        else:
            raise TypeError("Loc style access elements must be slices, e.g., [:] or [10:100]")
        if isinstance(y, slice):
            ystart = ifnone(y.start, 0)
            ystop = ifnone(y.stop, self.raster_size[1])
            ystep = ystop - ystart
        else:
            raise TypeError("Loc style access elements must be slices, e.g., [:] or [10:100]")
            
        pixels = (xstart, ystart, xstop, ystop)
        arrs = []
        for b in key[0]:
            arrs.append(self.read_array(band=int(b+1), pixels=pixels))
        return np.stack(arrs)
 No newline at end of file

plio/io/io_crism.py

0 → 100644
+32 −0
Original line number Diff line number Diff line
import os
import numpy as np
from .io_gdal import GeoDataset
from .hcube import HCube
import gdal


class Crism(GeoDataset, HCube):
    """
    An M3 specific reader with the spectral mixin.
    """
    @property
    def wavelengths(self):
        if not hasattr(self, '_wavelengths'):
            try:
                info = gdal.Info(self.file_name, format='json')
                wv = dict((k,v) for (k,v) in info['metadata'][''].items() if 'Band' in k) # Only get the 'Band_###' keys
                wavelengths = [float(j.split(" ")[0]) for i, j in sorted(wv.items(),
                                key=lambda x: int(x[0].split('_')[-1]))]
                self._original_wavelengths = wavelengths
                self._wavelengths = np.round(wavelengths, self.tolerance)
            except:
                self._wavelengths = []
        return self._wavelengths

def open(input_data):
    if os.path.splitext(input_data)[-1] == 'hdr':
        # GDAL wants the img, but many users aim at the .hdr
        input_data = os.path.splitext(input_data)[:-1] + '.img'
    ds = Crism(input_data)

    return ds
 No newline at end of file
+24 −17
Original line number Diff line number Diff line
import os
import numpy as np
from .io_gdal import GeoDataset
from .hcube import HCube
import gdal


class M3(GeoDataset, HCube):
    """
    An M3 specific reader with the spectral mixin.
    """
    @property
    def wavelengths(self):
        if not hasattr(self, '_wavelengths'):
            try:
                info = gdal.Info(self.file_name, format='json')
                wavelengths = [float(j) for i, j in sorted(info['metadata'][''].items(),
                                 key=lambda x: float(x[0].split('_')[-1]))]
                self._original_wavelengths = wavelengths
                self._wavelengths = np.round(wavelengths, self.tolerance)
            except:
                self._wavelengths = []
        return self._wavelengths

def open(input_data):
    if input_data.split('.')[-1] == 'hdr':
    if os.path.splitext(input_data)[-1] == 'hdr':
        # GDAL wants the img, but many users aim at the .hdr
        input_data = input_data.split('.')[0] + '.img'
    ds = GeoDataSet(input_data)
    ref_array = ds.read_array()
    metadata = ds.metadata
    wv_array = metadatatoband(metadata)
    return wv_array, ref_array, ds
        input_data = os.path.splitext(input_data)[:-1] + '.img'
    ds = M3(input_data)

def metadatatoband(metadata):
    wv2band = []
    for k, v in metadata.items():
        try:
            wv2band.append(float(v))
        except:
            v = v.split(" ")[-1].split("(")[1].split(")")[0]
            wv2band.append(float(v))
    wv2band.sort(key=int)
    return np.asarray(wv2band)
    return ds

plio/utils/indexing.py

0 → 100644
+73 −0
Original line number Diff line number Diff line
import warnings
import numpy as np

def is_dict_like(value):
    return hasattr(value, 'keys') and hasattr(value, '__getitem__')

def expanded_indexer(key, ndim):
    """Given a key for indexing an ndarray, return an equivalent key which is a
    tuple with length equal to the number of dimensions.
    The expansion is done by replacing all `Ellipsis` items with the right
    number of full slices and then padding the key with full slices so that it
    reaches the appropriate dimensionality.
    """
    if not isinstance(key, tuple):
        # numpy treats non-tuple keys equivalent to tuples of length 1
        key = (key,)
    new_key = []
    # handling Ellipsis right is a little tricky, see:
    # http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing
    found_ellipsis = False
    for k in key:
        if k is Ellipsis:
            if not found_ellipsis:
                new_key.extend((ndim + 1 - len(key)) * [slice(None)])
                found_ellipsis = True
            else:
                new_key.append(slice(None))
        else:
            new_key.append(k)
    if len(new_key) > ndim:
        raise IndexError('too many indices')
    new_key.extend((ndim - len(new_key)) * [slice(None)])
    return tuple(new_key)

class _LocIndexer(object):
    def __init__(self, data_array):
        self.data_array = data_array
        
    def __getitem__(self, key):
        # expand the indexer so we can handle Ellipsis
        key = expanded_indexer(key, 3)
        sl = key[0]
        ifnone = lambda a, b: b if a is None else a
        if isinstance(sl, slice):
            sl = list(range(ifnone(sl.start, 0), self.data_array.nbands, ifnone(sl.step, 1)))
        
        idx = [self._get_idx(s) for s in sl]
        key = (idx, key[1], key[2])
        return self.data_array._read(key)   
    
    def _get_idx(self, value, tolerance=2):
        vals = np.abs(self.data_array.wavelengths-value)
        minidx = np.argmin(vals)
        if vals[minidx] >= tolerance:
            warning.warn("Absolute difference between requested value and found values is {}".format(vals[minidx]))
        return minidx
    
class _iLocIndexer(object):
    def __init__(self, data_array):
        self.data_array = data_array

    def __getitem__(self, key):
        # expand the indexer so we can handle Ellipsis
        key = expanded_indexer(key, 3)
        sl = key[0]
        ifnone = lambda a, b: b if a is None else a
        if isinstance(sl, slice):
            sl = list(range(ifnone(sl.start, 0),
                            ifnone(sl.stop, self.data_array.nbands),
                            ifnone(sl.step, 1)))
        
        key = (key[0], key[1], key[2])
        return self.data_array._read(key)
 No newline at end of file
+12 −0
Original line number Diff line number Diff line
@@ -7,6 +7,18 @@ import shutil
import tempfile
import pandas as pd

import numpy as np

def metadatatoband(metadata):
    wv2band = []
    for k, v in metadata.items():
        try:
            wv2band.append(float(v))
        except:
            v = v.split(" ")[-1].split("(")[1].split(")")[0]
            wv2band.append(float(v))
    wv2band.sort(key=int)
    return np.asarray(wv2band)

def create_dir(basedir=''):
    """