Loading plio/io/io_spectral_profiler.py 0 → 100755 +130 −0 Original line number Diff line number Diff line import pandas as pd import pvl import numpy as np from plio.utils.utils import find_in_dict class Spectral_Profiler(object): """ Attributes ---------- spectra : panel A pandas panel containing n individual spectra. ancillary_data : dataframe A pandas DataFrame of the parsed ancillary data (PVL label) label : object The raw PVL label object offsets : dict with key as the spectra index and value as the start byte offset """ def __init__(self, input_data, cleaned=True, qa_threshold=2000): """ Read the .spc file, parse the label, and extract the spectra Parameters ---------- input_data : string The PATH to the input .spc file cleaned : boolean If True, mask the data based on the QA array. nspectra : int The number of spectra in the given data file qa_threshold : int The threshold value over which observations are masked as noise if cleaned is True. """ label_dtype_map = {'IEEE_REAL':'f', 'MSB_INTEGER':'i', 'MSB_UNSIGNED_INTEGER':'u'} label = pvl.load(input_data) self.label = label with open(input_data, 'rb') as indata: # Extract and handle the ancillary data ancillary_data = find_in_dict(label, "ANCILLARY_AND_SUPPLEMENT_DATA") self.nspectra = nrows = ancillary_data['ROWS'] ncols = ancillary_data['COLUMNS'] rowbytes = ancillary_data['ROW_BYTES'] columns = [] bytelengths = [] datatypes = [] ancillary_data_offset = find_in_dict(label, "^ANCILLARY_AND_SUPPLEMENT_DATA").value indata.seek(ancillary_data_offset - 1) for i in ancillary_data.items(): if i[0] == 'COLUMN': entry = i[1] # Level 2B2 PVL has entries with 0 bytes, e.g. omitted. if entry['BYTES'] > 0: columns.append(str(entry['NAME'])) datatypes.append(label_dtype_map[entry['DATA_TYPE']]) bytelengths.append(entry['BYTES']) else: ncols -= 1 strbytes = map(str, bytelengths) rowdtype = list(zip(columns, map(''.join, zip(['>'] * ncols, datatypes, strbytes)))) d = np.fromstring(indata.read(rowbytes * nrows), dtype=rowdtype, count=nrows) self.ancillary_data = pd.DataFrame(d, columns=columns, index=np.arange(nrows)) assert(ncols == len(columns)) keys = [] array_offsets = [] for d in ['WAV', 'RAW', 'REF', 'REF1', 'REF2', 'DAR', 'QA']: search_key = '^SP_SPECTRUM_{}'.format(d) result = find_in_dict(label, search_key) if result: array_offsets.append(result.value) keys.append('SP_SPECTRUM_{}'.format(d)) offsets = dict(zip(keys, array_offsets)) arrays = {} for k, offset in offsets.items(): indata.seek(offset - 1) newk = k.split('_')[-1] d = find_in_dict(label, k) unit = d['UNIT'] lines = d['LINES'] scaling_factor = d['SCALING_FACTOR'] arr = np.fromstring(indata.read(lines * 296*2), dtype='>H').astype(np.float64) arr = arr.reshape(lines, -1) # If the data is scaled, apply the scaling factor if isinstance(scaling_factor, float): arr *= scaling_factor arrays[newk] = arr self.wavelengths = pd.Series(arrays['WAV'][0]) self.spectra = {} for i in range(nrows): self.spectra[i] = pd.DataFrame(index=self.wavelengths) for k in keys: k = k.split('_')[-1] if k == 'WAV': continue self.spectra[i][k] = arrays[k][i] if cleaned: self.spectra[i] = self.spectra[i][self.spectra[i]['QA'] < qa_threshold] self.spectra = pd.Panel(self.spectra) plio/io/tests/test_io_spectral_profiler.py 0 → 100644 +26 −0 Original line number Diff line number Diff line import os import sys import unittest import pandas as pd sys.path.insert(0, os.path.abspath('..')) from plio.examples import get_path from plio.io import io_spectral_profiler class Test_Spectral_Profiler_IO(unittest.TestCase): def setUp(self): self.examplefile = get_path('SP_2C_02_02358_S138_E3586.spc') def test_openspc(self): ds = io_spectral_profiler.Spectral_Profiler(self.examplefile) self.assertEqual(ds.nspectra, 38) self.assertIsInstance(ds.spectra, pd.Panel) self.assertEqual(ds.spectra[0].columns.tolist(), ['RAW', 'REF1', 'REF2', 'QA']) if __name__ == '__main__': unittest.main() Loading
plio/io/io_spectral_profiler.py 0 → 100755 +130 −0 Original line number Diff line number Diff line import pandas as pd import pvl import numpy as np from plio.utils.utils import find_in_dict class Spectral_Profiler(object): """ Attributes ---------- spectra : panel A pandas panel containing n individual spectra. ancillary_data : dataframe A pandas DataFrame of the parsed ancillary data (PVL label) label : object The raw PVL label object offsets : dict with key as the spectra index and value as the start byte offset """ def __init__(self, input_data, cleaned=True, qa_threshold=2000): """ Read the .spc file, parse the label, and extract the spectra Parameters ---------- input_data : string The PATH to the input .spc file cleaned : boolean If True, mask the data based on the QA array. nspectra : int The number of spectra in the given data file qa_threshold : int The threshold value over which observations are masked as noise if cleaned is True. """ label_dtype_map = {'IEEE_REAL':'f', 'MSB_INTEGER':'i', 'MSB_UNSIGNED_INTEGER':'u'} label = pvl.load(input_data) self.label = label with open(input_data, 'rb') as indata: # Extract and handle the ancillary data ancillary_data = find_in_dict(label, "ANCILLARY_AND_SUPPLEMENT_DATA") self.nspectra = nrows = ancillary_data['ROWS'] ncols = ancillary_data['COLUMNS'] rowbytes = ancillary_data['ROW_BYTES'] columns = [] bytelengths = [] datatypes = [] ancillary_data_offset = find_in_dict(label, "^ANCILLARY_AND_SUPPLEMENT_DATA").value indata.seek(ancillary_data_offset - 1) for i in ancillary_data.items(): if i[0] == 'COLUMN': entry = i[1] # Level 2B2 PVL has entries with 0 bytes, e.g. omitted. if entry['BYTES'] > 0: columns.append(str(entry['NAME'])) datatypes.append(label_dtype_map[entry['DATA_TYPE']]) bytelengths.append(entry['BYTES']) else: ncols -= 1 strbytes = map(str, bytelengths) rowdtype = list(zip(columns, map(''.join, zip(['>'] * ncols, datatypes, strbytes)))) d = np.fromstring(indata.read(rowbytes * nrows), dtype=rowdtype, count=nrows) self.ancillary_data = pd.DataFrame(d, columns=columns, index=np.arange(nrows)) assert(ncols == len(columns)) keys = [] array_offsets = [] for d in ['WAV', 'RAW', 'REF', 'REF1', 'REF2', 'DAR', 'QA']: search_key = '^SP_SPECTRUM_{}'.format(d) result = find_in_dict(label, search_key) if result: array_offsets.append(result.value) keys.append('SP_SPECTRUM_{}'.format(d)) offsets = dict(zip(keys, array_offsets)) arrays = {} for k, offset in offsets.items(): indata.seek(offset - 1) newk = k.split('_')[-1] d = find_in_dict(label, k) unit = d['UNIT'] lines = d['LINES'] scaling_factor = d['SCALING_FACTOR'] arr = np.fromstring(indata.read(lines * 296*2), dtype='>H').astype(np.float64) arr = arr.reshape(lines, -1) # If the data is scaled, apply the scaling factor if isinstance(scaling_factor, float): arr *= scaling_factor arrays[newk] = arr self.wavelengths = pd.Series(arrays['WAV'][0]) self.spectra = {} for i in range(nrows): self.spectra[i] = pd.DataFrame(index=self.wavelengths) for k in keys: k = k.split('_')[-1] if k == 'WAV': continue self.spectra[i][k] = arrays[k][i] if cleaned: self.spectra[i] = self.spectra[i][self.spectra[i]['QA'] < qa_threshold] self.spectra = pd.Panel(self.spectra)
plio/io/tests/test_io_spectral_profiler.py 0 → 100644 +26 −0 Original line number Diff line number Diff line import os import sys import unittest import pandas as pd sys.path.insert(0, os.path.abspath('..')) from plio.examples import get_path from plio.io import io_spectral_profiler class Test_Spectral_Profiler_IO(unittest.TestCase): def setUp(self): self.examplefile = get_path('SP_2C_02_02358_S138_E3586.spc') def test_openspc(self): ds = io_spectral_profiler.Spectral_Profiler(self.examplefile) self.assertEqual(ds.nspectra, 38) self.assertIsInstance(ds.spectra, pd.Panel) self.assertEqual(ds.spectra[0].columns.tolist(), ['RAW', 'REF1', 'REF2', 'QA']) if __name__ == '__main__': unittest.main()