Loading plio/io/io_bae.py +44 −31 Original line number Diff line number Diff line Loading @@ -7,53 +7,66 @@ from functools import singledispatch import numpy as np import pandas as pd def socetset_keywords_to_json(keywords, ell=None): from plio.utils.utils import is_number, convert_string_to_float def socetset_keywords_to_dict(keywords, ell=None): """ Convert a SocetCet keywords.list file to JSON Parameters ---------- keywords : str Path to the socetset keywords.list file Path to the socetset keywords.list file or a raw string that will be split on '\n' and parsed. ell : str Optional path to the ellipsoid keywords.list file or a raw string that will be split on '\n' and parsed Returns ------- : str The serialized JSON string. data : dict A dictionary containing the socet keywords parsed. """ matcher = re.compile(r'\b(?!\d)\w+\b') numeric_matcher = re.compile(r'\W?-?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?') stream = {} def parse(fi): with open(fi, 'r') as f: for l in f: l = l.rstrip() data = {} def parse(lines): for l in lines: l = l.strip() if not l: continue matches = matcher.findall(l) if matches: key = matches[0] stream[key] = [] # Case where the kw are strings after the key if len(matches) > 1: stream[key] = matches[1:] # Case where the kw are numeric types after the key else: nums = numeric_matcher.findall(l) if len(nums) == 1: stream[key] = float(nums[0]) elems = l.split() if is_number(elems[0]) is False: key = elems[0] if key in data.keys(): raise ValueError('Duplicate dictionary key: {}'.format(key)) data[key] = [] if len(elems) == 1: continue if len(elems) == 2: data[key] = convert_string_to_float(elems[1]) else: stream[key] += map(float, nums) data[key] += [convert_string_to_float(e) for e in elems[1:]] else: # Case where the values are on a newline after the key nums = numeric_matcher.findall(l) stream[key] += map(float, nums) data[key] += [convert_string_to_float(e) for e in elems] if os.path.exists(keywords): with open(keywords, 'r') as f: keywords 
= f.readlines() else: keywords = keywords.split('\n') parse(keywords) if ell: if os.path.exists(ell): with open(ell, 'r') as f: ell = f.readlines() else: ell = ell.split('\n') parse(ell) return json.dumps(stream) return data @singledispatch def read_ipf(arg): # pragma: no cover Loading plio/io/tests/test_io_bae.py +45 −11 Original line number Diff line number Diff line Loading @@ -5,7 +5,7 @@ import numpy as np import pandas as pd from pandas.util.testing import assert_frame_equal from plio.io.io_bae import socetset_keywords_to_json, read_gpf, save_gpf, read_ipf, save_ipf from plio.io.io_bae import socetset_keywords_to_dict, read_gpf, save_gpf, read_ipf, save_ipf from plio.examples import get_path import pytest Loading Loading @@ -89,13 +89,47 @@ def test_write_gpf(gpf, file): # np.testing.assert_array_almost_equal(truth_arr, test_arr) def test_create_from_socet_lis(): socetlis = get_path('socet_isd.lis') socetell = get_path('ellipsoid.ell') js = json.loads(socetset_keywords_to_json(socetlis)) assert isinstance(js, dict) # This is essentially a JSON linter # Manually validated assert 'RECTIFICATION_TERMS' in js.keys() assert 'SEMI_MAJOR_AXIS' in js.keys() # From ellipsoid file assert 'NUMBER_OF_EPHEM' in js.keys() assert len(js['EPHEM_PTS']) / 3 == js['NUMBER_OF_EPHEM'] class TestISDFromSocetLis(): def test_parse_with_empty_newlines(self): # Ensure all keys read when whitespace present empty_newlines = r"""T0_QUAT 1.0000000000000000000000000e-01 T1_QUAT 1.0000000000000000000000000e-01""" data = socetset_keywords_to_dict(empty_newlines) assert len(data.keys()) == 2 def test_duplicate_key_check(self): duplicate_keys = r"""T 1 T 1""" with pytest.raises(ValueError): data = socetset_keywords_to_dict(duplicate_keys) def test_multiple_per_line(self): multiple_per_line = r"""T 1 1 1""" data = socetset_keywords_to_dict(multiple_per_line) assert len(data['T']) == 3 def test_key_on_different_line(self): key_on_different_line = r"""A 0.0 1.00000000000000e+00 
2.00000000000000e+00 3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00 B 1.0e-01 2.000000e+00 3.00000000000000e+00""" data = socetset_keywords_to_dict(key_on_different_line) assert len(data['A']) == 6 assert data['A'] == [0, 1, 2, 3, 4, 5] assert len(data['B']) == 3 assert data['B'] == [0.1, 2, 3] def test_key_on_different_line_whitespace(self): key_on_different_line_whitespace = r"""A 0.0 1.00000000000000e+00 2.00000000000000e+00 3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00 B 1.0e-01 2.000000e+00 3.00000000000000e+00""" data = socetset_keywords_to_dict(key_on_different_line_whitespace) assert len(data['A']) == 6 assert data['A'] == [0, 1, 2, 3, 4, 5] assert len(data['B']) == 3 assert data['B'] == [0.1, 2, 3] plio/utils/utils.py +42 −0 Original line number Diff line number Diff line Loading @@ -9,6 +9,48 @@ import pandas as pd import numpy as np def is_number(s): """ Check if an argument is convertable to a number Parameters ---------- s : object The argument to check for conversion Returns ------- : bool True if conversion is possible, otherwise False. """ try: float(s) return True except ValueError: return False def convert_string_to_float(s): """ Attempt to convert a string to a float. Parameters --------- s : str The string to convert Returns ------- : float / str If successful, the converted value, else the argument is passed back out. """ try: return float(s) except TypeError: return s def metadatatoband(metadata): wv2band = [] for k, v in metadata.items(): Loading Loading
def socetset_keywords_to_dict(keywords, ell=None):
    """
    Convert a Socet Set keywords.list file to a dictionary

    Each keyword starts a line and is followed by zero or more
    whitespace-separated values. Values may continue on the following
    lines; a line whose first token is numeric is treated as a
    continuation of the most recent keyword.

    Parameters
    ----------
    keywords : str
               Path to the socetset keywords.list file or a raw string that
               will be split on newlines and parsed.

    ell : str
          Optional path to the ellipsoid keywords.list file or a raw string
          that will be split on newlines and parsed

    Returns
    -------
    data : dict
           A dictionary containing the socet keywords parsed. A keyword
           with exactly one value maps to that value; otherwise it maps to
           a list of values (empty when the keyword has none). Numeric
           tokens are converted to float, any other token is kept as a
           string.

    Raises
    ------
    ValueError
        If a keyword is defined more than once (across both inputs), or if
        a line of values appears before any keyword.
    """
    data = {}

    def to_number(token):
        # Non-numeric tokens (string-valued keywords) are passed through
        # unchanged instead of aborting the parse.
        try:
            return float(token)
        except ValueError:
            return token

    def read_lines(source):
        # An existing path is read from disk; anything else is treated as
        # the raw keyword text itself.
        if os.path.exists(source):
            with open(source, 'r') as f:
                return f.readlines()
        return source.split('\n')

    def parse(lines):
        key = None
        for line in lines:
            elems = line.split()
            if not elems:
                # Blank or whitespace-only line
                continue

            if isinstance(to_number(elems[0]), str):
                # A non-numeric first token starts a new keyword
                key = elems[0]
                if key in data:
                    raise ValueError('Duplicate dictionary key: {}'.format(key))
                values = [to_number(e) for e in elems[1:]]
                data[key] = values[0] if len(values) == 1 else values
                continue

            # Case where the values are on a newline after the key
            if key is None:
                raise ValueError(
                    'Values found before any keyword: {}'.format(line.strip()))
            if not isinstance(data[key], list):
                # The key line carried a single value that was stored as a
                # scalar; promote it so the continuation can be appended.
                data[key] = [data[key]]
            data[key] += [to_number(e) for e in elems]

    parse(read_lines(keywords))
    if ell:
        parse(read_lines(ell))

    return data
class TestISDFromSocetLis():
    """Parsing tests for socetset_keywords_to_dict using raw keyword strings."""

    def test_parse_with_empty_newlines(self):
        # Ensure all keys read when whitespace present
        empty_newlines = r"""T0_QUAT 1.0000000000000000000000000e-01

T1_QUAT 1.0000000000000000000000000e-01"""

        data = socetset_keywords_to_dict(empty_newlines)
        assert len(data) == 2

    def test_duplicate_key_check(self):
        # The same keyword on two separate lines must be rejected
        duplicate_keys = r"""T 1
T 1"""

        with pytest.raises(ValueError):
            socetset_keywords_to_dict(duplicate_keys)

    def test_multiple_per_line(self):
        multiple_per_line = r"""T 1 1 1"""

        data = socetset_keywords_to_dict(multiple_per_line)
        assert len(data['T']) == 3

    def test_key_on_different_line(self):
        # Values for A start on the line after the keyword
        key_on_different_line = r"""A
0.0 1.00000000000000e+00 2.00000000000000e+00
3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
B 1.0e-01 2.000000e+00 3.00000000000000e+00"""

        data = socetset_keywords_to_dict(key_on_different_line)
        assert len(data['A']) == 6
        assert data['A'] == [0, 1, 2, 3, 4, 5]
        assert len(data['B']) == 3
        assert data['B'] == [0.1, 2, 3]

    def test_key_on_different_line_whitespace(self):
        # Same as above, but the continuation lines carry leading whitespace
        key_on_different_line_whitespace = r"""A
    0.0 1.00000000000000e+00 2.00000000000000e+00
  3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
B 1.0e-01 2.000000e+00 3.00000000000000e+00"""

        data = socetset_keywords_to_dict(key_on_different_line_whitespace)
        assert len(data['A']) == 6
        assert data['A'] == [0, 1, 2, 3, 4, 5]
        assert len(data['B']) == 3
        assert data['B'] == [0.1, 2, 3]
def is_number(s):
    """
    Check if an argument is convertible to a number

    Parameters
    ----------
    s : object
        The argument to check for conversion

    Returns
    -------
    : bool
      True if conversion is possible, otherwise False.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number, e.g. 'abc'
        # TypeError: an object float() cannot accept at all, e.g. None
        return False
    return True


def convert_string_to_float(s):
    """
    Attempt to convert a string to a float.

    Parameters
    ---------
    s : str
        The string to convert

    Returns
    -------
    : float / str
      If successful, the converted value, else the argument is passed
      back out.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # float() raises ValueError for non-numeric strings and TypeError
        # for unsupported types; in both cases hand the input back.
        return s