socet read without regex (13e31075) · Commits · aflab / astrogeology / Plio

plio/io/io_bae.py

+44 −31

Original line number	Diff line number	Diff line
		@@ -7,53 +7,66 @@ from functools import singledispatch
		import numpy as np
		import pandas as pd

		def socetset_keywords_to_json(keywords, ell=None):
		from plio.utils.utils import is_number, convert_string_to_float

		def socetset_keywords_to_dict(keywords, ell=None):
		"""
		Convert a SocetCet keywords.list file to JSON

		Parameters
		----------
		keywords : str
		Path to the socetset keywords.list file
		Path to the socetset keywords.list file or a raw string that
		will be split on '\n' and parsed.

		ell : str
		Optional path to the ellipsoid keywords.list file or a raw string
		that will be split on '\n' and parsed

		Returns
		-------
		: str
		The serialized JSON string.
		data : dict
		A dictionary containing the socet keywords parsed.

		"""
		matcher = re.compile(r'\b(?!\d)\w+\b')
		numeric_matcher = re.compile(r'\W?-?(?:0\|[1-9]\d)(?:\.\d)?(?:[eE][+\-]?\d+)?')
		stream = {}

		def parse(fi):
		with open(fi, 'r') as f:
		for l in f:
		l = l.rstrip()
		data = {}

		def parse(lines):
		for l in lines:
		l = l.strip()
		if not l:
		continue
		matches = matcher.findall(l)
		if matches:
		key = matches[0]
		stream[key] = []
		# Case where the kw are strings after the key
		if len(matches) > 1:
		stream[key] = matches[1:]
		# Case where the kw are numeric types after the key
		else:
		nums = numeric_matcher.findall(l)
		if len(nums) == 1:
		stream[key] = float(nums[0])
		elems = l.split()
		if is_number(elems[0]) is False:
		key = elems[0]
		if key in data.keys():
		raise ValueError('Duplicate dictionary key: {}'.format(key))
		data[key] = []
		if len(elems) == 1:
		continue
		if len(elems) == 2:
		data[key] = convert_string_to_float(elems[1])
		else:
		stream[key] += map(float, nums)
		data[key] += [convert_string_to_float(e) for e in elems[1:]]
		else:
		# Case where the values are on a newline after the key
		nums = numeric_matcher.findall(l)
		stream[key] += map(float, nums)
		data[key] += [convert_string_to_float(e) for e in elems]

		if os.path.exists(keywords):
		with open(keywords, 'r') as f:
		keywords = f.readlines()
		else:
		keywords = keywords.split('\n')
		parse(keywords)

		if ell:
		if os.path.exists(ell):
		with open(ell, 'r') as f:
		ell = f.readlines()
		else:
		ell = ell.split('\n')
		parse(ell)
		return json.dumps(stream)

		return data

		@singledispatch
		def read_ipf(arg): # pragma: no cover

plio/io/tests/test_io_bae.py

+45 −11

Original line number	Diff line number	Diff line
		@@ -5,7 +5,7 @@ import numpy as np
		import pandas as pd
		from pandas.util.testing import assert_frame_equal

		from plio.io.io_bae import socetset_keywords_to_json, read_gpf, save_gpf, read_ipf, save_ipf
		from plio.io.io_bae import socetset_keywords_to_dict, read_gpf, save_gpf, read_ipf, save_ipf
		from plio.examples import get_path

		import pytest
		@@ -89,13 +89,47 @@ def test_write_gpf(gpf, file):

		# np.testing.assert_array_almost_equal(truth_arr, test_arr)

		def test_create_from_socet_lis():
		# Parse the example SOCET keyword list and spot-check the decoded JSON.
		socetlis = get_path('socet_isd.lis')
		# NOTE(review): socetell is assigned but never passed to the parser below, so the
		# SEMI_MAJOR_AXIS assertion depends on what socet_isd.lis itself contains - confirm.
		socetell = get_path('ellipsoid.ell')
		js = json.loads(socetset_keywords_to_json(socetlis))
		assert isinstance(js, dict) # This is essentially a JSON linter
		# Manually validated
		assert 'RECTIFICATION_TERMS' in js.keys()
		assert 'SEMI_MAJOR_AXIS' in js.keys() # From ellipsoid file
		assert 'NUMBER_OF_EPHEM' in js.keys()
		# The assertion below requires three EPHEM_PTS entries per counted ephemeris point.
		assert len(js['EPHEM_PTS']) / 3 == js['NUMBER_OF_EPHEM']
		class TestISDFromSocetLis():
		# Tests for socetset_keywords_to_dict, each driven by inline keyword text
		# passed as a raw string rather than by a file on disk.

		def test_parse_with_empty_newlines(self):
		# Ensure all keys read when whitespace present
		empty_newlines = r"""T0_QUAT 1.0000000000000000000000000e-01

		T1_QUAT 1.0000000000000000000000000e-01"""
		data = socetset_keywords_to_dict(empty_newlines)
		assert len(data.keys()) == 2

		def test_duplicate_key_check(self):
		# The same key appearing on two lines must raise ValueError.
		duplicate_keys = r"""T 1
		T 1"""
		with pytest.raises(ValueError):
		data = socetset_keywords_to_dict(duplicate_keys)

		def test_multiple_per_line(self):
		# Several values after the key on one line are collected into a list.
		multiple_per_line = r"""T 1 1 1"""
		data = socetset_keywords_to_dict(multiple_per_line)
		assert len(data['T']) == 3

		def test_key_on_different_line(self):
		# Values on the lines following a bare key belong to that key until the
		# next key line starts.
		key_on_different_line = r"""A
		0.0 1.00000000000000e+00 2.00000000000000e+00
		3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
		B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
		data = socetset_keywords_to_dict(key_on_different_line)
		assert len(data['A']) == 6
		assert data['A'] == [0, 1, 2, 3, 4, 5]

		assert len(data['B']) == 3
		assert data['B'] == [0.1, 2, 3]

		def test_key_on_different_line_whitespace(self):
		# NOTE(review): as rendered here this input is identical to the one in
		# test_key_on_different_line; presumably the value lines carry extra
		# leading/trailing whitespace in the real file - confirm.
		key_on_different_line_whitespace = r"""A
		0.0 1.00000000000000e+00 2.00000000000000e+00
		3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
		B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
		data = socetset_keywords_to_dict(key_on_different_line_whitespace)
		assert len(data['A']) == 6
		assert data['A'] == [0, 1, 2, 3, 4, 5]

		assert len(data['B']) == 3
		assert data['B'] == [0.1, 2, 3]

plio/utils/utils.py

+42 −0

Original line number	Diff line number	Diff line
		@@ -9,6 +9,48 @@ import pandas as pd

		import numpy as np

		def is_number(s):
		"""
		Check if an argument is convertable to a number

		Parameters
		----------
		s : object
		The argument to check for conversion

		Returns
		-------
		: bool
		True if conversion is possible, otherwise False.
		"""
		try:
		float(s)
		return True
		except ValueError:
		return False

		def convert_string_to_float(s):
		"""
		Attempt to convert a string to a float.

		Parameters
		---------
		s : str
		The string to convert

		Returns
		-------
		: float / str
		If successful, the converted value, else the argument is passed back
		out.
		"""

		try:
		return float(s)
		except TypeError:
		return s


		def metadatatoband(metadata):
		wv2band = []
		for k, v in metadata.items():