Commit 13e31075 authored by jay's avatar jay
Browse files

socet read without regex

parent e403b3f9
Loading
Loading
Loading
Loading
+44 −31
Original line number Diff line number Diff line
@@ -7,53 +7,66 @@ from functools import singledispatch
import numpy as np
import pandas as pd

def socetset_keywords_to_json(keywords, ell=None):
from plio.utils.utils import is_number, convert_string_to_float

def socetset_keywords_to_dict(keywords, ell=None):
    """
    Convert a SocetCet keywords.list file to JSON

    Parameters
    ----------
    keywords : str
               Path to the socetset keywords.list file
               Path to the socetset keywords.list file or a raw string that
               will be split on '\n' and parsed.
    
    ell : str
          Optional path to the ellipsoid keywords.list file or a raw string 
          that will be split on '\n' and parsed

    Returns
    -------
     : str
       The serialized JSON string.
     data : dict 
            A dictionary containing the socet keywords parsed.

    """
    matcher = re.compile(r'\b(?!\d)\w+\b')
    numeric_matcher = re.compile(r'\W?-?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?')
    stream = {}

    def parse(fi):
        with open(fi, 'r') as f:
            for l in f:
                l = l.rstrip()
    data = {}

    def parse(lines):
        for l in lines:
            l = l.strip()
            if not l:
                continue
                matches = matcher.findall(l)
                if matches:
                    key = matches[0]
                    stream[key] = []
                    # Case where the kw are strings after the key
                    if len(matches) > 1:
                        stream[key] = matches[1:]
                    # Case where the kw are numeric types after the key
                    else:
                        nums = numeric_matcher.findall(l)
                        if len(nums) == 1:
                            stream[key] = float(nums[0])
            elems = l.split()
            if is_number(elems[0]) is False:
                key = elems[0]
                if key in data.keys():
                    raise ValueError('Duplicate dictionary key: {}'.format(key))
                data[key] = []
                if len(elems) == 1:
                    continue
                if len(elems) == 2:
                    data[key] = convert_string_to_float(elems[1])
                else:
                            stream[key] += map(float, nums)
                    data[key] += [convert_string_to_float(e) for e in elems[1:]]
            else:
                    # Case where the values are on a newline after the key
                    nums = numeric_matcher.findall(l)
                    stream[key] += map(float, nums)
                data[key] += [convert_string_to_float(e) for e in elems]

    if os.path.exists(keywords):
        with open(keywords, 'r') as f:
            keywords = f.readlines()
    else:
        keywords = keywords.split('\n')
    parse(keywords)
   
    if ell:
        if os.path.exists(ell):
            with open(ell, 'r') as f:
                ell = f.readlines()
        else:
            ell = ell.split('\n')
        parse(ell)
    return json.dumps(stream)

    return data

@singledispatch
def read_ipf(arg): # pragma: no cover
+45 −11
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@ import numpy as np
import pandas as pd
from pandas.util.testing import assert_frame_equal

from plio.io.io_bae import socetset_keywords_to_json, read_gpf, save_gpf, read_ipf, save_ipf
from plio.io.io_bae import socetset_keywords_to_dict, read_gpf, save_gpf, read_ipf, save_ipf
from plio.examples import get_path

import pytest
@@ -89,13 +89,47 @@ def test_write_gpf(gpf, file):

    # np.testing.assert_array_almost_equal(truth_arr, test_arr)

def test_create_from_socet_lis():
    """Parse the example SOCET SET ``.lis`` file and spot-check the result."""
    socetlis = get_path('socet_isd.lis')
    socetell = get_path('ellipsoid.ell')

    # Decoding the returned string doubles as a JSON lint of the output.
    serialized = socetset_keywords_to_json(socetlis)
    js = json.loads(serialized)
    assert isinstance(js, dict)

    # Keys validated by hand against the example data; SEMI_MAJOR_AXIS is
    # noted in the original test as coming from the ellipsoid file.
    for expected_key in ('RECTIFICATION_TERMS',
                         'SEMI_MAJOR_AXIS',
                         'NUMBER_OF_EPHEM'):
        assert expected_key in js

    # Ephemeris points are stored flat, three values per point.
    ephem_point_count = len(js['EPHEM_PTS']) / 3
    assert ephem_point_count == js['NUMBER_OF_EPHEM']
class TestISDFromSocetLis:
    """Parsing tests for ``socetset_keywords_to_dict`` fed with raw strings."""

    def test_parse_with_empty_newlines(self):
        """Blank lines between entries must not cause keys to be dropped."""
        raw = r"""T0_QUAT 1.0000000000000000000000000e-01

T1_QUAT 1.0000000000000000000000000e-01"""
        parsed = socetset_keywords_to_dict(raw)
        assert len(parsed) == 2

    def test_duplicate_key_check(self):
        """A keyword that appears twice is rejected with ``ValueError``."""
        raw = r"""T 1
T 1"""
        with pytest.raises(ValueError):
            socetset_keywords_to_dict(raw)

    def test_multiple_per_line(self):
        """Several values following a key on one line are all collected."""
        raw = r"""T 1 1 1"""
        parsed = socetset_keywords_to_dict(raw)
        assert len(parsed['T']) == 3

    def test_key_on_different_line(self):
        """Values on the lines after a bare key are attached to that key."""
        raw = r"""A
0.0 1.00000000000000e+00 2.00000000000000e+00
3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
        parsed = socetset_keywords_to_dict(raw)

        values_a = parsed['A']
        assert len(values_a) == 6
        assert values_a == [0, 1, 2, 3, 4, 5]

        values_b = parsed['B']
        assert len(values_b) == 3
        assert values_b == [0.1, 2, 3]

    def test_key_on_different_line_whitespace(self):
        """Indented continuation lines parse the same as unindented ones."""
        raw = r"""A
    0.0 1.00000000000000e+00 2.00000000000000e+00
    3.0000000000000e+00 4.00000000000000e+00 5.00000000000000e+00
B 1.0e-01 2.000000e+00 3.00000000000000e+00"""
        parsed = socetset_keywords_to_dict(raw)

        values_a = parsed['A']
        assert len(values_a) == 6
        assert values_a == [0, 1, 2, 3, 4, 5]

        values_b = parsed['B']
        assert len(values_b) == 3
        assert values_b == [0.1, 2, 3]
+42 −0
Original line number Diff line number Diff line
@@ -9,6 +9,48 @@ import pandas as pd

import numpy as np

def is_number(s):
    """
    Check if an argument is convertable to a number

    The check is performed by attempting ``float(s)``, so anything the
    builtin accepts (ints, floats, numeric strings such as ``'1e-3'``,
    ``'nan'`` or ``'inf'``) is reported as a number.

    Parameters
    ----------
    s : object
        The argument to check for conversion

    Returns
    -------
     : bool
       True if conversion is possible, otherwise False.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: a string that does not spell a number.
        # TypeError: an object float() cannot handle at all (None, list, ...).
        return False
    return True
    
def convert_string_to_float(s):
    """
    Attempt to convert a string to a float.

    Parameters
    ----------
    s : str
        The string to convert

    Returns
    -------
    : float / str
      If successful, the converted value, else the argument is passed back
      out.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # float() raises ValueError for non-numeric strings and TypeError for
        # unsupported objects; in both cases hand the input back unchanged.
        return s


def metadatatoband(metadata):
    wv2band = []
    for k, v in metadata.items():