Unverified Commit 19ba13cc authored by Lauren Adoram-Kershner's avatar Lauren Adoram-Kershner Committed by GitHub
Browse files

Improving place_points_from_cnet speed (#603)

* updating from_cnet; tests still needed

* reverting transform/spatial reproject change

* adding a test to ensure df is formatted with Model columns

* addressing comment round one

* addressing comments round two

* Change log and bug fixes
parent bc85da3b
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -36,9 +36,11 @@ release.

### Added
- Added a mutual information matcher [#559](https://github.com/USGS-Astrogeology/autocnet/pull/559)
- Added residual column information to the Points model

### Changed
- `geom_match_simple` defaults to a 3rd order warp for interpolation
- Sped up `place_points_from_cnet` by loading points and measures with the COPY method instead of ORM inserts

### Fixed
- `update_from_jigsaw` failures due to stale code. Now uses a context manager on the engine to ensure closure
@@ -59,3 +61,5 @@ release.
- Image to ground to support multiple input types with proper output type handling #580
- Support for ISIS special pixels in image data #577
- Fix for no correlation map returned from `geom_match_simple` #556

+104 −39
Original line number Diff line number Diff line
@@ -2222,50 +2222,115 @@ class NetworkCandidateGraph(CandidateGraph):
                    except Exception as e:
                        warnings.warn(f'Failed to reset primary id sequence for table {t}')

    def cnet_to_db(self, cnet):
        """
        Split an ISIS control network into two dataframes mirroring the points
        and measures database table formats.

        Point and measure ids are assigned sequentially starting at 1, in the
        order the rows appear in the control network.

        Parameters
        ----------
        cnet : str or IsisControlNetwork
               The ISIS control network or path to the ISIS control network to be loaded.

        Returns
        -------
        points : DataFrame
                 One row per unique point identifier, restricted to the columns
                 of the io.db.model.Points table

        measures : DataFrame
                   One row per control measure, restricted to the columns of
                   the io.db.model.Measures table

        Raises
        ------
        KeyError
            If a measure's serial number has no matching row in the images table.
        """
        spatial = self.config["spatial"]
        semi_major, semi_minor = spatial["semimajor_rad"], spatial["semiminor_rad"]

        if isinstance(cnet, str):
            cnet = from_isis(cnet)
        # rename() returns a new frame, so the columns added below never touch
        # the caller's control network.
        cnet = cnet.rename(columns={'id': 'identifier',
                                    'measureChoosername': 'ChooserName',
                                    'sampleResidual': 'sampler',
                                    'lineResidual': 'liner'})

        def _xyz_to_geoms(xs, ys, zs):
            # One database-ready 3D point geometry per (x, y, z) triple.
            return [geoalchemy2.shape.from_shape(shapely.geometry.Point(x, y, z))
                    for x, y, z in zip(xs, ys, zs)]

        # ------------------------------ points ------------------------------
        # Deep copy prevents pandas "value set on copy of a slice" warnings.
        points = cnet.copy(deep=True)
        points.drop_duplicates(subset=['identifier'], inplace=True)
        points.insert(0, 'id', list(range(1, len(points) + 1)))
        for empty_column in ('overlapid', 'residuals', 'maxResidual'):
            points[empty_column] = None
        points['cam_type'] = 'isis'

        apriori_xyz = [points[c].values for c in ('aprioriX', 'aprioriY', 'aprioriZ')]
        points['apriori'] = _xyz_to_geoms(*apriori_xyz)
        if (points['adjustedX'] == 0).all():
            # Every adjusted X is zero: reuse the apriori coordinates instead.
            points['adjusted'] = points['apriori']
            xyz_data = apriori_xyz
        else:
            xyz_data = [points[c].values for c in ('adjustedX', 'adjustedY', 'adjustedZ')]
            # Single-column assignment, matching 'apriori' above; a list key
            # (points[['adjusted']]) would treat the list as per-column values.
            points['adjusted'] = _xyz_to_geoms(*xyz_data)

        og = reproject(xyz_data, semi_major, semi_minor, 'geocent', 'latlon')
        oc = og2oc(og[0], og[1], semi_major, semi_minor)
        srid = spatial['latitudinal_srid']
        points['geom'] = [geoalchemy2.shape.from_shape(shapely.geometry.Point(lon, lat), srid=srid)
                          for lon, lat in zip(oc[0], oc[1])]

        # ----------------------------- measures -----------------------------
        cnet.insert(0, 'id', list(range(1, len(cnet) + 1)))
        pid_map = dict(zip(points['identifier'], points['id']))
        cnet['pointid'] = cnet['identifier'].map(pid_map)

        with self.session_scope() as session:
            imgs = session.query(Images.serial, Images.id).all()
        iid_map = {serial: image_id for serial, image_id in imgs}
        # Series.map would silently yield NaN for an unknown serial, so keep
        # the KeyError a plain dict lookup raises.
        unknown_serials = set(cnet['serialnumber']) - iid_map.keys()
        if unknown_serials:
            raise KeyError(f'Serial number(s) not in the images table: {sorted(unknown_serials, key=str)}')
        cnet['imageid'] = cnet['serialnumber'].map(iid_map)

        def _goodness_of_fit(mlog):
            # Value of the first "GoodnessOfFit" entry in a measure log, else None.
            if mlog:
                for m in mlog:
                    if m.messagetype.name == "GoodnessOfFit":
                        return m.value
            return None

        cnet['templateMetric'] = cnet['measureLog'].apply(_goodness_of_fit)

        # Distance between the measured and apriori location; forced to 0 when
        # the measure and the point share the same chooser name.
        shift = np.sqrt((cnet['line'] - cnet['aprioriline'])**2
                        + (cnet['sample'] - cnet['apriorisample'])**2)
        cnet['templateShift'] = np.where(cnet['ChooserName'] != cnet['pointChoosername'], shift, 0)
        cnet['residual'] = np.sqrt(cnet['liner']**2 + cnet['sampler']**2)
        cnet['rms'] = np.sqrt(np.mean([cnet['liner']**2, cnet['sampler']**2], axis=0))

        for empty_column in ('phaseError', 'phaseDiff', 'phaseShift', 'weight'):
            cnet[empty_column] = None

        # Keep only the columns the database tables define, in table order.
        point_columns = Points.__table__.columns.keys()
        measure_columns = Measures.__table__.columns.keys()
        points = points[point_columns]
        measures = cnet[measure_columns]

        return points, measures

    def place_points_from_cnet(self, cnet, clear_tables=True):
        """
        Loads points and measures from an ISIS control network into an AutoCNet
        formatted database using a bulk COPY instead of per-row inserts.

        Parameters
        ----------
        cnet : str or IsisControlNetwork
               The ISIS control network or path to the ISIS control network to be loaded.

        clear_tables : boolean
                       Clears entries out of the points and measures database tables
                       if True (the tables are dropped and recreated). Appends the
                       control network points and measures onto the current points
                       and measures database tables if False.

        Notes
        -----
        NOTE(review): cnet_to_db numbers point and measure ids from 1, so appending
        (clear_tables=False) to tables that already hold rows presumably collides
        on the id columns - confirm before relying on append mode.
        """
        if isinstance(cnet, str):
            cnet = from_isis(cnet)

        points, measures = self.cnet_to_db(cnet)

        engine = self.engine
        with engine.connect() as connection:
            # Reuse the already-open connection for the existence check; calling
            # engine.connect() here would open a second connection that is never
            # closed. clear_tables is tested first to skip the lookup when appending.
            if clear_tables and engine.dialect.has_table(connection, 'points', schema='public'):
                connection.execute('DROP TABLE measures, points;')
                Points.__table__.create(bind=engine, checkfirst=True)
                Measures.__table__.create(bind=engine, checkfirst=True)

            # Execute an SQL COPY from a CSV buffer into the DB. Points go first;
            # measures carry a pointid that refers back to them.
            copy_method = io_controlnetwork.copy_from_method
            for table_name, frame in (('points', points), ('measures', measures)):
                frame.to_sql(table_name, connection, schema='public', if_exists='append',
                             index=False, method=copy_method)

    @classmethod
    def from_cnet(cls, cnet, filelist, config):
+67 −26
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ import pytest
import sys

import pandas as pd
from plio.io.io_controlnetwork import IsisControlNetwork

from autocnet.io.db import model
from autocnet.graph.network import NetworkCandidateGraph
@@ -14,32 +15,51 @@ if sys.platform.startswith("darwin"):

@pytest.fixture()
def cnet():
    """
    Three-point ISIS control network with one measure per point.

    All adjusted coordinates are zero and every measure log is empty.
    """
    return IsisControlNetwork.from_dict({
            'id' : [1, 2, 3],
            'pointType' : [2]*3,
            'pointChoosername' : ['findfeatures']*3,
            'pointDatetime' : ['YYYY-MM-DDT00:00:00']*3,
            'pointEditLock': [False]*3,
            'pointIgnore' : [False]*3,
            'pointJigsawRejected': [False]*3,
            'referenceIndex' : [0]*3,
            'aprioriSurfPointSource': ['ground']*3,
            'aprioriSurfPointSourceFile' : ['ground.file']*3,
            'aprioriRadiusSource' : ['radius']*3,
            'aprioriRadiusSourceFile' : ['radius.file']*3,
            'latitudeConstrained' : [False]*3,
            'longitudeConstrained' : [False]*3,
            'radiusConstrained' : [False]*3,
            'aprioriX' : [1017046.81161667, -1402345.22133465, 103571.17894436],
            'aprioriY' : [1017046.81161667, -1402345.22133465, 103571.17894436],
            'aprioriZ' : [1014022.55349016, -1404707.80219809, 101009.09763132],
            'aprioriCovar' : [[]]*3,
            'adjustedX' : [0]*3,
            'adjustedY' : [0]*3,
            'adjustedZ' : [0]*3,
            'adjustedCovar' : [[]]*3,
            'pointLog' : [[]]*3,
            'serialnumber' : ['SN1345', 'SN2348', 'SN9730'],
            'measureType' : [1]*3,
            'sample' : [2]*3,
            'line' : [1]*3,
            'sampleResidual' : [0.1]*3,
            'lineResidual' : [0.1]*3,
            'measureChoosername' : ['pointreg']*3,
            'measureDatetime' : ['YYYY-MM-DDT00:00:00']*3,
            'measureEditLock' : [False]*3,
            'measureIgnore': [False]*3,
            'measureJigsawRejected': [False]*3,
            'diameter' : [1000]*3,
            'apriorisample' : [0]*3,
            'aprioriline' : [0]*3,
            'samplesigma': [0]*3,
            'linesigma' : [0]*3,
            'measureLog' : [[]]*3
            })


"""@pytest.mark.parametrize("image_data, expected_npoints", [({'id':1, 'serial': 'BRUH'}, 1)])
def test_place_points_from_cnet(cnet, image_data, expected_npoints, ncg):
    with ncg.session_scope() as session:
@@ -112,3 +132,24 @@ def test_selective_clear_db(ncg):
        assert len(res) == 1
        res = session.query(model.Points).all()
        assert len(res) == 0

def test_cnet_to_db(ncg, cnet):
    """The dataframes built by cnet_to_db carry every column of their DB model."""
    # Register one image per measure serial so each serial resolves to an image id.
    serials = [cnet.iloc[row]['serialnumber'] for row in range(3)]
    imgs = [model.Images(name=image_name, serial=serial)
            for image_name, serial in zip(('foo1', 'foo2', 'foo3'), serials)]

    with ncg.session_scope() as session:
        session.add_all(imgs)

    p_df, m_df = ncg.cnet_to_db(cnet)

    expected_point_columns = model.Points.__table__.columns.keys()
    expected_measure_columns = model.Measures.__table__.columns.keys()

    for key in expected_point_columns:
        assert key in p_df.columns, f"column '{key}' not in points dataframe"
    for key in expected_measure_columns:
        assert key in m_df.columns, f"column '{key}' not in measures dataframe"

# TODO: test the clear_tables functionality of ncg.place_points_from_cnet
+23 −23
Original line number Diff line number Diff line
@@ -97,6 +97,29 @@ ORDER BY measures."pointid", measures."id";

        return df

def copy_from_method(table, conn, keys, data_iter, pre_truncate=False, fatal_failure=False):
    """
    Custom method for pandas.DataFrame.to_sql that will use COPY FROM
    From: https://stackoverflow.com/questions/24084710/to-sql-sqlalchemy-copy-from-postgresql-engine

    This follows the API specified by pandas.

    Parameters
    ----------
    table : object
            Target table description; only ``table.schema`` and ``table.name`` are read.

    conn : object
           Connection wrapper whose ``connection`` attribute is the raw DB-API
           connection. Its cursors must provide ``copy_expert``.

    keys : iterable of str
           Column names, in the order the values appear in each row.

    data_iter : iterable
                Rows to write; each row is an iterable of column values.

    pre_truncate : bool
                   Unused; kept so existing callers keep working.

    fatal_failure : bool
                    Unused; kept so existing callers keep working.

    Returns
    -------
    : int
      The cursor's ``rowcount`` after the COPY.
    """
    # Serialise the rows to an in-memory CSV that COPY reads as STDIN.
    s_buf = StringIO()
    csv_writer(s_buf, quoting=QUOTE_MINIMAL).writerows(data_iter)
    s_buf.seek(0)

    columns = ', '.join('"{}"'.format(k) for k in keys)
    table_name = '{}.{}'.format(
        table.schema, table.name) if table.schema else table.name
    sql_query = 'COPY %s (%s) FROM STDIN WITH CSV' % (table_name, columns)

    cur = conn.connection.cursor()
    try:
        cur.copy_expert(sql=sql_query, file=s_buf)
        return cur.rowcount
    finally:
        # Release the cursor even when the COPY fails.
        cur.close()

def update_from_jigsaw(cnet, measures, engine, pointid_func=None):
    """
@@ -131,29 +154,6 @@ def update_from_jigsaw(cnet, measures, engine, pointid_func=None):
                  numeric ID back. This callable is used to unmunge the id.
    """

    def copy_from_method(table, conn, keys, data_iter, pre_truncate=False, fatal_failure=False):
        """
        Custom method for pandas.DataFrame.to_sql that will use COPY FROM
        From: https://stackoverflow.com/questions/24084710/to-sql-sqlalchemy-copy-from-postgresql-engine

        This follows the API specified by pandas. ``pre_truncate`` and
        ``fatal_failure`` are accepted but unused. Returns the cursor's
        ``rowcount`` after the COPY.
        """
        # Serialise the rows to an in-memory CSV that COPY reads as STDIN.
        s_buf = StringIO()
        csv_writer(s_buf, quoting=QUOTE_MINIMAL).writerows(data_iter)
        s_buf.seek(0)

        columns = ', '.join('"{}"'.format(k) for k in keys)
        table_name = '{}.{}'.format(
            table.schema, table.name) if table.schema else table.name
        sql_query = 'COPY %s (%s) FROM STDIN WITH CSV' % (table_name, columns)

        cur = conn.connection.cursor()
        try:
            cur.copy_expert(sql=sql_query, file=s_buf)
            return cur.rowcount
        finally:
            # Release the cursor even when the COPY fails.
            cur.close()

    # Get the PID back from the id.
    if pointid_func:
+18 −0
Original line number Diff line number Diff line
@@ -428,6 +428,8 @@ class Points(Base, BaseMixin):
                            order_by="asc(Measures.id)", 
                            backref=backref('point', lazy='joined'))
    reference_index = Column("referenceIndex", Integer, default=0)
    _residuals = Column("residuals", ARRAY(Float))
    _maxresidual = Column("maxResidual", Float)

    _default_fields = [
        "pointtype",
@@ -501,6 +503,22 @@ class Points(Base, BaseMixin):
            v = PointType(v)
        self._pointtype = v

    @hybrid_property
    def residuals(self):
        """
        Values held in the ``residuals`` column, which is declared as an
        ARRAY(Float) and mapped to ``_residuals``.

        NOTE(review): presumably one residual per measure of this point - confirm.
        """
        return self._residuals

    @residuals.setter
    def residuals(self, v):
        # Plain pass-through: v is stored as given, with no validation or conversion.
        self._residuals = v

    @hybrid_property
    def maxresidual(self):
        """
        Value held in the ``maxResidual`` column, which is declared as a Float
        and mapped to ``_maxresidual``.

        NOTE(review): presumably the largest entry of ``residuals`` - confirm;
        nothing here keeps the two in sync.
        """
        return self._maxresidual

    @maxresidual.setter
    def maxresidual(self, max_res):
        # Plain pass-through: max_res is stored as given, with no validation.
        self._maxresidual = max_res

    #def subpixel_register(self, Session, pointid, **kwargs):
    #    subpixel.subpixel_register_point(args=(Session, pointid), **kwargs)

Loading