Commit fb6eb862 authored by jlaura's avatar jlaura
Browse files

Merge pull request #24 from kree/outlier_detection

Outlier detection functionality added
parents 2cb0fe08 ecd36ba1
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -177,7 +177,6 @@ class CandidateGraph(nx.Graph):
        matches : dataframe
                  The pandas dataframe containing the matches
        """

        source_groups = matches.groupby('source_image')
        for i, source_group in source_groups:
            for j, dest_group in source_group.groupby('destination_image'):
+100 −0
Original line number Diff line number Diff line
@@ -101,3 +101,103 @@ class FlannMatcher(object):
                                              'destination_image', 'destination_idx',
                                              'distance'])

class OutlierDetector(object):
    """
    A collection of outlier detection methods for keypoint matches.

    Every method takes the "matches" pandas dataframe that stores match
    information for an edge of the graph and returns a boolean pandas
    series that can be used to mask that dataframe.

    Attributes
    ----------
    None; the detector keeps no state between calls.
    """
    def __init__(self):
        pass

    # (query only takes care of literal self-matches on a keypoint basis,
    #  not self-matches for the whole image)
    def self_neighbors(self, matches):
        """
        Flag matches whose source and destination images differ.

        Parameters
        ----------
        matches : dataframe
                  the matches dataframe stored along the edge of the graph;
                  must provide the columns ``source_image`` and
                  ``destination_image``

        Returns
        -------
         : dataseries
           Mask aligned with ``matches``. True means the row is matched to
           a point in a different image (good); False means the row is
           matched to a point in the same image (bad).
        """
        return matches.source_image != matches.destination_image

    def distance_ratio(self, matches, ratio=0.8):
        """
        Compute a mask for the matches dataframe using the ratio test.

        Rows are grouped by ``source_idx``.  Within each group the best
        (smallest distance) match is kept only when its distance is smaller
        than ``ratio`` times the distance of the second-best match; every
        other row of the group is rejected.

        Parameters
        ----------
        matches : dataframe
                  the matches dataframe stored along the edge of the graph;
                  must provide the columns ``source_idx`` and ``distance``

        ratio : float
                the bound on (best distance / second-best distance) below
                which the best match of a keypoint is marked as "good".
                The default, 0.8, is the value from Lowe's paper.

        Returns
        -------
         : dataseries
           Boolean mask with one entry per row of ``matches``, sharing its
           index. Rows are True if the associated match passes the ratio
           test and False otherwise. Keypoints with a single match are True
           by default, since the ratio test cannot be applied to them.
           Rows that exactly repeat an earlier row are always False, so
           applying the mask also drops duplicates.
        """
        keep = [False] * len(matches)

        # Relabel rows by position so flags can be written positionally,
        # regardless of the index carried by the caller's dataframe, then
        # discard exact duplicates so a repeated row is never mistaken for
        # the second-best match.
        unique_matches = matches.reset_index(drop=True).drop_duplicates()

        for _, group in unique_matches.groupby('source_idx'):
            if len(group) < 2:
                # Nothing to compare against: accept the lone match.
                keep[group.index[0]] = True
                continue

            # Do not rely on the caller having sorted by distance; a stable
            # sort keeps the original order between equal distances.
            ranked = group['distance'].sort_values(kind='mergesort')
            if ranked.iloc[0] < ratio * ranked.iloc[1]:
                keep[ranked.index[0]] = True

        return pd.Series(keep, index=matches.index, dtype=bool)

    def mirroring_test(self, matches):
        """
        Compute a mask that flags rows repeating an earlier row of matches.

        Parameters
        ----------
        matches : dataframe
                  the matches dataframe stored along the edge of the graph
                  containing matched points with columns containing:
                  matched image name, query index, train index, and
                  descriptor distance

        Returns
        -------
         : dataseries
           Mask aligned with ``matches``. A row is True when an identical
           row (all columns equal) appears earlier in the dataframe; the
           first occurrence of every row, and rows that occur only once,
           are False. The dataframe itself is not modified.

        Notes
        -----
        This acts as a source -> destination / destination -> source
        mirroring check only if the caller stores a mirrored match as an
        exact copy of the forward row; that convention is assumed here and
        should be confirmed against the code that builds ``matches``.
        """
        return matches.duplicated(keep='first')
+44 −0
Original line number Diff line number Diff line
@@ -67,3 +67,47 @@ class TestMatcher(unittest.TestCase):

    def tearDown(self):
        pass

class TestOutlierDetector(unittest.TestCase):
    """Checks on the masks produced by matcher.OutlierDetector."""

    def setUp(self):
        # Build a real matches dataframe: extract SIFT features from two
        # images, load both descriptor sets into a FLANN matcher and query
        # it with the descriptors of the first image.
        im1 = cv2.imread(get_path('AS15-M-0296_SML.png'))
        im2 = cv2.imread(get_path('AS15-M-0297_SML.png'))

        sift = cv2.xfeatures2d.SIFT_create(10)

        fd = {}
        fd['AS15-M-0296_SML.png'] = sift.detectAndCompute(im1, None)
        fd['AS15-M-0297_SML.png'] = sift.detectAndCompute(im2, None)

        fmatcher = matcher.FlannMatcher()
        for imageid, (_, descriptor) in fd.items():
            fmatcher.add(descriptor, imageid)

        fmatcher.train()
        # k=3 so that keypoints have more than one candidate match to
        # compare in the ratio test.
        self.matches = fmatcher.query(fd['AS15-M-0296_SML.png'][1],
                                      'AS15-M-0296_SML.png', k=3)

        self.outliers = matcher.OutlierDetector()

    def test_distance_ratio(self):
        mask = self.outliers.distance_ratio(self.matches)
        # assertTrue(len(mask), 13) would treat 13 as the failure message
        # rather than an expected value, so state the checks explicitly.
        self.assertGreater(len(mask), 0)
        self.assertEqual(mask.dtype, bool)

    def test_self_neighbors(self):
        # returned mask should be same length as input df
        mask = self.outliers.self_neighbors(self.matches)
        self.assertEqual(len(mask), len(self.matches))

    def test_mirroring_test(self):
        # returned mask should be same length as input df
        mask = self.outliers.mirroring_test(self.matches)
        self.assertEqual(len(mask), len(self.matches))

    def tearDown(self):
        pass
+3 −1
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ from autocnet.fileio.io_gdal import GeoDataset
from autocnet.graph.network import CandidateGraph
from autocnet.matcher import feature_extractor as fe
from autocnet.matcher.matcher import FlannMatcher
from autocnet.matcher.matcher import OutlierDetector


class TestTwoImageMatching(unittest.TestCase):
@@ -60,7 +61,8 @@ class TestTwoImageMatching(unittest.TestCase):

        for node, attributes in cg.nodes_iter(data=True):
            descriptors = attributes['descriptors']
            matches = fl.query(descriptors, node,  k=2)
            matches = fl.query(descriptors, node, k=3) #had to increase from 2 to test distance ratio test
            detectme = OutlierDetector()
            cg.add_matches(matches)

        # Step: And create a C object