Commit 1ed40ab8 authored by kberry's avatar kberry
Browse files

updated outlier detection for changes to matches df

parent bdfcf269
Loading
Loading
Loading
Loading
+6 −9
Original line number Diff line number Diff line
@@ -120,15 +120,12 @@ class CandidateGraph(nx.Graph):
                destination_keypoints = []

                for i, row in edge['matches'].iterrows():
                    source_matched_to = row['matched_to_x']
                    source_idx = row['trainIdx_x']
                    src_keypoint = [self.node[source_matched_to]['keypoints'][source_idx].pt[0],
                                    self.node[source_matched_to]['keypoints'][source_idx].pt[1]]

                    destination_matched_to = row['matched_to_y']
                    destination_idx = row['trainIdx_y']
                    dest_keypoint = [self.node[destination_matched_to]['keypoints'][destination_idx].pt[0],
                                     self.node[destination_matched_to]['keypoints'][destination_idx].pt[1]]
                    source_idx = row['source_idx']
                    src_keypoint = [self.node[source_key]['keypoints'][int(source_idx)].pt[0],
                                    self.node[source_key]['keypoints'][int(source_idx)].pt[1]]
                    destination_idx = row['destination_idx']
                    dest_keypoint = [self.node[destination_key]['keypoints'][int(destination_idx)].pt[0],
                                     self.node[destination_key]['keypoints'][int(destination_idx)].pt[1]]

                    source_keypoints.append(src_keypoint)
                    destination_keypoints.append(dest_keypoint)
+71 −41
Original line number Diff line number Diff line
@@ -101,48 +101,43 @@ class FlannMatcher(object):
                                              'destination_image', 'destination_idx',
                                              'distance'])

#TODO: decide on a consistent mask format to output.
#Do we want to also accept existing masks and just mask more things?
#consider passing in the matches and source_node to __init__
class MatchOutlierDetector(object):
class OutlierDetector(object):
    """
    Documentation
    A class which contains several outlier detection methods which all return
    True/False masks as pandas data series, which can be used as masks for
    the "matches" pandas dataframe which stores match information for each
    edge of the graph.

    Attributes
    ----------

    """
    def __init__(self, matches, ratio=0.8):
        #0.8 is Lowe's paper value -- can be changed.
        self.distance_ratio = ratio
        self.matches = matches
        self.mask = None #start with empty mask? I guess we could accept an input mask.
    def __init__(self):
        pass

    # return mask with self-neighbors set to zero. (query only takes care of literal self-matches on a keypoint basis, not self-matches for the whole image)
    #TODO: turn this into a mask-style thing. just returns a mask of bad values
    def self_neighbors(self, source_node):
    # (query only takes care of literal self-matches on a keypoint basis, not self-matches for the whole image)
    def self_neighbors(self, matches):
        """
        Returns a df containing self-neighbors that must be removed.
        (temporary return val?)
        Returns a pandas data series intended to be used as a mask. Each row
        is True if it is not matched to a point in the same image (good) and
        False if it is (bad.)

        Parameters
        ----------
        matches : dataframe
                  The pandas dataframe output by FlannMatcher.query()
                  the matches dataframe stored along the edge of the graph
                  containing matched points with columns containing:
                  matched image name, query index, train index, and
                  descriptor distance

        source_node: a string used as the key of the matched node

        Returns
        -------
        : dataseries
          Intended to mask the matches dataframe. True means the row is not matched to a point in the same image
          and false the row is.
        """
        mask = []
        self_matches = self.matches.loc[self.matches['matched_to'] == source_node]
        print(self_matches)
        return mask
        #this could maybe be return matches.source_node == matches.destination_node
        return matches.source_image != matches.destination_image

    #also add a mirroring(?) test?

    def distance_ratio(self):
    def distance_ratio(self, matches, ratio=0.8):
        """
        Compute and return a mask for the matches dataframe returned by FlannMatcher.query()
        using the ratio test with the ratio passed in as an argument.
@@ -150,28 +145,63 @@ class MatchOutlierDetector(object):
        Parameters
        ----------
        matches : dataframe
                  The pandas dataframe output by FlannMatcher.query()
                  the matches dataframe stored along the edge of the graph
                  containing matched points with columns containing:
                  matched image name, query index, train index, and
                  descriptor distance

        ratio: float
               the ratio between the first and second-best match distances
               for each keypoint to use as a bound for marking the first keypoint
               as "good."
        Returns
        -------
        mask : list
               a list of the same size as the matches dataframe
               with value = [1] if that entry in the df should be included
               and [0] if that entry in the df should be excluded
        """
        #mask = []
        mask = {}
        for key, group in self.matches.groupBy('queryIdx'):
         : dataseries
           Intended to mask the matches dataframe. Rows are True if the associated keypoint passes
           the ratio test and false otherwise. Keypoints without more than one match are True by
           default, since the ratio test will not work for them.
        """
        #0.8 is Lowe's paper value -- can be changed.
        mask = []
        temp_matches = matches.drop_duplicates() #don't want to deal with duplicates...
        for key, group in temp_matches.groupby('source_idx'): #change to searchId?
            #won't work if there's only 1 match for each queryIdx
            if len(group) < 2:
                pass #actually need to make sure that none of these are masked.
                mask.append(True)
            else:
                if group['distance'].iloc[0] < self.distance_ratio * group['distance'].iloc[1]:
                    mask.append([1])
                if group['distance'].iloc[0] < ratio * group['distance'].iloc[1]: #this means distance _0_ is good and can drop all other distances
                    mask.append(True)
                    for i in range(len(group['distance']-1)):
                        mask.append(False)
                else:
                    mask.append([0])
        return mask
         #make the mask a dict between indicies of the original df (if possible) and true/false values!
 No newline at end of file
                    for i in range(len(group['distance'])):
                        mask.append(False)
        return pd.Series(mask)

    def mirroring_test(self, matches):
        """
        Build a boolean mask flagging the rows of the matches dataframe that
        repeat an earlier row.

        Parameters
        ----------
        matches : dataframe
                  the matches dataframe stored along the edge of the graph

        Returns
        -------
         : dataseries
           Intended to mask the matches dataframe; it has the same length and
           index as ``matches``. A row is True when an identical row (every
           column equal) appears earlier in the dataframe. The first
           occurrence of a row, and any row that is never repeated, is False.

        Notes
        -----
        NOTE(review): presumably a source -> destination match and its
        destination -> source counterpart end up stored as identical rows, so
        a repeated row signals a mirrored match -- confirm against the code
        that populates the dataframe.
        """
        # keep='first' leaves the first occurrence unflagged, so only the
        # additional copies of a row are marked True.
        repeated_rows = matches.duplicated(keep='first')
        return repeated_rows



+1 −1
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@ class TestFeatureExtractor(unittest.TestCase):
    def test_extract_features(self):
        features = feature_extractor.extract_features(self.data_array, self.parameters)
        self.assertEquals(len(features), 2)
        self.assertEqual(len(features[0]), 10)  # OpenCV +1 to
        self.assertEqual(len(features[0]), 11)  # OpenCV +1 to
        self.assertIsInstance(features[0][0], type(cv2.KeyPoint()))
        self.assertIsInstance(features[1][0], np.ndarray)
+34 −3
Original line number Diff line number Diff line
@@ -71,13 +71,44 @@ class TestMatcher(unittest.TestCase):
class TestOutlierDetector(unittest.TestCase):

    def setUp(self):
        self.outliers = matcher.OutlierDetector(matches)
        #actually set up everything for matches
        im1 = cv2.imread(get_path('AS15-M-0296_SML.png'))
        im2 = cv2.imread(get_path('AS15-M-0297_SML.png'))

        fd = {}

        sift = cv2.xfeatures2d.SIFT_create(10)

        fd['AS15-M-0296_SML.png'] = sift.detectAndCompute(im1, None)
        fd['AS15-M-0297_SML.png'] = sift.detectAndCompute(im2, None)

        fmatcher = matcher.FlannMatcher()
        truth_image_indices = {}
        counter = 0
        for imageid, (keypoint, descriptor) in fd.items():
            truth_image_indices[counter] = imageid
            fmatcher.add(descriptor, imageid)
            counter += 1

        fmatcher.train()
        self.matches = fmatcher.query(fd['AS15-M-0296_SML.png'][1],'AS15-M-0296_SML.png', k=3)

        self.outliers = matcher.OutlierDetector()

    def test_distance_ratio(self):
        self.assertTrue(False)
        #TODO: write real test
        self.assertTrue(len(self.outliers.distance_ratio(self.matches)), 13)

    def test_self_neighbors(self):
        self.assertTrue(False)
        print(self.matches[self.outliers.self_neighbors(self.matches)])
        #returned mask should be same length as input df
        self.assertEquals(len(self.outliers.self_neighbors(self.matches)), len(self.matches))


    def test_mirroring_test(self):
        """The mirroring mask must have one entry per row of the matches dataframe."""
        mask = self.outliers.mirroring_test(self.matches)
        # assertEqual is used instead of the assertEquals alias, which is
        # deprecated and no longer exists in Python 3.12+.
        self.assertEqual(len(mask), len(self.matches))


    def tearDown(self):
        """Per-test cleanup hook; currently does nothing."""
+4 −3
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ from autocnet.fileio.io_controlnetwork import to_isis
from autocnet.graph.network import CandidateGraph
from autocnet.matcher import feature_extractor as fe
from autocnet.matcher.matcher import FlannMatcher
from autocnet.matcher.matcher import OutlierDetector


class TestTwoImageMatching(unittest.TestCase):
@@ -66,12 +67,12 @@ class TestTwoImageMatching(unittest.TestCase):

        for node, attributes in cg.nodes_iter(data=True):
            descriptors = attributes['descriptors']
            matches = fl.query(descriptors, k=3) #had to increase from 2 to test distance ratio test
            detectme = MatchOutlierDetector(matches)
            matches = fl.query(descriptors, node, k=3) #had to increase from 2 to test distance ratio test
            detectme = OutlierDetector()
            cg.add_matches(matches)

        # Step: Compute Homography
        transformation_matrix, mask = cg.compute_homography('AS15-M-0297_SML.png', 'AS15-M-0298_SML.png')
        transformation_matrix, mask = cg.compute_homography(0, 1)
        self.assertEquals(len(transformation_matrix), 3)
        #TODO: write better test
        #self.assertEquals(len(mask), 19)