From 0783415684a24f03d6a7ef07c42b822ca71c8459 Mon Sep 17 00:00:00 2001
From: Paul-Edouard Sarlin <paul.edouard.sarlin@gmail.com>
Date: Thu, 17 Feb 2022 12:07:18 +0100
Subject: [PATCH 1/6] Avoid recomputation of pairs in self-matching

---
 hloc/localize_sfm.py   | 14 +++++---------
 hloc/match_features.py |  2 +-
 hloc/triangulation.py  | 16 ++--------------
 hloc/utils/io.py       | 24 ++++++++++++++++++++++++
 4 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/hloc/localize_sfm.py b/hloc/localize_sfm.py
index de7b5824..2b8d8857 100644
--- a/hloc/localize_sfm.py
+++ b/hloc/localize_sfm.py
@@ -88,15 +88,11 @@ def pose_from_cluster(
         points3D_ids = np.array([p.point3D_id if p.has_point3D() else -1
                                  for p in image.points2D])
 
-        pair = names_to_pair(qname, image.name)
-        with h5py.File(matches_path, 'r') as f:
-            matches = f[pair]['matches0'].__array__()
-        valid = np.where(matches > -1)[0]
-        valid = valid[points3D_ids[matches[valid]] != -1]
-        num_matches += len(valid)
-
-        for idx in valid:
-            id_3D = points3D_ids[matches[idx]]
+        matches, _ = get_matches(matches_path, qname, image.name)
+        matches = matches[points3D_ids[matches[:, 1]] != -1]
+        num_matches += len(matches)
+        for idx, m in matches:
+            id_3D = points3D_ids[m]
             kp_idx_to_3D_to_db[idx][id_3D].append(i)
             # avoid duplicate observations
             if id_3D not in kp_idx_to_3D[idx]:
diff --git a/hloc/match_features.py b/hloc/match_features.py
index 0e35b89a..dbf91517 100644
--- a/hloc/match_features.py
+++ b/hloc/match_features.py
@@ -128,7 +128,7 @@ def match_from_paths(conf: Dict,
     for (name0, name1) in tqdm(pairs, smoothing=.1):
         pair = names_to_pair(name0, name1)
         # Avoid to recompute duplicates to save time
-        if pair in skip_pairs or names_to_pair(name0, name1) in skip_pairs:
+        if pair in skip_pairs or names_to_pair(name1, name0) in skip_pairs:
             continue
 
         data = {}
diff --git a/hloc/triangulation.py b/hloc/triangulation.py
index 4ed05d48..e24346e3 100644
--- a/hloc/triangulation.py
+++ b/hloc/triangulation.py
@@ -73,7 +73,6 @@ def import_matches(image_ids, database_path, pairs_path, matches_path,
     with open(str(pairs_path), 'r') as f:
         pairs = [p.split() for p in f.readlines()]
 
-    hfile = h5py.File(str(matches_path), 'r')
     db = COLMAPDatabase.connect(database_path)
 
     matched = set()
@@ -81,20 +80,9 @@ def import_matches(image_ids, database_path, pairs_path, matches_path,
         id0, id1 = image_ids[name0], image_ids[name1]
         if len({(id0, id1), (id1, id0)} & matched) > 0:
             continue
-        pair = names_to_pair(name0, name1)
-        if pair not in hfile:
-            raise ValueError(
-                f'Could not find pair {(name0, name1)}... '
-                'Maybe you matched with a different list of pairs? '
-                f'Reverse in file: {names_to_pair(name0, name1) in hfile}.')
-
-        matches = hfile[pair]['matches0'].__array__()
-        valid = matches > -1
+        matches, scores = get_matches(matches_path, name0, name1)
         if min_match_score:
-            scores = hfile[pair]['matching_scores0'].__array__()
-            valid = valid & (scores > min_match_score)
-        matches = np.stack([np.where(valid)[0], matches[valid]], -1)
-
+            matches = matches[scores > min_match_score]
         db.add_matches(id0, id1, matches)
         matched |= {(id0, id1), (id1, id0)}
 
diff --git a/hloc/utils/io.py b/hloc/utils/io.py
index 54eae485..4f001504 100644
--- a/hloc/utils/io.py
+++ b/hloc/utils/io.py
@@ -1,6 +1,9 @@
+from pathlib import Path
 import cv2
 import h5py
 
+from .parsers import names_to_pair
+
 
 def read_image(path, grayscale=False):
     if grayscale:
@@ -23,3 +26,24 @@ def visit_fn(_, obj):
                 names.append(obj.parent.name.strip('/'))
         fd.visititems(visit_fn)
     return list(set(names))
+
+
+def get_matches(path: Path, name0: str, name1: str) -> Tuple[np.ndarray]:
+    with h5py.File(str(path), 'r') as hfile:
+        reverse = False
+        pair = names_to_pair(name0, name1)
+        if pair not in hfile:
+            pair = names_to_pair(name1, name0)
+            if pair not in hfile:
+                raise ValueError(
+                    f'Could not find pair {(name0, name1)}... '
+                    'Maybe you matched with a different list of pairs? ')
+            reverse = True
+        matches = hfile[pair]['matches0'].__array__()
+        scores = hfile[pair]['matching_scores0'].__array__()
+    idx = np.where(matches != -1)[0]
+    matches = np.stack([idx, matches[idx]], -1)
+    if reverse:
+        matches = matches[:, ::-1]
+    scores = scores[idx]
+    return matches, scores

From a50e53f3611cbe650ff3202d898ebfb8abdd77a0 Mon Sep 17 00:00:00 2001
From: Paul-Edouard Sarlin <paul.edouard.sarlin@gmail.com>
Date: Thu, 17 Feb 2022 18:56:23 +0100
Subject: [PATCH 2/6] Import Tuple and numpy in io.py; flip matches with np.flip

---
 hloc/utils/io.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hloc/utils/io.py b/hloc/utils/io.py
index 4f001504..96da7cf4 100644
--- a/hloc/utils/io.py
+++ b/hloc/utils/io.py
@@ -1,4 +1,6 @@
+from typing import Tuple
 from pathlib import Path
+import numpy as np
 import cv2
 import h5py
 
@@ -44,6 +46,6 @@ def get_matches(path: Path, name0: str, name1: str) -> Tuple[np.ndarray]:
     idx = np.where(matches != -1)[0]
     matches = np.stack([idx, matches[idx]], -1)
     if reverse:
-        matches = matches[:, ::-1]
+        matches = np.flip(matches, -1)
     scores = scores[idx]
     return matches, scores

From 49bd1967de9bda08b806217aba7556ca0cade2cc Mon Sep 17 00:00:00 2001
From: Paul-Edouard Sarlin <paul.edouard.sarlin@gmail.com>
Date: Sun, 27 Feb 2022 11:19:29 +0100
Subject: [PATCH 3/6] Add keypoint accessor, bugfixes

---
 hloc/localize_sfm.py  | 7 +++----
 hloc/triangulation.py | 9 ++-------
 hloc/utils/io.py      | 6 ++++++
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/hloc/localize_sfm.py b/hloc/localize_sfm.py
index 2b8d8857..ba68d0e5 100644
--- a/hloc/localize_sfm.py
+++ b/hloc/localize_sfm.py
@@ -3,13 +3,13 @@
 from pathlib import Path
 from collections import defaultdict
 from typing import Dict, List, Union
-import h5py
 from tqdm import tqdm
 import pickle
 import pycolmap
 
 from . import logger
-from .utils.parsers import parse_image_lists, parse_retrieval, names_to_pair
+from .utils.io import get_keypoints, get_matches
+from .utils.parsers import parse_image_lists, parse_retrieval
 
 
 def do_covisibility_clustering(frame_ids: List[int],
@@ -73,8 +73,7 @@ def pose_from_cluster(
         matches_path: Path,
         **kwargs):
 
-    with h5py.File(features_path, 'r') as f:
-        kpq = f[qname]['keypoints'].__array__()
+    kpq = get_keypoints(features_path, qname)
     kpq += 0.5  # COLMAP coordinates
 
     kp_idx_to_3D = defaultdict(list)
diff --git a/hloc/triangulation.py b/hloc/triangulation.py
index e24346e3..782ce4c8 100644
--- a/hloc/triangulation.py
+++ b/hloc/triangulation.py
@@ -4,13 +4,11 @@
 import sys
 from pathlib import Path
 from tqdm import tqdm
-import h5py
-import numpy as np
 import pycolmap
 
 from . import logger
 from .utils.database import COLMAPDatabase
-from .utils.parsers import names_to_pair
+from .utils.io import get_keypoints, get_matches
 
 
 class OutputCapture:
@@ -53,15 +51,13 @@ def create_db_from_model(reconstruction, database_path):
 
 def import_features(image_ids, database_path, features_path):
     logger.info('Importing features into the database...')
-    hfile = h5py.File(str(features_path), 'r')
     db = COLMAPDatabase.connect(database_path)
 
     for image_name, image_id in tqdm(image_ids.items()):
-        keypoints = hfile[image_name]['keypoints'].__array__()
+        keypoints = get_keypoints(features_path, image_name)
         keypoints += 0.5  # COLMAP origin
         db.add_keypoints(image_id, keypoints)
 
-    hfile.close()
     db.commit()
     db.close()
 
@@ -89,7 +85,6 @@ def import_matches(image_ids, database_path, pairs_path, matches_path,
         if skip_geometric_verification:
             db.add_two_view_geometry(id0, id1, matches)
 
-    hfile.close()
     db.commit()
     db.close()
 
diff --git a/hloc/utils/io.py b/hloc/utils/io.py
index 96da7cf4..54cadaf0 100644
--- a/hloc/utils/io.py
+++ b/hloc/utils/io.py
@@ -30,6 +30,12 @@ def visit_fn(_, obj):
     return list(set(names))
 
 
+def get_keypoints(path: Path, name: str) -> np.ndarray:
+    with h5py.File(str(path), 'r') as hfile:
+        p = hfile[name]['keypoints'].__array__()
+    return p
+
+
 def get_matches(path: Path, name0: str, name1: str) -> Tuple[np.ndarray]:
     with h5py.File(str(path), 'r') as hfile:
         reverse = False

From 1a63832921a20169876aa0d7d8fdf22d76abc863 Mon Sep 17 00:00:00 2001
From: Mihai Dusmanu <mihai.dusmanu@gmail.com>
Date: Tue, 1 Mar 2022 11:51:16 +0100
Subject: [PATCH 4/6] Updated pair format to use slash for separation.

---
 hloc/utils/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hloc/utils/parsers.py b/hloc/utils/parsers.py
index ab98e082..5e97fe7f 100644
--- a/hloc/utils/parsers.py
+++ b/hloc/utils/parsers.py
@@ -49,4 +49,4 @@ def parse_retrieval(path):
 
 
 def names_to_pair(name0, name1):
-    return '_'.join((name0.replace('/', '-'), name1.replace('/', '-')))
+    return '/'.join((name0.replace('/', '-'), name1.replace('/', '-')))

From f7036cb8061a7d8bd700529860ca23bc50d0c24e Mon Sep 17 00:00:00 2001
From: Paul-Edouard Sarlin <paul.edouard.sarlin@gmail.com>
Date: Tue, 1 Mar 2022 16:45:33 +0100
Subject: [PATCH 5/6] Backward compatibility for the previous pair format

---
 hloc/match_features.py | 42 +++++++++++++++++++++++++++++-------------
 hloc/utils/io.py       | 31 +++++++++++++++++++++----------
 hloc/utils/parsers.py  |  8 ++++++--
 3 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/hloc/match_features.py b/hloc/match_features.py
index dbf91517..7d8fb56e 100644
--- a/hloc/match_features.py
+++ b/hloc/match_features.py
@@ -1,5 +1,5 @@
 import argparse
-from typing import Union, Optional, Dict
+from typing import Union, Optional, Dict, List, Tuple
 from pathlib import Path
 import pprint
 import collections.abc as collections
@@ -9,7 +9,7 @@
 
 from . import matchers, logger
 from .utils.base_model import dynamic_load
-from .utils.parsers import names_to_pair, parse_retrieval
+from .utils.parsers import names_to_pair, names_to_pair_old, parse_retrieval
 from .utils.io import list_h5_names
 
 
@@ -95,6 +95,27 @@ def main(conf: Dict,
     return matches
 
 
+def find_pairs_to_match(pairs_all: List[Tuple[str]], match_path: Path = None):
+    '''Avoid to recompute duplicates to save time.'''
+    pairs = set()
+    for i, j in pairs_all:
+        if (j, i) not in pairs:
+            pairs.add((i, j))
+    pairs = list(pairs)
+    if match_path is not None and match_path.exists():
+        with h5py.File(str(match_path), 'r') as fd:
+            pairs_filtered = []
+            for i, j in pairs:
+                if (names_to_pair(i, j) in fd or
+                        names_to_pair(j, i) in fd or
+                        names_to_pair_old(i, j) in fd or
+                        names_to_pair_old(j, i) in fd):
+                    continue
+                pairs_filtered.append((i, j))
+        return pairs_filtered
+    return pairs
+
+
 @torch.no_grad()
 def match_from_paths(conf: Dict,
                      pairs_path: Path,
@@ -112,25 +133,21 @@ def match_from_paths(conf: Dict,
             raise FileNotFoundError(f'Reference feature file {path}.')
     name2ref = {n: i for i, p in enumerate(feature_paths_refs)
                 for n in list_h5_names(p)}
+    match_path.parent.mkdir(exist_ok=True, parents=True)
 
     assert pairs_path.exists(), pairs_path
     pairs = parse_retrieval(pairs_path)
     pairs = [(q, r) for q, rs in pairs.items() for r in rs]
+    pairs = find_pairs_to_match(pairs, None if overwrite else match_path)
+    if len(pairs) == 0:
+        logger.info('Skipping the matching.')
+        return
 
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     Model = dynamic_load(matchers, conf['model']['name'])
     model = Model(conf['model']).eval().to(device)
 
-    match_path.parent.mkdir(exist_ok=True, parents=True)
-    skip_pairs = set(list_h5_names(match_path)
-                     if match_path.exists() and not overwrite else ())
-
     for (name0, name1) in tqdm(pairs, smoothing=.1):
-        pair = names_to_pair(name0, name1)
-        # Avoid to recompute duplicates to save time
-        if pair in skip_pairs or names_to_pair(name1, name0) in skip_pairs:
-            continue
-
         data = {}
         with h5py.File(str(feature_path_q), 'r') as fd:
             grp = fd[name0]
@@ -146,6 +163,7 @@ def match_from_paths(conf: Dict,
         data = {k: v[None] for k, v in data.items()}
 
         pred = model(data)
+        pair = names_to_pair(name0, name1)
         with h5py.File(str(match_path), 'a') as fd:
             if pair in fd:
                 del fd[pair]
@@ -157,8 +175,6 @@ def match_from_paths(conf: Dict,
                 scores = pred['matching_scores0'][0].cpu().half().numpy()
                 grp.create_dataset('matching_scores0', data=scores)
 
-        skip_pairs.add(pair)
-
     logger.info('Finished exporting matches.')
 
 
diff --git a/hloc/utils/io.py b/hloc/utils/io.py
index 54cadaf0..2b3d9902 100644
--- a/hloc/utils/io.py
+++ b/hloc/utils/io.py
@@ -4,7 +4,7 @@
 import cv2
 import h5py
 
-from .parsers import names_to_pair
+from .parsers import names_to_pair, names_to_pair_old
 
 
 def read_image(path, grayscale=False):
@@ -36,17 +36,28 @@ def get_keypoints(path: Path, name: str) -> np.ndarray:
     return p
 
 
+def find_pair(hfile: h5py.File, name0: str, name1: str):
+    pair = names_to_pair(name0, name1)
+    if pair in hfile:
+        return pair, False
+    pair = names_to_pair(name1, name0)
+    if pair in hfile:
+        return pair, True
+    # older, less efficient format
+    pair = names_to_pair_old(name0, name1)
+    if pair in hfile:
+        return pair, False
+    pair = names_to_pair_old(name1, name0)
+    if pair in hfile:
+        return pair, True
+    raise ValueError(
+        f'Could not find pair {(name0, name1)}... '
+        'Maybe you matched with a different list of pairs? ')
+
+
 def get_matches(path: Path, name0: str, name1: str) -> Tuple[np.ndarray]:
     with h5py.File(str(path), 'r') as hfile:
-        reverse = False
-        pair = names_to_pair(name0, name1)
-        if pair not in hfile:
-            pair = names_to_pair(name1, name0)
-            if pair not in hfile:
-                raise ValueError(
-                    f'Could not find pair {(name0, name1)}... '
-                    'Maybe you matched with a different list of pairs? ')
-            reverse = True
+        reverse, pair = find_pair(hfile, name0, name1)
         matches = hfile[pair]['matches0'].__array__()
         scores = hfile[pair]['matching_scores0'].__array__()
     idx = np.where(matches != -1)[0]
diff --git a/hloc/utils/parsers.py b/hloc/utils/parsers.py
index 5e97fe7f..1f4d9c19 100644
--- a/hloc/utils/parsers.py
+++ b/hloc/utils/parsers.py
@@ -48,5 +48,9 @@ def parse_retrieval(path):
     return dict(retrieval)
 
 
-def names_to_pair(name0, name1):
-    return '/'.join((name0.replace('/', '-'), name1.replace('/', '-')))
+def names_to_pair(name0, name1, separator='/'):
+    return separator.join((name0.replace('/', '-'), name1.replace('/', '-')))
+
+
+def names_to_pair_old(name0, name1):
+    return names_to_pair(name0, name1, separator='_')

From fa7e95ea48997da8a9daa43ca44285a15bb41a99 Mon Sep 17 00:00:00 2001
From: Paul-Edouard Sarlin <paul.edouard.sarlin@gmail.com>
Date: Wed, 2 Mar 2022 15:58:15 +0100
Subject: [PATCH 6/6] Rename find_pairs_to_match, fix find_pair unpacking order

---
 hloc/match_features.py | 4 ++--
 hloc/utils/io.py       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hloc/match_features.py b/hloc/match_features.py
index 7d8fb56e..ba86e9c7 100644
--- a/hloc/match_features.py
+++ b/hloc/match_features.py
@@ -95,7 +95,7 @@ def main(conf: Dict,
     return matches
 
 
-def find_pairs_to_match(pairs_all: List[Tuple[str]], match_path: Path = None):
+def find_unique_new_pairs(pairs_all: List[Tuple[str]], match_path: Path = None):
     '''Avoid to recompute duplicates to save time.'''
     pairs = set()
     for i, j in pairs_all:
@@ -138,7 +138,7 @@ def match_from_paths(conf: Dict,
     assert pairs_path.exists(), pairs_path
     pairs = parse_retrieval(pairs_path)
     pairs = [(q, r) for q, rs in pairs.items() for r in rs]
-    pairs = find_pairs_to_match(pairs, None if overwrite else match_path)
+    pairs = find_unique_new_pairs(pairs, None if overwrite else match_path)
     if len(pairs) == 0:
         logger.info('Skipping the matching.')
         return
diff --git a/hloc/utils/io.py b/hloc/utils/io.py
index 2b3d9902..602e7583 100644
--- a/hloc/utils/io.py
+++ b/hloc/utils/io.py
@@ -57,7 +57,7 @@ def find_pair(hfile: h5py.File, name0: str, name1: str):
 
 def get_matches(path: Path, name0: str, name1: str) -> Tuple[np.ndarray]:
     with h5py.File(str(path), 'r') as hfile:
-        reverse, pair = find_pair(hfile, name0, name1)
+        pair, reverse = find_pair(hfile, name0, name1)
         matches = hfile[pair]['matches0'].__array__()
         scores = hfile[pair]['matching_scores0'].__array__()
     idx = np.where(matches != -1)[0]