diff --git a/modules/SimpleSLAM/LICENSE b/modules/SimpleSLAM/LICENSE new file mode 100644 index 0000000000..261eeb9e9f --- /dev/null +++ b/modules/SimpleSLAM/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/modules/SimpleSLAM/README.md b/modules/SimpleSLAM/README.md
new file mode 100644
index 0000000000..e046d1cea4
--- /dev/null
+++ b/modules/SimpleSLAM/README.md
@@ -0,0 +1,180 @@
+# Structure From Motion (SfM) - README
+
+
+https://github.com/user-attachments/assets/f489a554-299e-41ad-a4b4-436e32d8cbd5
+
+
+## IMPORTANT - LightGlue Installation
+- First, clone and install LightGlue:
+```
+git clone https://github.com/cvg/LightGlue.git && cd LightGlue
+python -m pip install -e .
+```
+- Second, download the data:
+  - https://rpg.ifi.uzh.ch/docs/teaching/2024/kitti05.zip
+  - Put it in a folder named 'Dataset'.
+- Third:
+  - Install `requirements.txt`
+  - Run `bash scripts/run_tracker_visualization.sh`
+
+## INSTALLING COLMAP
+![Image showing commands](media/colmap_instruc.png)
+Use this command: `ninja -j4 -l 8 > build.log 2>&1`
+
+For pycolmap, see https://colmap.github.io/pycolmap/index.html#pycolmap-index
+
+Refer to the image below if the build freezes or crashes while running the command:
+![pycolmap instructions - Image showing commands](media/pycolmap_instruc.png)
+
+
+## Overview
+
+This repository contains two Python scripts that demonstrate basic Structure-from-Motion (SfM) pipelines:
+
+1. **sfm.py**
+   - A more classical approach that uses standard OpenCV feature detectors (e.g., SIFT, ORB, AKAZE) and BFMatcher or FLANN to match keypoints between images.
+   - Performs pose estimation (essential matrix or PnP) and triangulation to build a sparse map of 3D points.
+   - Uses optional non-linear refinement via scipy’s least squares to improve the estimated camera pose.
+
+2. **sfm_lightglue_aliked.py**
+   - An enhanced pipeline that integrates neural network-based feature extraction (**ALIKED**) and feature matching (**LightGlue**).
+   - Demonstrates how modern, learned feature detectors and matchers can improve keypoint reliability and reduce drift.
+   - Also includes the same core SfM steps (pose estimation, triangulation, optional non-linear refinement).
+   - Tracks a simple **Absolute Trajectory Error (ATE)** and accumulates a **cumulative translation error** for quick performance checks.
+
+Both scripts are **prototypes** designed primarily for **concept validation and experimentation**. For real-time, production-grade implementations, it’s recommended to integrate a C++ back end (e.g., [Ceres Solver](https://github.com/ceres-solver/ceres-solver)) for optimization and manage heavy-lifting tasks in a more performant environment.
+
+---
+
+## Features
+
+### Common SfM Steps
+- **Dataset Loading** (KITTI, Malaga, or custom folder with images).
+- **Camera Calibration** for loading intrinsic/extrinsic parameters.
+- **Feature Extraction**
+  - sfm.py: classical (SIFT, ORB, AKAZE)
+  - sfm_lightglue_aliked.py: ALIKED (learned keypoints + descriptors)
+- **Feature Matching**
+  - sfm.py: BFMatcher or FLANN
+  - sfm_lightglue_aliked.py: LightGlue (neural network-based matching)
+- **Motion Estimation**
+  - 2D-2D with essential matrix.
+  - 2D-3D with PnP (once 3D map points are available).
+- **Triangulation**
+  - Convert 2D matches into 3D points.
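+    A minimal sketch of this step (not the scripts’ exact code), assuming a calibrated intrinsic matrix `K`, a recovered relative pose `R`, `t` (3×1), and matched N×2 pixel arrays `pts1`, `pts2`:
+
+    ```python
+    import cv2
+    import numpy as np
+
+    # Camera 1 sits at the origin; camera 2 is displaced by (R, t).
+    P1 = K @ np.hstack([np.eye(3), np.zeros((3, 1))])
+    P2 = K @ np.hstack([R, t])
+
+    # cv2.triangulatePoints takes 2xN point arrays and returns 4xN homogeneous points.
+    pts4d = cv2.triangulatePoints(P1, P2, pts1.T.astype(np.float64), pts2.T.astype(np.float64))
+    pts3d = (pts4d[:3] / pts4d[3]).T          # Nx3, expressed in camera-1 coordinates
+    pts3d = pts3d[pts3d[:, 2] > 0]            # keep only points in front of the camera
+    ```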
+- **Non-linear Refinement** + - Uses scipy’s Levenberg-Marquardt (`least_squares`) to minimize reprojection error. +- **Basic Stereo Handling** (KITTI, Malaga) + - Combine left and right images for better scale recovery if stereo calibration is present. +- **Trajectory Evaluation** + - **ATE** (Absolute Trajectory Error) if ground truth is available. + - A simple “cumulative translation error” measure. + +--- + +## Requirements + +- **Python 3.7+** +- **OpenCV** (>= 4.x recommended) +- **NumPy** +- **Matplotlib** (for visualization) +- **scipy** (for non-linear refinement) +- **tqdm** (for progress bars) +- **PyTorch** (only required for sfm_lightglue_aliked.py, if using LightGlue + ALIKED) +- **lightglue** (the Python package for the LightGlue matching framework) + +--- + +## Usage + +### 1. Cloning & Installation +1. Clone this repository: + ```bash + git clone https://github.com/your-organization/your-repo.git + ``` +2. Install dependencies: + ```bash + pip install -r requirements.txt + ``` + Or individually: + ```bash + pip install opencv-python numpy matplotlib scipy tqdm torch + # plus LightGlue if not already installed + ``` + +### 2. Running **sfm.py** +```bash +python sfm.py --dataset kitti --data_path ./Dataset/kitti +``` +- **Arguments**: + - `--dataset`: name of the dataset (kitti, malaga, or custom). + - `--data_path`: path to the dataset folder. +- **Behavior**: + - Loads images, performs feature detection + matching (SIFT, ORB, AKAZE), estimates camera motion, triangulates points. + - Optionally runs non-linear refinement on the pose. + - Plots or logs the results (trajectory, errors). + +*(Adjust arguments to match your own script’s CLI if needed.)* + +### 3. Running **sfm_lightglue_aliked.py** +```bash +python sfm_lightglue_aliked.py --dataset kitti --data_path ./Dataset/kitti --use_lightglue True +``` +- **Arguments**: + - `--dataset`: name of the dataset (kitti, malaga, or custom). + - `--data_path`: path to the dataset folder. + - `--use_lightglue`: enable or disable ALIKED + LightGlue pipeline. +- **Behavior**: + - Loads images, runs ALIKED for feature extraction, and LightGlue for matching (GPU if available). + - Estimates camera motion, triangulates points, performs non-linear refinement if configured. + - Computes: + - **ATE** (Absolute Trajectory Error) + - A “cumulative translation error” measure + - Optionally displays debug visualizations (keypoints, matches, trajectory). + +### 4. Visualizations +- **Matplotlib** windows may pop up showing: + - Keypoints and matches for consecutive frames. + - The evolution of the 3D point cloud (if any). + - The camera’s estimated trajectory vs. ground truth (if available). + +### 5. Customization +- Modify `THUMPUP_POS_THRESHOLD` and `THUMPUP_ROT_THRESHOLD` for keyframe selection. +- Tweak the **maximum keypoints** or **confidence** in `ALIKED` or **LightGlue** for performance vs. accuracy trade-offs. +- Adjust RANSAC thresholds or non-linear refinement parameters (in `refine()` method) for more robust or faster solutions. + +--- + +## Implementation Details + +- **sfm.py** + - Uses OpenCV for feature detection (SIFT, ORB, or AKAZE). + - BFMatcher or FLANN for matching. + - Essential matrix / PnP for pose. + - Minimal keyframe selection logic. + +- **sfm_lightglue_aliked.py** + - ALIKED for learned keypoints + descriptors, LightGlue for matching. + - Similar pose estimation logic (PnP, essential matrix). + - Triangulation + refinement steps are nearly the same. + - Typically yields more reliable matches and lower drift. 
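+  A minimal sketch of the extraction/matching step, based on the upstream LightGlue README example (image paths and the keypoint budget are illustrative, not the script’s actual configuration):
+
+  ```python
+  import torch
+  from lightglue import ALIKED, LightGlue
+  from lightglue.utils import load_image, rbd
+
+  device = "cuda" if torch.cuda.is_available() else "cpu"
+  extractor = ALIKED(max_num_keypoints=2048).eval().to(device)   # learned detector + descriptor
+  matcher = LightGlue(features="aliked").eval().to(device)       # matcher conditioned on ALIKED features
+
+  img0 = load_image("frame_000000.png").to(device)
+  img1 = load_image("frame_000001.png").to(device)
+
+  feats0, feats1 = extractor.extract(img0), extractor.extract(img1)
+  matches01 = matcher({"image0": feats0, "image1": feats1})
+  feats0, feats1, matches01 = [rbd(x) for x in (feats0, feats1, matches01)]  # drop batch dim
+
+  matches = matches01["matches"]             # Mx2 indices into the two keypoint sets
+  pts0 = feats0["keypoints"][matches[:, 0]]  # matched pixel coordinates in image 0
+  pts1 = feats1["keypoints"][matches[:, 1]]  # matched pixel coordinates in image 1
+  ```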
+
+- **Stereo** logic (KITTI, Malaga) uses left/right cameras for absolute scale.
+- **Monocular** operation is scale-ambiguous: the trajectory is recovered only up to an arbitrary global scale.
+- **Error Metrics**:
+  - **ATE**: Norm of translation difference from ground truth.
+  - **Cumulative translation error**: Summation of frame-by-frame translation offsets.
+
+---
+
+## Performance & Future Directions
+
+- **Python Prototyping**: Great for algorithmic experimentation but can be slower for large-scale or real-time tasks.
+- **Production-Grade**: Offload heavy steps (bundle adjustment, large-scale optimization) to C++.
+- **Loop Closure & Full SLAM**: These scripts focus on **Visual Odometry**. Future expansions may include place recognition, pose graph optimization, etc.
+
+---
+
diff --git a/modules/SimpleSLAM/config/calibrate_camera/calibration.pkl b/modules/SimpleSLAM/config/calibrate_camera/calibration.pkl
new file mode 100644
index 0000000000..9d15f007d0
Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/calibration.pkl differ
diff --git a/modules/SimpleSLAM/config/calibrate_camera/calibration.py b/modules/SimpleSLAM/config/calibrate_camera/calibration.py
new file mode 100644
index 0000000000..fe42ae17e9
--- /dev/null
+++ b/modules/SimpleSLAM/config/calibrate_camera/calibration.py
@@ -0,0 +1,137 @@
+import numpy as np
+import cv2 as cv
+import glob
+import pickle
+
+
+
+################ FIND CHESSBOARD CORNERS - OBJECT POINTS AND IMAGE POINTS #############################
+
+chessboardSize = (9,6)
+# frameSize = (640,480)
+image_size = None
+
+# termination criteria
+criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)
+
+
+# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
+objp = np.zeros((chessboardSize[0] * chessboardSize[1], 3), np.float32)
+objp[:,:2] = np.mgrid[0:chessboardSize[0],0:chessboardSize[1]].T.reshape(-1,2)
+
+size_of_chessboard_squares_mm = 20
+objp = objp * size_of_chessboard_squares_mm
+
+
+# Arrays to store object points and image points from all the images.
+objpoints = [] # 3d point in real world space
+imgpoints = [] # 2d points in image plane.
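+# The image loop that follows fills these two lists in lockstep: objpoints holds the known
+# board-frame corner coordinates (scaled to millimetres above), and imgpoints the
+# corresponding detected pixel corners; cv.calibrateCamera later solves for the intrinsics
+# and distortion coefficients that best reproject one onto the other.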
+
+
+images = glob.glob('./images/*.png')
+
+for image in images:
+
+    img = cv.imread(image)
+    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
+
+    # Set image_size based on the first image
+    if image_size is None:
+        image_size = gray.shape[::-1]
+
+
+    # Find the chess board corners
+    ret, corners = cv.findChessboardCorners(gray, chessboardSize, None)
+
+    # If found, add object points, image points (after refining them)
+    if ret == True:
+
+        objpoints.append(objp)
+        corners2 = cv.cornerSubPix(gray, corners, (11,11), (-1,-1), criteria)
+        imgpoints.append(corners2)  # store the sub-pixel refined corners, not the raw detections
+
+        # Draw and display the corners
+        cv.drawChessboardCorners(img, chessboardSize, corners2, ret)
+        # cv.imshow('img', img)
+        # cv.waitKey(1000)
+
+
+cv.destroyAllWindows()
+
+
+
+############## CALIBRATION #######################################################
+
+# Ensure that image_size has been set
+if image_size is not None:
+    ret, cameraMatrix, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, image_size, None, None)
+else:
+    raise Exception("Error: No images were processed for calibration.")
+
+# Save the camera calibration result for later use (we won't worry about rvecs / tvecs)
+pickle.dump((cameraMatrix, dist), open( "calibration.pkl", "wb" ))
+pickle.dump(cameraMatrix, open( "cameraMatrix.pkl", "wb" ))
+pickle.dump(dist, open( "dist.pkl", "wb" ))
+
+# ###########PRINT PARAMETERS#############################
+# Print the camera matrix
+print("Camera Matrix:")
+print(cameraMatrix)
+
+# Print the distortion coefficients
+print("\nDistortion Coefficients:")
+print(dist)
+
+# # Print the rotation vectors
+# print("\nRotation Vectors:")
+# for i, rvec in enumerate(rvecs):
+#     print(f"Image {i+1}:")
+#     print(rvec)
+
+# # Print the translation vectors
+# print("\nTranslation Vectors:")
+# for i, tvec in enumerate(tvecs):
+#     print(f"Image {i+1}:")
+#     print(tvec)
+
+
+# ############## UNDISTORTION #####################################################
+
+# img = cv.imread('cali5.png')
+# h, w = img.shape[:2]
+# newCameraMatrix, roi = cv.getOptimalNewCameraMatrix(cameraMatrix, dist, (w,h), 1, (w,h))
+
+
+
+# # Undistort
+# dst = cv.undistort(img, cameraMatrix, dist, None, newCameraMatrix)
+
+# # crop the image
+# x, y, w, h = roi
+# dst = dst[y:y+h, x:x+w]
+# cv.imwrite('caliResult1.png', dst)
+
+
+
+# # Undistort with Remapping
+# mapx, mapy = cv.initUndistortRectifyMap(cameraMatrix, dist, None, newCameraMatrix, (w,h), 5)
+# dst = cv.remap(img, mapx, mapy, cv.INTER_LINEAR)
+
+# # crop the image
+# x, y, w, h = roi
+# dst = dst[y:y+h, x:x+w]
+# cv.imwrite('caliResult2.png', dst)
+
+
+
+
+# Reprojection Error
+mean_error = 0
+
+for i in range(len(objpoints)):
+    imgpoints2, _ = cv.projectPoints(objpoints[i], rvecs[i], tvecs[i], cameraMatrix, dist)
+    error = cv.norm(imgpoints[i], imgpoints2, cv.NORM_L2)/len(imgpoints2)
+    mean_error += error
+
+print( "total error: {}".format(mean_error/len(objpoints)) )
diff --git a/modules/SimpleSLAM/config/calibrate_camera/cameraMatrix.pkl b/modules/SimpleSLAM/config/calibrate_camera/cameraMatrix.pkl
new file mode 100644
index 0000000000..e4c1a2f062
Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/cameraMatrix.pkl differ
diff --git a/modules/SimpleSLAM/config/calibrate_camera/dist.pkl b/modules/SimpleSLAM/config/calibrate_camera/dist.pkl
new file mode 100644
index 0000000000..e68f6ef218
Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/dist.pkl differ
diff --git a/modules/SimpleSLAM/config/calibrate_camera/getImages.py
b/modules/SimpleSLAM/config/calibrate_camera/getImages.py new file mode 100644 index 0000000000..6022cf49d9 --- /dev/null +++ b/modules/SimpleSLAM/config/calibrate_camera/getImages.py @@ -0,0 +1,25 @@ +import cv2 + +cap = cv2.VideoCapture(2) + +num = 0 + +while cap.isOpened(): + + succes, img = cap.read() + + k = cv2.waitKey(5) + + if k == 27: + break + elif k == ord('s'): # wait for 's' key to save and exit + cv2.imwrite('images/img' + str(num) + '.png', img) + print("image saved!") + num += 1 + + cv2.imshow('Img',img) + +# Release and destroy all windows before termination +cap.release() + +cv2.destroyAllWindows() \ No newline at end of file diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0001.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0001.png new file mode 100644 index 0000000000..ca03e4cf07 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0001.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0002.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0002.png new file mode 100644 index 0000000000..e10588ecea Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0002.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0006.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0006.png new file mode 100644 index 0000000000..44444f4af5 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0006.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0009.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0009.png new file mode 100644 index 0000000000..8ed06231c3 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0009.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0012.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0012.png new file mode 100644 index 0000000000..9cea8e72e7 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0012.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0013.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0013.png new file mode 100644 index 0000000000..3151ee953f Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0013.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0014.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0014.png new file mode 100644 index 0000000000..69b7b43b41 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0014.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0016.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0016.png new file mode 100644 index 0000000000..61243fceef Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0016.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0018.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0018.png new file mode 100644 index 0000000000..4638656186 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0018.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0021.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0021.png new file mode 100644 index 0000000000..fa320c5979 Binary files /dev/null and 
b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0021.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0025.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0025.png new file mode 100644 index 0000000000..64602b5255 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0025.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0029.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0029.png new file mode 100644 index 0000000000..174b1d0489 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0029.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0030.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0030.png new file mode 100644 index 0000000000..9a423ecc34 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0030.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0032.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0032.png new file mode 100644 index 0000000000..a4610a9b13 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0032.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0033.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0033.png new file mode 100644 index 0000000000..626fafc42f Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0033.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0034.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0034.png new file mode 100644 index 0000000000..97476fece8 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0034.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0036.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0036.png new file mode 100644 index 0000000000..bee1acd749 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0036.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0042.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0042.png new file mode 100644 index 0000000000..bfedab34c0 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0042.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0044.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0044.png new file mode 100644 index 0000000000..5b0f48e641 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0044.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0045.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0045.png new file mode 100644 index 0000000000..5a96e94590 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0045.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0048.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0048.png new file mode 100644 index 0000000000..8371db0c95 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0048.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0049.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0049.png new file mode 100644 index 0000000000..408b53c56a Binary files /dev/null and 
b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0049.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0050.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0050.png new file mode 100644 index 0000000000..70a4ae65e0 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0050.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0051.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0051.png new file mode 100644 index 0000000000..9bf7de7128 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0051.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0052.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0052.png new file mode 100644 index 0000000000..cfb31c568f Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0052.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0054.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0054.png new file mode 100644 index 0000000000..7f4a4c3ecb Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0054.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0055.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0055.png new file mode 100644 index 0000000000..70952068c0 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0055.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0058.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0058.png new file mode 100644 index 0000000000..1c81661de8 Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0058.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0062.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0062.png new file mode 100644 index 0000000000..b109d78d9b Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0062.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0065.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0065.png new file mode 100644 index 0000000000..d78ab5f5cb Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0065.png differ diff --git a/modules/SimpleSLAM/config/calibrate_camera/images/frame-0069.png b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0069.png new file mode 100644 index 0000000000..89b0b7869c Binary files /dev/null and b/modules/SimpleSLAM/config/calibrate_camera/images/frame-0069.png differ diff --git a/modules/SimpleSLAM/config/monocular.yaml b/modules/SimpleSLAM/config/monocular.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/config/stereo.yaml b/modules/SimpleSLAM/config/stereo.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/docs/system_design.md b/modules/SimpleSLAM/docs/system_design.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/media/Screenshot from 2025-06-25 20-23-36.png b/modules/SimpleSLAM/media/Screenshot from 2025-06-25 20-23-36.png new file mode 100644 index 0000000000..d117e7380a Binary files /dev/null and b/modules/SimpleSLAM/media/Screenshot from 2025-06-25 20-23-36.png differ diff --git a/modules/SimpleSLAM/media/colmap_instruc.png 
b/modules/SimpleSLAM/media/colmap_instruc.png new file mode 100644 index 0000000000..bb29c8acf4 Binary files /dev/null and b/modules/SimpleSLAM/media/colmap_instruc.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..24a7ec50b2 Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.20251.png b/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.20251.png new file mode 100644 index 0000000000..ff002efebf Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/Feature Tracking_screenshot_04.06.20251.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/kitt Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/kitt Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..0514bafc4d Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/kitt Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/lightglue -Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/lightglue -Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..a5db82cdc0 Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/lightglue -Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/lightglue Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/lightglue Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..1915546c89 Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/lightglue Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/feature-visualization/lightglue malaga-Feature Tracking_screenshot_04.06.2025.png b/modules/SimpleSLAM/media/feature-visualization/lightglue malaga-Feature Tracking_screenshot_04.06.2025.png new file mode 100644 index 0000000000..8f7eb95adf Binary files /dev/null and b/modules/SimpleSLAM/media/feature-visualization/lightglue malaga-Feature Tracking_screenshot_04.06.2025.png differ diff --git a/modules/SimpleSLAM/media/output.mp4 b/modules/SimpleSLAM/media/output.mp4 new file mode 100644 index 0000000000..4f8d4e3a19 Binary files /dev/null and b/modules/SimpleSLAM/media/output.mp4 differ diff --git a/modules/SimpleSLAM/media/pyceres_pycolmap_works.png b/modules/SimpleSLAM/media/pyceres_pycolmap_works.png new file mode 100644 index 0000000000..3bfd4bfed5 Binary files /dev/null and b/modules/SimpleSLAM/media/pyceres_pycolmap_works.png differ diff --git a/modules/SimpleSLAM/media/pycolmap_instruc.png b/modules/SimpleSLAM/media/pycolmap_instruc.png new file mode 100644 index 0000000000..c4072de4f1 Binary files /dev/null and b/modules/SimpleSLAM/media/pycolmap_instruc.png differ diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/PKG-INFO b/modules/SimpleSLAM/opencv_simple_slam.egg-info/PKG-INFO new file mode 100644 index 0000000000..388be457dd --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/PKG-INFO @@ -0,0 +1,11 @@ +Metadata-Version: 2.1 +Name: 
opencv-simple-slam +Version: 0.1.0 +Summary: A simple feature-based SLAM using OpenCV and LightGlue +Home-page: UNKNOWN +License: UNKNOWN +Platform: UNKNOWN +License-File: LICENSE + +UNKNOWN + diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/SOURCES.txt b/modules/SimpleSLAM/opencv_simple_slam.egg-info/SOURCES.txt new file mode 100644 index 0000000000..a1fbaf0920 --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/SOURCES.txt @@ -0,0 +1,26 @@ +LICENSE +README.md +setup.py +opencv_simple_slam.egg-info/PKG-INFO +opencv_simple_slam.egg-info/SOURCES.txt +opencv_simple_slam.egg-info/dependency_links.txt +opencv_simple_slam.egg-info/requires.txt +opencv_simple_slam.egg-info/top_level.txt +slam/__init__.py +slam/core/__init__.py +slam/core/dataloader.py +slam/core/features_utils.py +slam/core/keyframe_utils.py +slam/core/landmark_utils.py +slam/core/multi_view_utils.py +slam/core/pnp_utils.py +slam/core/trajectory_utils.py +slam/core/visualization_utils.py +slam/monocular/__init__.py +slam/monocular/main.py +slam/monocular/main2.py +slam/stereo/ROUGHstereo_tracker.py +slam/stereo/__init__.py +tests/__init__.py +tests/test_lightglue_vs_manual.py +tests/test_multi_view_triangulation.py \ No newline at end of file diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/dependency_links.txt b/modules/SimpleSLAM/opencv_simple_slam.egg-info/dependency_links.txt new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/requires.txt b/modules/SimpleSLAM/opencv_simple_slam.egg-info/requires.txt new file mode 100644 index 0000000000..1710f46421 --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/requires.txt @@ -0,0 +1,6 @@ +lightglue@ git+https://github.com/cvg/LightGlue.git +lz4 +numpy +opencv-python +torch +tqdm diff --git a/modules/SimpleSLAM/opencv_simple_slam.egg-info/top_level.txt b/modules/SimpleSLAM/opencv_simple_slam.egg-info/top_level.txt new file mode 100644 index 0000000000..91fd4a6e69 --- /dev/null +++ b/modules/SimpleSLAM/opencv_simple_slam.egg-info/top_level.txt @@ -0,0 +1,2 @@ +slam +tests diff --git a/modules/SimpleSLAM/refrences/ba.py b/modules/SimpleSLAM/refrences/ba.py new file mode 100644 index 0000000000..6ffd073f16 --- /dev/null +++ b/modules/SimpleSLAM/refrences/ba.py @@ -0,0 +1,79 @@ +import numpy as np +from scipy.sparse import lil_matrix +from scipy.optimize import least_squares + +def adjust_bundle(camera_array, points_3d, points_2d, camera_indices, point_indices, num_cameras, num_points): + """Intializes all the class attributes and instance variables. + Write the specifications for each variable: + + cameraArray with shape (n_cameras, 9) contains initial estimates of parameters for all cameras. + First 3 components in each row form a rotation vector, + next 3 components form a translation vector, + then a focal distance and two distortion parameters. + + points_3d with shape (n_points, 3) + contains initial estimates of point coordinates in the world frame. + + camera_ind with shape (n_observations,) + contains indices of cameras (from 0 to n_cameras - 1) involved in each observation. + + point_ind with shape (n_observations,) + contatins indices of points (from 0 to n_points - 1) involved in each observation. + + points_2d with shape (n_observations, 2) + contains measured 2-D coordinates of points projected on images in each observations. 
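+
+    The optimizer works on a single packed parameter vector laid out as
+    [camera parameters (n_cameras * 9), point coordinates (n_points * 3)];
+    the cost function returns the flattened reprojection error, i.e. a
+    residual vector with 2 * n_observations entries.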
+ """ + def rotate(points, rot_vecs): + theta = np.linalg.norm(rot_vecs, axis=1)[:, np.newaxis] + with np.errstate(invalid='ignore'): + v = rot_vecs / theta + v = np.nan_to_num(v) + dot = np.sum(points * v, axis=1)[:, np.newaxis] + cos_theta = np.cos(theta) + sin_theta = np.sin(theta) + + return cos_theta * points + sin_theta * np.cross(v, points) + dot * (1 - cos_theta) * v + + def project(points, cameraArray): + """Convert 3-D points to 2-D by projecting onto images.""" + points_proj = rotate(points, cameraArray[:, :3]) + points_proj += cameraArray[:, 3:6] + points_proj = -points_proj[:, :2] / points_proj[:, 2, np.newaxis] + # f = cameraArray[:, 6] + # k1 = cameraArray[:, 7] + # k2 = cameraArray[:, 8] + # n = np.sum(points_proj ** 2, axis=1) + # r = 1 + k1 * n + k2 * n ** 2 + # points_proj *= (r * f)[:, np.newaxis] + return points_proj + + def fun(params, n_cameras, num_points, camera_indices, point_indices, points_2d): + camera_params = params[:n_cameras * 9].reshape((n_cameras, 9)) + points_3d = params[n_cameras * 9:].reshape((num_points, 3)) + points_proj = project(points_3d[point_indices], camera_params[camera_indices]) + return (points_proj - points_2d).ravel() + + def bundle_adjustment_sparsity(numCameras, numPoints, cameraIndices, pointIndices): + m = cameraIndices.size * 2 + n = numCameras * 9 + numPoints * 3 + A = lil_matrix((m, n), dtype=int) + i = np.arange(cameraIndices.size) + for s in range(9): + A[2 * i, cameraIndices * 9 + s] = 1 + A[2 * i + 1, cameraIndices * 9 + s] = 1 + for s in range(3): + A[2 * i, numCameras * 9 + pointIndices * 3 + s] = 1 + A[2 * i + 1, numCameras * 9 + pointIndices * 3 + s] = 1 + return A + + def optimizedParams(params, num_cameras, num_points): + camera_params = params[:num_cameras * 9].reshape((num_cameras, 9)) + points_3d = params[num_cameras * 9:].reshape((num_points, 3)) + return camera_params, points_3d + + x0 = np.hstack((camera_array.ravel(), points_3d.ravel())) + A = bundle_adjustment_sparsity(num_cameras, num_points, camera_indices, point_indices) + res = least_squares(fun, x0, ftol=1e-2, method='lm', + args=(num_cameras, num_points, camera_indices, point_indices, points_2d)) + camera_params, points_3d = optimizedParams(res.x, num_cameras, num_points) + return camera_params, points_3d \ No newline at end of file diff --git a/modules/SimpleSLAM/refrences/sfm.py b/modules/SimpleSLAM/refrences/sfm.py new file mode 100644 index 0000000000..c39fc13582 --- /dev/null +++ b/modules/SimpleSLAM/refrences/sfm.py @@ -0,0 +1,769 @@ +import cv2 +import numpy as np +import os +import glob +import matplotlib +matplotlib.use('TkAgg') # or 'Qt5Agg', 'WXAgg', etc. +from matplotlib import pyplot as plt +import matplotlib.gridspec as gridspec +from tqdm import tqdm +from scipy.optimize import least_squares +import pandas as pd +import pickle + +class StructureFromMotion: + def __init__(self, prefix, thumpup_pos_thresh, thumpup_rot_thresh): + # You can set this to True if you definitely want to run stereo mode. + # If the dataset doesn't have right-camera images, you can set it to False. 
+ self.isStereo = False + + self.THUMPUP_POS_THRESHOLD = thumpup_pos_thresh + self.THUMPUP_ROT_THRESHOLD = thumpup_rot_thresh + + # ================== DETERMINE DATASET ================== + if 'kitti' in prefix: + self.dataset = 'kitti' + self.image_list = sorted(glob.glob(os.path.join(prefix, '05/image_0', '*.png'))) + self.image_list_r = sorted(glob.glob(os.path.join(prefix, '05/image_1', '*.png'))) + self.K_3x3, self.Proj, self.K_r_3x3, self.Proj_r = self.load_calibration_kitti() + self.gt = np.loadtxt(os.path.join(prefix, 'poses/05.txt')).reshape(-1, 3, 4) + # Set stereo to True if you want to use stereo images. + # If the dataset is incomplete, set it to False. + # self.isStereo = True + + elif 'parking' in prefix: + self.dataset = 'parking' + self.image_list = sorted(glob.glob(os.path.join(prefix, 'images', '*.png'))) + self.gt = np.loadtxt(os.path.join(prefix, 'poses.txt')).reshape(-1, 3, 4) + self.K_3x3, self.Proj = self.load_calibration_parking() + # For parking dataset, typically no stereo images are provided, + # so you can set self.isStereo = False if needed. + + elif 'malaga' in prefix: + self.dataset = 'malaga' + self.image_list = sorted(glob.glob( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_rectified_1024x768_Images', + '*_left.jpg'))) + self.image_list_r = sorted(glob.glob( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_rectified_1024x768_Images', + '*_right.jpg'))) + self.K_3x3, self.Proj, self.K_r_3x3, self.Proj_r = self.load_calibration_malaga() + self.gt = self.malaga_get_gt( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_all-sensors_GPS.txt'), + os.path.join(prefix, 'poses') + ) + # self.isStereo = True + + elif 'custom' in prefix: + """ + For a custom dataset: + - we expect: + prefix/ + custom_video.mp4 or custom_compress.mp4 (video frames) + calibration.pkl (something that provides camera intrinsics) + groundtruth.txt (optional) + """ + self.dataset = 'custom' + self.image_list = [] + + # 1) Load calibration + calib_path = os.path.join(prefix, 'calibration.pkl') + if not os.path.exists(calib_path): + raise FileNotFoundError(f"Calibration file not found at: {calib_path}") + self.K_3x3, self.Proj = self.load_calibration_custom(calib_path) + + # 2) Load frames from a video + video_path = os.path.join(prefix, 'custom_compress.mp4') + if not os.path.exists(video_path): + raise FileNotFoundError(f"Video file not found at: {video_path}") + cap = cv2.VideoCapture(video_path) + self.frames = [] + success, frame = cap.read() + while success: + self.frames.append(frame) + success, frame = cap.read() + cap.release() + + self.image_list = list(range(len(self.frames))) # e.g. [0..N-1] + + # 3) Optionally load ground truth + possible_gt_path = os.path.join(prefix, 'groundtruth.txt') + if os.path.exists(possible_gt_path): + try: + self.gt = np.loadtxt(possible_gt_path).reshape(-1, 3, 4) + except: + print("Ground truth found but could not be loaded. Skipping.") + self.gt = None + else: + print("No ground truth for custom video. 
Ground truth set to None.") + self.gt = None + + else: + raise NotImplementedError('Dataset not implemented:', prefix) + + # ================== FEATURE DETECTORS ================== + self.MAX_FEATURES = 6000 + matcher_type = "akaze" # or "orb", "sift" + + if matcher_type == "orb": + self.fd = cv2.ORB_create(self.MAX_FEATURES) + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + elif matcher_type == "sift": + self.fd = cv2.SIFT_create() + self.bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + elif matcher_type == "akaze": + self.fd = cv2.AKAZE_create() + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + else: + raise ValueError(f"Unsupported matcher, {matcher_type}") + + self.RANSAC_THRESHOLD = 1.0 + + # =============== STEREO SGBM SETUP if self.isStereo = True ========= + if self.isStereo: + self.MIN_DISP = 0 + self.NUM_DISP = 32 + self.BLOCK_SIZE = 11 + P1 = self.BLOCK_SIZE * self.BLOCK_SIZE * 8 + P2 = self.BLOCK_SIZE * self.BLOCK_SIZE * 32 + self.stereo = cv2.StereoSGBM_create( + minDisparity=self.MIN_DISP, + numDisparities=self.NUM_DISP, + blockSize=self.BLOCK_SIZE, + P1=P1, + P2=P2 + ) + + # --------------------------------------------------------------------- + # CALIBRATION LOADERS + # --------------------------------------------------------------------- + def load_calibration_kitti(self): + # left + params_left = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 0.000000e+00 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + dtype=np.float64, sep=' ' + ).reshape(3, 4) + K_left_3x3 = params_left[:3, :3] + + # right + params_right = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 -3.798145e+02 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + dtype=np.float64, sep=' ' + ).reshape(3, 4) + K_right_3x3 = params_right[:3, :3] + return K_left_3x3, params_left, K_right_3x3, params_right + + def load_calibration_parking(self): + P = np.array([ + 331.37, 0.0, 320.0, 0.0, + 0.0, 369.568, 240.0, 0.0, + 0.0, 0.0, 1.0, 0.0 + ]).reshape(3, 4) + K_3x3 = P[:3, :3] + return K_3x3, P + + def load_calibration_malaga(self): + K_left_3x3 = np.array([ + [795.11588, 0.0, 517.12973], + [0.0, 795.11588, 395.59665], + [0.0, 0.0, 1.0 ] + ]) + Proj_l = np.array([ + [795.11588, 0.0, 517.12973, 0.0], + [0.0, 795.11588, 395.59665, 0.0], + [0.0, 0.0, 1.0, 0.0] + ]) + + # Right camera extrinsics + q = np.array([1.0, 0.0, 0.0, 0.0]) + R_right = self.quaternion_to_rotation_matrix(q) + t_right = np.array([0.119471, 0.0, 0.0]).reshape(3, 1) + + inv_homo_proj = np.linalg.inv(self.get_4by4_homo_proj(R_right, t_right)) + Proj_r = Proj_l @ inv_homo_proj + + # For the right camera + K_right_3x3 = K_left_3x3.copy() + return K_left_3x3, Proj_l, K_right_3x3, Proj_r + + def load_calibration_custom(self, calib_path): + with open(calib_path, 'rb') as f: + calibration = pickle.load(f) + camera_matrix_3x3 = calibration[0] # shape (3,3) + # distCoeffs = calibration[1] # shape (5,) or similar, if needed + + # Build a 3×4 projection for the left camera + Proj = np.hstack([camera_matrix_3x3, np.zeros((3, 1))]) + return camera_matrix_3x3, Proj + + # --------------------------------------------------------------------- + # MALAGA GT UTILS + # --------------------------------------------------------------------- + def malaga_get_gt(self, filepath, gt_filepath): + col_names = [ + "Time","Lat","Lon","Alt","fix","sats","speed","dir", + 
"LocalX","LocalY","LocalZ","rawlogID","GeocenX","GeocenY","GeocenZ", + "GPSX","GPSY","GPSZ","GPSVX","GPSVY","GPSVZ","LocalVX","LocalVY","LocalVZ","SATTime" + ] + df = pd.read_csv(filepath, sep='\s+', comment='%', header=None, names=col_names) + df = df[["Time", "LocalX", "LocalY", "LocalZ"]].sort_values(by="Time").reset_index(drop=True) + + times = df["Time"].values + first_time, last_time = times[0], times[-1] + + # Remove frames that have no GT + rows_to_del = [] + for i in range(len(self.image_list)): + f = self.image_list[i] + timestamp = self.extract_file_timestamp(f) + if timestamp < first_time or timestamp > last_time: + rows_to_del.append(i) + self.image_list = np.delete(self.image_list, rows_to_del) + self.image_list_r = np.delete(self.image_list_r, rows_to_del) + + gt = [] + for f in self.image_list: + timestamp = self.extract_file_timestamp(f) + position = self.get_position_at_time(timestamp, df) + pose = np.eye(4) + pose[:3, 3] = position + gt.append(pose) + + gt = np.array(gt) + # Truncate to Nx3 if that is what your pipeline expects + gt = gt[:, :3] + + np.save(gt_filepath + ".npy", gt) + np.savetxt(gt_filepath + ".txt", gt.reshape(gt.shape[0], -1), fmt="%.5f") + return gt + + def extract_file_timestamp(self, filepath): + filename = os.path.basename(filepath) + parts = filename.split("_") + time_part = parts[2] + return float(time_part) + + def get_position_at_time(self, timestamp, df): + times = df["Time"].values + idx = np.searchsorted(times, timestamp) + t0 = times[idx-1] + t1 = times[idx] + row0 = df.iloc[idx-1] + row1 = df.iloc[idx] + x0, y0, z0 = row0["LocalX"], row0["LocalY"], row0["LocalZ"] + x1, y1, z1 = row1["LocalX"], row1["LocalY"], row1["LocalZ"] + + alpha = (timestamp - t0)/(t1 - t0) if t1 != t0 else 0 + x = x0 + alpha*(x1 - x0) + y = y0 + alpha*(y1 - y0) + z = z0 + alpha*(z1 - z0) + return [-y, z, x] + + # --------------------------------------------------------------------- + # GENERAL UTILS + # --------------------------------------------------------------------- + def quaternion_to_rotation_matrix(self, q): + a, b, c, d = q + a2, b2, c2, d2 = a*a, b*b, c*c, d*d + R = np.array([ + [a2 + b2 - c2 - d2, 2*b*c - 2*a*d, 2*b*d + 2*a*c], + [2*b*c + 2*a*d, a2 - b2 + c2 - d2, 2*c*d - 2*a*b], + [2*b*d - 2*a*c, 2*c*d + 2*a*b, a2 - b2 - c2 + d2] + ]) + return R + + def get_4by4_homo_proj(self, R, t): + P = np.eye(4) + P[:3, :3] = R + P[:3, 3] = t.ravel() + return P + + # --------------------------------------------------------------------- + # READING IMAGES + # --------------------------------------------------------------------- + def read_image(self, idx): + if self.dataset == 'custom': + return self.frames[idx] + else: + return cv2.imread(self.image_list[idx]) + + # --------------------------------------------------------------------- + # FEATURE DETECTION & MATCHING + # --------------------------------------------------------------------- + def feature_detection(self, image): + kp, des = self.fd.detectAndCompute(image, mask=None) + return kp, des + + def feature_matching(self, des1, des2, kp1, kp2): + matches = self.bf.match(des1, des2) + matches = sorted(matches, key=lambda x: x.distance) + m_kp1 = np.array([kp1[m.queryIdx] for m in matches]) + m_kp2 = np.array([kp2[m.trainIdx] for m in matches]) + return m_kp1, m_kp2, matches + + # --------------------------------------------------------------------- + # POSE ESTIMATION (2D-2D) + # --------------------------------------------------------------------- + def pose_estimation(self, pos_m_kp1, pos_m_kp2): + E, _ = 
cv2.findEssentialMat( + pos_m_kp1, pos_m_kp2, + cameraMatrix=self.K_3x3, + method=cv2.RANSAC, + threshold=self.RANSAC_THRESHOLD + ) + _, R, t, _ = cv2.recoverPose(E, pos_m_kp1, pos_m_kp2, cameraMatrix=self.K_3x3) + return R, t + + # --------------------------------------------------------------------- + # TRIANGULATION + # --------------------------------------------------------------------- + def triangulate(self, R, t, pos_m_kp1, pos_m_kp2): + """ + We use self.Proj as the 3×4 matrix for the left camera. + For the second camera, we do self.Proj @ [R|t]. + """ + P1 = self.Proj + P2 = self.Proj @ self.get_4by4_homo_proj(R, t) + points_4d = cv2.triangulatePoints(P1, P2, pos_m_kp1.T, pos_m_kp2.T) + points_3d = (points_4d[:3] / points_4d[3]).T # Nx3 + return points_3d + + def project_3d_to_2d(self, X_3D, R, t): + P = self.Proj @ self.get_4by4_homo_proj(R, t) + X_3D_hom = np.hstack([X_3D, np.ones((X_3D.shape[0], 1))]) + X_pix = X_3D_hom @ P.T + u = X_pix[:, 0] / X_pix[:, 2] + v = X_pix[:, 1] / X_pix[:, 2] + return np.vstack((u, v)).T + + # --------------------------------------------------------------------- + # STEREO UTILITIES + # --------------------------------------------------------------------- + def compute_stereo_disparity(self, img_l, img_r): + gray_left = cv2.cvtColor(img_l, cv2.COLOR_BGR2GRAY) + gray_right = cv2.cvtColor(img_r, cv2.COLOR_BGR2GRAY) + disparity = self.stereo.compute(gray_left, gray_right).astype(np.float32) / 16.0 + return disparity + + def apply_disparity_check(self, q, disp, min_disp, max_disp): + q_idx = q.astype(int) + disp_vals = disp.T[q_idx[:,0], q_idx[:,1]] + mask = (disp_vals > min_disp) & (disp_vals < max_disp) + return disp_vals, mask + + def calculate_right_features(self, q1, q2, disparity1, disparity2, min_disp=0.0, max_disp=100.0): + disparity1_vals, mask1 = self.apply_disparity_check(q1, disparity1, min_disp, max_disp) + disparity2_vals, mask2 = self.apply_disparity_check(q2, disparity2, min_disp, max_disp) + in_bounds = mask1 & mask2 + + q1_l = q1[in_bounds] + q2_l = q2[in_bounds] + disp1 = disparity1_vals[in_bounds] + disp2 = disparity2_vals[in_bounds] + + # Right coords + q1_r = np.copy(q1_l) + q2_r = np.copy(q2_l) + q1_r[:, 0] -= disp1 + q2_r[:, 0] -= disp2 + return q1_l, q1_r, q2_l, q2_r, in_bounds + + def get_stereo_3d_pts(self, q1_l, q1_r, q2_l, q2_r): + """ + Triangulate from left and right images for two frames (q1_l, q1_r) and (q2_l, q2_r). + Then we can do solvePnPRansac on Q1, etc. 
+ """ + Q1_4d = cv2.triangulatePoints(self.Proj, self.Proj_r, q1_l.T, q1_r.T) + Q1_3d = (Q1_4d[:3] / Q1_4d[3]).T + + Q2_4d = cv2.triangulatePoints(self.Proj, self.Proj_r, q2_l.T, q2_r.T) + Q2_3d = (Q2_4d[:3] / Q2_4d[3]).T + return Q1_3d, Q2_3d + + # --------------------------------------------------------------------- + # NONLINEAR REFINEMENT + # --------------------------------------------------------------------- + def refine(self, pos_m_kp1, pos_m_kp2, points_3d, R, t, verbose=False): + def reprojection_resid(params, pos_m_kp1, pos_m_kp2, points_3d): + R_vec = params[:3] + t_vec = params[3:].reshape(3,1) + R_opt, _ = cv2.Rodrigues(R_vec) + + # First camera is (I, 0) + proj1 = self.project_3d_to_2d(points_3d, np.eye(3), np.zeros((3,1))) + # Second camera is (R, t) + proj2 = self.project_3d_to_2d(points_3d, R_opt, t_vec) + + err1 = (pos_m_kp1 - proj1).ravel() + err2 = (pos_m_kp2 - proj2).ravel() + return np.concatenate((err1, err2)) + + r_init = cv2.Rodrigues(R)[0].ravel() + t_init = t.ravel() + params_init = np.hstack([r_init, t_init]) + + result = least_squares( + reprojection_resid, + params_init, + args=(pos_m_kp1, pos_m_kp2, points_3d), + method='lm', + ftol=1e-6, xtol=1e-6, gtol=1e-6 + ) + r_refined = result.x[:3] + t_refined = result.x[3:].reshape(3,1) + R_refined, _ = cv2.Rodrigues(r_refined) + + if verbose: + print("[Refine] r_init:", r_init, "-> r_refined:", r_refined) + print("[Refine] t_init:", t_init, "-> t_refined:", t_refined.ravel()) + + return R_refined, t_refined + + # --------------------------------------------------------------------- + # VISUALIZATION + # --------------------------------------------------------------------- + def vis_init(self): + plt.ion() # interactive mode on + fig = plt.figure(figsize=(20, 10)) + gs = gridspec.GridSpec(2, 3, figure=fig) + ax1 = fig.add_subplot(gs[0, 0]) + ax2 = fig.add_subplot(gs[0, 1]) + ax3 = fig.add_subplot(gs[0, 2]) + ax4 = fig.add_subplot(gs[1, 0], projection='3d') + ax5 = fig.add_subplot(gs[1, 1]) + ax6 = fig.add_subplot(gs[1, 2]) + + if self.dataset == 'kitti': + ax4.view_init(elev=15, azim=-90) + elif self.dataset == 'parking': + ax4.view_init(elev=-80, azim=-90) + + # Basic range + x_min, x_max = -400, 400 + y_min, y_max = -400, 400 + z_min, z_max = -400, 400 + + return ax1, ax2, ax3, ax4, ax5, ax6, x_min, x_max, y_min, y_max, z_min, z_max + + def vis(self, vis_param, image1, image2, kp1, kp2, all_points_3d, all_poses, matches, i, show): + ax1, ax2, ax3, ax4, ax5, ax6, x_min, x_max, y_min, y_max, z_min, z_max = vis_param + + c_pose = all_poses[-1] + points_3d = all_points_3d[-1] + # transform local points to world + points_3d_world = (c_pose[:, :3] @ points_3d.T + c_pose[:, 3:]).T + + ax1.clear() + ax1.imshow(image1, cmap='gray') + ax1.axis('off') + + outimg1 = cv2.drawKeypoints(image1, kp1, None) + ax2.clear() + ax2.imshow(outimg1) + ax2.axis('off') + + matched_img = cv2.drawMatches(image1, kp1, image2, kp2, matches, None, + flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS) + ax3.clear() + ax3.imshow(matched_img) + ax3.axis('off') + + ax4.clear() + ax4.set_xlim([x_min + c_pose[0, -1], x_max + c_pose[0, -1]]) + ax4.set_ylim([y_min + c_pose[1, -1], y_max + c_pose[1, -1]]) + ax4.set_zlim([z_min + c_pose[2, -1], z_max + c_pose[2, -1]]) + ax4.scatter(points_3d_world[:,0], points_3d_world[:,1], points_3d_world[:,2], s=10, c='green') + ax4.scatter(c_pose[0, -1], c_pose[1, -1], c_pose[2, -1], c='red', label='Camera') + ax4.legend() + + ax5.clear() + ax5.grid() + ax5.set_xlim([x_min + c_pose[0, -1], x_max + c_pose[0, -1]]) + 
ax5.set_ylim([z_min + c_pose[2, -1], z_max + c_pose[2, -1]]) + arr_poses = np.array(all_poses) + ax5.scatter(arr_poses[:, 0, -1], arr_poses[:, 2, -1], s=10, c='red', label='Estimated camera') + + # if GT exists + if self.gt is not None and i < self.gt.shape[0]: + ax5.scatter(self.gt[:i, 0, -1], self.gt[:i, 2, -1], s=10, c='black', label='GT camera') + + ax5.scatter(points_3d_world[:,0], points_3d_world[:,2], s=10, c='green', alpha=0.5) + ax5.legend() + + ax6.clear() + ax6.plot(range(len(all_points_3d)), [p.shape[0] for p in all_points_3d], label='num matched keypoints') + ax6.grid() + ax6.legend() + + plt.tight_layout() + if show: + plt.pause(0.01) + plt.show() + + # --------------------------------------------------------------------- + # LOSSES + # --------------------------------------------------------------------- + def compute_ate(self, est_pose, gt_pose): + t_est = est_pose[:, 3] + t_gt = gt_pose[:, 3] + return np.linalg.norm(t_est - t_gt) + + def compute_rte(self, est_pose_old, est_pose_new, gt_pose_old, gt_pose_new): + old_est_t = est_pose_old[:, 3] + new_est_t = est_pose_new[:, 3] + old_gt_t = gt_pose_old[:, 3] + new_gt_t = gt_pose_new[:, 3] + rel_est = new_est_t - old_est_t + rel_gt = new_gt_t - old_gt_t + return np.linalg.norm(rel_est - rel_gt) + + # --------------------------------------------------------------------- + # INITIALIZE (KEYFRAMES) + # --------------------------------------------------------------------- + def initialize(self, keyframe_filename): + if os.path.exists(keyframe_filename): + return + + c_pose = np.hstack([np.eye(3), np.zeros((3,1))]) # 3x4 + all_poses = [c_pose] + keyframes = [0] + + print("Finding keyframes") + initial_frame = True + for id_n_keyframe in tqdm(range(1, len(self.image_list))): + id_c_keyframe = keyframes[-1] + pose_c_keyframe = all_poses[id_c_keyframe] + + img1 = self.read_image(id_c_keyframe) + img2 = self.read_image(id_n_keyframe) + + if self.isStereo: + # For stereo, we load the right images as well: + img1_r = cv2.imread(self.image_list_r[id_c_keyframe]) + img2_r = cv2.imread(self.image_list_r[id_n_keyframe]) + disparity1 = self.compute_stereo_disparity(img1, img1_r) + disparity2 = self.compute_stereo_disparity(img2, img2_r) + + kp1, des1 = self.feature_detection(img1) + kp2, des2 = self.feature_detection(img2) + m_kp1, m_kp2, matches = self.feature_matching(des1, des2, kp1, kp2) + pos_m_kp1 = np.array([kp.pt for kp in m_kp1]) + pos_m_kp2 = np.array([kp.pt for kp in m_kp2]) + + # Pose estimation + if initial_frame or not self.isStereo: + R, t = self.pose_estimation(pos_m_kp1, pos_m_kp2) + initial_frame = False + else: + # stereo approach + q1_l, q1_r, q2_l, q2_r, mask_stereo = self.calculate_right_features( + pos_m_kp1, pos_m_kp2, + disparity1, disparity2 + ) + # Triangulate in the left-right images for both frames + Q1, Q2 = self.get_stereo_3d_pts(q1_l, q1_r, q2_l, q2_r) + + success, rvec, tvec, inliers = cv2.solvePnPRansac( + objectPoints=Q1.reshape(-1,1,3).astype(np.float32), + imagePoints=q2_l.reshape(-1,1,2).astype(np.float32), + cameraMatrix=self.K_3x3, + distCoeffs=None, + iterationsCount=500, + reprojectionError=5.0, + confidence=0.99, + flags=cv2.SOLVEPNP_ITERATIVE + ) + if success: + R, _ = cv2.Rodrigues(rvec) + t = tvec.reshape(3,1) + else: + print("PnP RANSAC failed... 
(Stereo approach)") + + # Triangulate & refine + pts_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + R, t = self.refine(pos_m_kp1, pos_m_kp2, pts_3d, R, t, verbose=False) + pts_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + + inv_pose = np.linalg.inv(self.get_4by4_homo_proj(R, t)) + c_pose = c_pose @ inv_pose + all_poses.append(c_pose) + + # check thresholds + dist_from_keyframe = np.linalg.norm(c_pose[:3, -1] - pose_c_keyframe[:3, -1]) + good_depth_pts = pts_3d[pts_3d[:,2] > 0, 2] + avg_depth = np.mean(good_depth_pts) if len(good_depth_pts) else 1e-6 + + rel_R = np.linalg.inv(pose_c_keyframe[:3, :3]) @ c_pose[:3, :3] + rod = cv2.Rodrigues(rel_R)[0] + deg = np.abs(rod * 180.0 / np.pi) + ratio = dist_from_keyframe / avg_depth + + if ratio > self.THUMPUP_POS_THRESHOLD or np.any(deg > self.THUMPUP_ROT_THRESHOLD): + print(f" [Keyframe] Dist: {dist_from_keyframe:.3f}, AvgDepth: {avg_depth:.3f}, ratio={ratio:.3f}, deg={deg.squeeze()}, frame={id_n_keyframe}") + keyframes.append(id_n_keyframe) + + np.save(keyframe_filename, np.array(keyframes)) + + # --------------------------------------------------------------------- + # MAIN RUN + # --------------------------------------------------------------------- + def run(self, keyframe_filename, checkpoints=20, dynamic_vis=True): + """ + Runs the main pipeline using either the keyframes from file + or all frames if no file is found. The 'dynamic_vis' parameter + can be set to False if you do not want interactive visualization. + """ + import matplotlib.pyplot as plt + + # 1) Load or generate keyframes + if os.path.exists(keyframe_filename): + keyframes = np.load(keyframe_filename) + print(f"loaded keyframes from file: {keyframes}") + else: + keyframes = np.arange(len(self.image_list)) + print("keyframe file not found, using all frames") + + # 2) Conditionally set up visualization + vis_param = None + if dynamic_vis: + vis_param = self.vis_init() + + # 3) Initialize + c_pose = np.hstack((np.eye(3), np.zeros((3, 1)))) + all_poses = [c_pose] + all_points_3d = [] + initial_frame = True + cumulative_loss_loc = 0 + + # 4) Main loop + iter_count = len(keyframes) + for i in tqdm(range(1, iter_count)): + idx1 = keyframes[i - 1] + idx2 = keyframes[i] + + # --- Read images + img1 = self.read_image(idx1) + img2 = self.read_image(idx2) + + # If stereo is active, compute disparities or do pnp + if self.isStereo: + img1_r = cv2.imread(self.image_list_r[idx1]) + img2_r = cv2.imread(self.image_list_r[idx2]) + disparity1 = self.compute_stereo_disparity(img1, img1_r) + disparity2 = self.compute_stereo_disparity(img2, img2_r) + + # --- Feature detection + matching + kp1, des1 = self.feature_detection(img1) + kp2, des2 = self.feature_detection(img2) + m_kp1, m_kp2, matches = self.feature_matching(des1, des2, kp1, kp2) + pos_m_kp1 = np.array([kp.pt for kp in m_kp1]) + pos_m_kp2 = np.array([kp.pt for kp in m_kp2]) + + # --- Pose estimation + if initial_frame or not self.isStereo: + R, t = self.pose_estimation(pos_m_kp1, pos_m_kp2) + initial_frame = False + else: + # stereo approach + q1_l, q1_r, q2_l, q2_r, in_bounds = self.calculate_right_features( + pos_m_kp1, pos_m_kp2, + disparity1, disparity2 + ) + Q1, Q2 = self.get_stereo_3d_pts(q1_l, q1_r, q2_l, q2_r) + success, rvec, tvec, inliers = cv2.solvePnPRansac( + Q1.reshape(-1,1,3).astype(np.float32), + q2_l.reshape(-1,1,2).astype(np.float32), + self.K_3x3, None, + iterationsCount=2000, + reprojectionError=5.0, + confidence=0.99, + flags=cv2.SOLVEPNP_ITERATIVE + ) + if success: + R, _ = cv2.Rodrigues(rvec) + t = 
tvec.reshape(3,1) + else: + print("[Warning] Stereo PnP RANSAC failed to find a valid pose.") + + # --- Triangulate & refine + points_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + R, t = self.refine(pos_m_kp1, pos_m_kp2, points_3d, R, t) + points_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + all_points_3d.append(points_3d) + + # --- Update global camera pose + inv_pose = np.linalg.inv(self.get_4by4_homo_proj(R, t)) + c_pose = c_pose @ inv_pose + all_poses.append(c_pose) + + # --- If ground truth is available, compute any desired loss + ate_loss = -1 + rte_loss = -1 + if (self.gt is not None) and (idx2 < self.gt.shape[0]): + gt_pose_current = self.gt[idx2] + diff_mean = np.mean(np.abs(gt_pose_current[:, 3] - c_pose[:, 3])) + cumulative_loss_loc += diff_mean + ate_loss = self.compute_ate(c_pose, gt_pose_current) + # For RTE: + # if i >= 2 and (idx1 < self.gt.shape[0]): + # rte_loss = self.compute_rte(all_poses[-2], all_poses[-1], self.gt[idx1], self.gt[idx2]) + + # --- Visualization + if dynamic_vis: + self.vis(vis_param, img1, img2, kp1, kp2, all_points_3d, all_poses, matches, idx1, show=True) + + # --- Save screenshots every 'checkpoints' frames + if (i % checkpoints == 0) or (i == iter_count - 1): + if dynamic_vis: + self.vis(vis_param, img1, img2, kp1, kp2, all_points_3d, all_poses, matches, idx1, show=False) + + out_dir = f"./output/{self.dataset}" + if not os.path.exists(out_dir): + os.mkdir(out_dir) + + if ate_loss == -1: + fname = f"{idx1}_noGT.png" + else: + fname = f"{idx1}_{(cumulative_loss_loc / i):.2f}_{ate_loss:.2f}.png" + + plt.savefig(os.path.join(out_dir, fname), dpi=100) + + +# ================================ +# Example main script usage +# ================================ +if __name__ == "__main__": + thumpup_pos_thresh = 0.015 + thumpup_rot_thresh = 1.5 + dataset = "kitti" # can be "kitti", "parking", "malaga", or "custom" + + # Create output directory if it doesn't exist + output_dir = "output" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + + if dataset == "malaga": + keyframe_filename = f"./output/malaga_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/malaga-urban-dataset-extract-07", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "parking": + keyframe_filename = f"./output/parking_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/parking", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "kitti": + keyframe_filename = f"./output/kitti_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/kitti", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "custom": + keyframe_filename = f"./output/custom_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/custom", thumpup_pos_thresh, thumpup_rot_thresh) + else: + raise ValueError("Unknown dataset") + + sfm.initialize(keyframe_filename) + sfm.run(keyframe_filename, checkpoints=20, dynamic_vis=True) + + # If you want to block at the end (so the figure remains): + # plt.show(block=True) diff --git a/modules/SimpleSLAM/refrences/sfm_lightglue_aliked.py b/modules/SimpleSLAM/refrences/sfm_lightglue_aliked.py new file mode 100644 index 0000000000..2f626acdca --- /dev/null +++ b/modules/SimpleSLAM/refrences/sfm_lightglue_aliked.py @@ -0,0 +1,853 @@ +import cv2 +import numpy as np +import os +import glob +import matplotlib +matplotlib.use('TkAgg') # or 'Qt5Agg', 'WXAgg', etc. 
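+# NOTE: 'TkAgg' needs an attached display. On a headless machine the
+# non-interactive 'Agg' backend (figures written to disk only) is a safer
+# choice, e.g. matplotlib.use('Agg').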
+from matplotlib import pyplot as plt +import matplotlib.gridspec as gridspec +from tqdm import tqdm +from scipy.optimize import least_squares +import pandas as pd +import pickle + + +# New imports for LightGlue integration +import torch +from lightglue import LightGlue, ALIKED # using ALIKED features +from lightglue.utils import load_image, rbd + +class StructureFromMotion: + def __init__(self, prefix, thumpup_pos_thresh, thumpup_rot_thresh, use_lightglue=True): + # Set this flag to True to use ALIKED+LightGlue instead of the OpenCV pipeline. + self.use_lightglue = use_lightglue + + # If using LightGlue, initialize its extractor and matcher. + if self.use_lightglue: + self.extractor = ALIKED(max_num_keypoints=2048).eval().cuda() + # Here we choose 'disk' as the matching features for LightGlue. + self.matcher = LightGlue(features='disk').eval().cuda() + + # (Rest of your __init__ code remains unchanged...) + self.isStereo = False + self.THUMPUP_POS_THRESHOLD = thumpup_pos_thresh + self.THUMPUP_ROT_THRESHOLD = thumpup_rot_thresh + + # ================== DETERMINE DATASET ================== + if 'kitti' in prefix: + self.dataset = 'kitti' + self.image_list = sorted(glob.glob(os.path.join(prefix, '05/image_0', '*.png'))) + self.image_list_r = sorted(glob.glob(os.path.join(prefix, '05/image_1', '*.png'))) + self.K_3x3, self.Proj, self.K_r_3x3, self.Proj_r = self.load_calibration_kitti() + self.gt = np.loadtxt(os.path.join(prefix, 'poses/05.txt')).reshape(-1, 3, 4) + elif 'parking' in prefix: + self.dataset = 'parking' + self.image_list = sorted(glob.glob(os.path.join(prefix, 'images', '*.png'))) + self.gt = np.loadtxt(os.path.join(prefix, 'poses.txt')).reshape(-1, 3, 4) + self.K_3x3, self.Proj = self.load_calibration_parking() + elif 'malaga' in prefix: + self.dataset = 'malaga' + self.image_list = sorted(glob.glob( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_rectified_1024x768_Images', + '*_left.jpg'))) + self.image_list_r = sorted(glob.glob( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_rectified_1024x768_Images', + '*_right.jpg'))) + self.K_3x3, self.Proj, self.K_r_3x3, self.Proj_r = self.load_calibration_malaga() + self.gt = self.malaga_get_gt( + os.path.join(prefix, 'malaga-urban-dataset-extract-07_all-sensors_GPS.txt'), + os.path.join(prefix, 'poses') + ) + elif 'custom' in prefix: + self.dataset = 'custom' + self.image_list = [] + calib_path = os.path.join(prefix, 'calibration.pkl') + if not os.path.exists(calib_path): + raise FileNotFoundError(f"Calibration file not found at: {calib_path}") + self.K_3x3, self.Proj = self.load_calibration_custom(calib_path) + video_path = os.path.join(prefix, 'custom_compress.mp4') + if not os.path.exists(video_path): + raise FileNotFoundError(f"Video file not found at: {video_path}") + cap = cv2.VideoCapture(video_path) + self.frames = [] + success, frame = cap.read() + while success: + self.frames.append(frame) + success, frame = cap.read() + cap.release() + self.image_list = list(range(len(self.frames))) + possible_gt_path = os.path.join(prefix, 'groundtruth.txt') + if os.path.exists(possible_gt_path): + try: + self.gt = np.loadtxt(possible_gt_path).reshape(-1, 3, 4) + except: + print("Ground truth found but could not be loaded. Skipping.") + self.gt = None + else: + print("No ground truth for custom video. 
Ground truth set to None.") + self.gt = None + else: + raise NotImplementedError('Dataset not implemented:', prefix) + + # # ================== FEATURE DETECTORS (OpenCV fallback) ================== + self.MAX_FEATURES = 6000 + matcher_type = "akaze" # or "orb", "sift" + if matcher_type == "orb": + self.fd = cv2.ORB_create(self.MAX_FEATURES) + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + elif matcher_type == "sift": + self.fd = cv2.SIFT_create() + self.bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True) + elif matcher_type == "akaze": + self.fd = cv2.AKAZE_create() + self.bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True) + else: + raise ValueError(f"Unsupported matcher, {matcher_type}") + + self.RANSAC_THRESHOLD = 1.0 + + if self.isStereo: + self.MIN_DISP = 0 + self.NUM_DISP = 32 + self.BLOCK_SIZE = 11 + P1 = self.BLOCK_SIZE * self.BLOCK_SIZE * 8 + P2 = self.BLOCK_SIZE * self.BLOCK_SIZE * 32 + self.stereo = cv2.StereoSGBM_create( + minDisparity=self.MIN_DISP, + numDisparities=self.NUM_DISP, + blockSize=self.BLOCK_SIZE, + P1=P1, + P2=P2 + ) + + # --------------------------------------------------------------------- + # CALIBRATION LOADERS + # --------------------------------------------------------------------- + def load_calibration_kitti(self): + # left + params_left = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 0.000000e+00 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + dtype=np.float64, sep=' ' + ).reshape(3, 4) + K_left_3x3 = params_left[:3, :3] + + # right + params_right = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 -3.798145e+02 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + dtype=np.float64, sep=' ' + ).reshape(3, 4) + K_right_3x3 = params_right[:3, :3] + return K_left_3x3, params_left, K_right_3x3, params_right + + def load_calibration_parking(self): + P = np.array([ + 331.37, 0.0, 320.0, 0.0, + 0.0, 369.568, 240.0, 0.0, + 0.0, 0.0, 1.0, 0.0 + ]).reshape(3, 4) + K_3x3 = P[:3, :3] + return K_3x3, P + + def load_calibration_malaga(self): + K_left_3x3 = np.array([ + [795.11588, 0.0, 517.12973], + [0.0, 795.11588, 395.59665], + [0.0, 0.0, 1.0 ] + ]) + Proj_l = np.array([ + [795.11588, 0.0, 517.12973, 0.0], + [0.0, 795.11588, 395.59665, 0.0], + [0.0, 0.0, 1.0, 0.0] + ]) + + # Right camera extrinsics + q = np.array([1.0, 0.0, 0.0, 0.0]) + R_right = self.quaternion_to_rotation_matrix(q) + t_right = np.array([0.119471, 0.0, 0.0]).reshape(3, 1) + + inv_homo_proj = np.linalg.inv(self.get_4by4_homo_proj(R_right, t_right)) + Proj_r = Proj_l @ inv_homo_proj + + # For the right camera + K_right_3x3 = K_left_3x3.copy() + return K_left_3x3, Proj_l, K_right_3x3, Proj_r + + def load_calibration_custom(self, calib_path): + with open(calib_path, 'rb') as f: + calibration = pickle.load(f) + camera_matrix_3x3 = calibration[0] # shape (3,3) + # distCoeffs = calibration[1] # shape (5,) or similar, if needed + + # Build a 3×4 projection for the left camera + Proj = np.hstack([camera_matrix_3x3, np.zeros((3, 1))]) + return camera_matrix_3x3, Proj + + # --------------------------------------------------------------------- + # MALAGA GT UTILS + # --------------------------------------------------------------------- + def malaga_get_gt(self, filepath, gt_filepath): + col_names = [ + "Time","Lat","Lon","Alt","fix","sats","speed","dir", + "LocalX","LocalY","LocalZ","rawlogID","GeocenX","GeocenY","GeocenZ", + 
"GPSX","GPSY","GPSZ","GPSVX","GPSVY","GPSVZ","LocalVX","LocalVY","LocalVZ","SATTime" + ] + df = pd.read_csv(filepath, sep='\s+', comment='%', header=None, names=col_names) + df = df[["Time", "LocalX", "LocalY", "LocalZ"]].sort_values(by="Time").reset_index(drop=True) + + times = df["Time"].values + first_time, last_time = times[0], times[-1] + + # Remove frames that have no GT + rows_to_del = [] + for i in range(len(self.image_list)): + f = self.image_list[i] + timestamp = self.extract_file_timestamp(f) + if timestamp < first_time or timestamp > last_time: + rows_to_del.append(i) + self.image_list = np.delete(self.image_list, rows_to_del) + self.image_list_r = np.delete(self.image_list_r, rows_to_del) + + gt = [] + for f in self.image_list: + timestamp = self.extract_file_timestamp(f) + position = self.get_position_at_time(timestamp, df) + pose = np.eye(4) + pose[:3, 3] = position + gt.append(pose) + + gt = np.array(gt) + # Truncate to Nx3 if that is what your pipeline expects + gt = gt[:, :3] + + np.save(os.path.join(gt_filepath + ".npy"), gt) + np.savetxt(os.path.join(gt_filepath + ".txt"), gt.reshape(gt.shape[0], -1), fmt="%.5f") + return gt + + def extract_file_timestamp(self, filepath): + filename = os.path.basename(filepath) + parts = filename.split("_") + time_part = parts[2] + return float(time_part) + + def get_position_at_time(self, timestamp, df): + times = df["Time"].values + idx = np.searchsorted(times, timestamp) + t0 = times[idx-1] + t1 = times[idx] + row0 = df.iloc[idx-1] + row1 = df.iloc[idx] + x0, y0, z0 = row0["LocalX"], row0["LocalY"], row0["LocalZ"] + x1, y1, z1 = row1["LocalX"], row1["LocalY"], row1["LocalZ"] + + alpha = (timestamp - t0)/(t1 - t0) if t1 != t0 else 0 + x = x0 + alpha*(x1 - x0) + y = y0 + alpha*(y1 - y0) + z = z0 + alpha*(z1 - z0) + return [-y, z, x] + + # --------------------------------------------------------------------- + # GENERAL UTILS + # --------------------------------------------------------------------- + def quaternion_to_rotation_matrix(self, q): + a, b, c, d = q + a2, b2, c2, d2 = a*a, b*b, c*c, d*d + R = np.array([ + [a2 + b2 - c2 - d2, 2*b*c - 2*a*d, 2*b*d + 2*a*c], + [2*b*c + 2*a*d, a2 - b2 + c2 - d2, 2*c*d - 2*a*b], + [2*b*d - 2*a*c, 2*c*d + 2*a*b, a2 - b2 - c2 + d2] + ]) + return R + + def get_4by4_homo_proj(self, R, t): + P = np.eye(4) + P[:3, :3] = R + P[:3, 3] = t.ravel() + return P + + # --------------------------------------------------------------------- + # READING IMAGES + # --------------------------------------------------------------------- + def read_image(self, idx): + if self.dataset == 'custom': + return self.frames[idx] + else: + return cv2.imread(self.image_list[idx]) + + # --------------------------------------------------------------------- + # FEATURE DETECTION & MATCHING + # --------------------------------------------------------------------- + def feature_detection(self, image): + kp, des = self.fd.detectAndCompute(image, mask=None) + return kp, des + + def feature_matching(self, des1, des2, kp1, kp2): + matches = self.bf.match(des1, des2) + matches = sorted(matches, key=lambda x: x.distance) + m_kp1 = np.array([kp1[m.queryIdx] for m in matches]) + m_kp2 = np.array([kp2[m.trainIdx] for m in matches]) + return m_kp1, m_kp2, matches + + # --------------------------------------------------------------------- + # POSE ESTIMATION (2D-2D) + # --------------------------------------------------------------------- + def pose_estimation(self, pos_m_kp1, pos_m_kp2): + E, _ = cv2.findEssentialMat( + pos_m_kp1, pos_m_kp2, + 
cameraMatrix=self.K_3x3, + method=cv2.RANSAC, + threshold=self.RANSAC_THRESHOLD + ) + _, R, t, _ = cv2.recoverPose(E, pos_m_kp1, pos_m_kp2, cameraMatrix=self.K_3x3) + return R, t + + def pose_estimation(self, pos_m_kp1, pos_m_kp2): + # If the keypoints are torch tensors on GPU, move them to CPU and convert to numpy + if isinstance(pos_m_kp1, torch.Tensor): + pts1 = pos_m_kp1.cpu().numpy().astype(np.float32) + else: + pts1 = np.asarray(pos_m_kp1, dtype=np.float32) + + if isinstance(pos_m_kp2, torch.Tensor): + pts2 = pos_m_kp2.cpu().numpy().astype(np.float32) + else: + pts2 = np.asarray(pos_m_kp2, dtype=np.float32) + + + # Pass the camera matrix and additional parameters as positional arguments + E, _ = cv2.findEssentialMat( + pts1, pts2, + cameraMatrix=self.K_3x3, + method=cv2.RANSAC, + threshold=self.RANSAC_THRESHOLD + ) + _, R, t, _ = cv2.recoverPose(E, pts1, pts2, cameraMatrix=self.K_3x3) + return R, t + + + # --------------------------------------------------------------------- + # TRIANGULATION + # --------------------------------------------------------------------- + def triangulate(self, R, t, pos_m_kp1, pos_m_kp2): + P1 = self.Proj + P2 = self.Proj @ self.get_4by4_homo_proj(R, t) + + # Ensure the keypoints are numpy arrays of type float32 + if isinstance(pos_m_kp1, torch.Tensor): + pts1 = pos_m_kp1.cpu().numpy().astype(np.float32) + else: + pts1 = np.asarray(pos_m_kp1, dtype=np.float32) + + if isinstance(pos_m_kp2, torch.Tensor): + pts2 = pos_m_kp2.cpu().numpy().astype(np.float32) + else: + pts2 = np.asarray(pos_m_kp2, dtype=np.float32) + + # cv2.triangulatePoints expects points in shape (2, N) + points_4d = cv2.triangulatePoints(P1, P2, pts1.T, pts2.T) + points_3d = (points_4d[:3] / points_4d[3]).T # Convert homogeneous coordinates to 3D + return points_3d + + # --------------------------------------------------------------------- + # STEREO UTILITIES + # --------------------------------------------------------------------- + def compute_stereo_disparity(self, img_l, img_r): + gray_left = cv2.cvtColor(img_l, cv2.COLOR_BGR2GRAY) + gray_right = cv2.cvtColor(img_r, cv2.COLOR_BGR2GRAY) + disparity = self.stereo.compute(gray_left, gray_right).astype(np.float32) / 16.0 + return disparity + + def apply_disparity_check(self, q, disp, min_disp, max_disp): + q_idx = q.astype(int) + disp_vals = disp.T[q_idx[:,0], q_idx[:,1]] + mask = (disp_vals > min_disp) & (disp_vals < max_disp) + return disp_vals, mask + + def calculate_right_features(self, q1, q2, disparity1, disparity2, min_disp=0.0, max_disp=100.0): + disparity1_vals, mask1 = self.apply_disparity_check(q1, disparity1, min_disp, max_disp) + disparity2_vals, mask2 = self.apply_disparity_check(q2, disparity2, min_disp, max_disp) + in_bounds = mask1 & mask2 + + q1_l = q1[in_bounds] + q2_l = q2[in_bounds] + disp1 = disparity1_vals[in_bounds] + disp2 = disparity2_vals[in_bounds] + + # Right coords + q1_r = np.copy(q1_l) + q2_r = np.copy(q2_l) + q1_r[:, 0] -= disp1 + q2_r[:, 0] -= disp2 + return q1_l, q1_r, q2_l, q2_r, in_bounds + + def get_stereo_3d_pts(self, q1_l, q1_r, q2_l, q2_r): + """ + Triangulate from left and right images for two frames (q1_l, q1_r) and (q2_l, q2_r). + Then we can do solvePnPRansac on Q1, etc. 
+ """ + Q1_4d = cv2.triangulatePoints(self.Proj, self.Proj_r, q1_l.T, q1_r.T) + Q1_3d = (Q1_4d[:3] / Q1_4d[3]).T + + Q2_4d = cv2.triangulatePoints(self.Proj, self.Proj_r, q2_l.T, q2_r.T) + Q2_3d = (Q2_4d[:3] / Q2_4d[3]).T + return Q1_3d, Q2_3d + + def project_3d_to_2d(self, X_3D, R, t): + # Compute the projection matrix for the given rotation and translation. + P = self.Proj @ self.get_4by4_homo_proj(R, t) + # Convert 3D points to homogeneous coordinates. + X_3D_hom = np.hstack([X_3D, np.ones((X_3D.shape[0], 1))]) + # Project the 3D points. + X_pix = X_3D_hom @ P.T + # Normalize to get pixel coordinates. + u = X_pix[:, 0] / X_pix[:, 2] + v = X_pix[:, 1] / X_pix[:, 2] + return np.vstack((u, v)).T + + # --------------------------------------------------------------------- + # NONLINEAR REFINEMENT + # --------------------------------------------------------------------- + def refine(self, pos_m_kp1, pos_m_kp2, points_3d, R, t, verbose=False): + def reprojection_resid(params, pos_m_kp1, pos_m_kp2, points_3d): + R_vec = params[:3] + t_vec = params[3:].reshape(3,1) + R_opt, _ = cv2.Rodrigues(R_vec) + + # Project points for camera 1 and camera 2. + proj1 = self.project_3d_to_2d(points_3d, np.eye(3), np.zeros((3,1))) + proj2 = self.project_3d_to_2d(points_3d, R_opt, t_vec) + + # Ensure the keypoint arrays are numpy arrays on the CPU. + if isinstance(pos_m_kp1, torch.Tensor): + pos_m_kp1 = pos_m_kp1.cpu().numpy().astype(np.float32) + else: + pos_m_kp1 = np.asarray(pos_m_kp1, dtype=np.float32) + if isinstance(pos_m_kp2, torch.Tensor): + pos_m_kp2 = pos_m_kp2.cpu().numpy().astype(np.float32) + else: + pos_m_kp2 = np.asarray(pos_m_kp2, dtype=np.float32) + + err1 = (pos_m_kp1 - proj1).ravel() + err2 = (pos_m_kp2 - proj2).ravel() + return np.concatenate((err1, err2)) + + # Get initial parameters from the current pose. + r_init = cv2.Rodrigues(R)[0].ravel() + t_init = t.ravel() + params_init = np.hstack([r_init, t_init]) + + result = least_squares( + reprojection_resid, + params_init, + args=(pos_m_kp1, pos_m_kp2, points_3d), + method='lm', + ftol=1e-6, xtol=1e-6, gtol=1e-6 + ) + + # Check if the optimization was successful; if not, return the original R, t. + if not result.success: + print("Least squares refinement did not converge. 
Returning original pose.") + return R, t + + r_refined = result.x[:3] + t_refined = result.x[3:].reshape(3, 1) + R_refined, _ = cv2.Rodrigues(r_refined) + + if verbose: + print("[Refine] r_init:", r_init, "-> r_refined:", r_refined) + print("[Refine] t_init:", t_init, "-> t_refined:", t_refined.ravel()) + + return R_refined, t_refined + + + # --------------------------------------------------------------------- + # VISUALIZATION + # --------------------------------------------------------------------- + def vis_init(self): + plt.ion() # interactive mode on + fig = plt.figure(figsize=(20, 10)) + gs = gridspec.GridSpec(2, 3, figure=fig) + ax1 = fig.add_subplot(gs[0, 0]) + ax2 = fig.add_subplot(gs[0, 1]) + ax3 = fig.add_subplot(gs[0, 2]) + ax4 = fig.add_subplot(gs[1, 0], projection='3d') + ax5 = fig.add_subplot(gs[1, 1]) + ax6 = fig.add_subplot(gs[1, 2]) + + if self.dataset == 'kitti': + ax4.view_init(elev=15, azim=-90) + elif self.dataset == 'parking': + ax4.view_init(elev=-80, azim=-90) + + # Basic range + x_min, x_max = -400, 400 + y_min, y_max = -400, 400 + z_min, z_max = -400, 400 + + return ax1, ax2, ax3, ax4, ax5, ax6, x_min, x_max, y_min, y_max, z_min, z_max + + def vis(self, vis_param, image1, image2, kp1, kp2, all_points_3d, all_poses, matches, i, show): + ax1, ax2, ax3, ax4, ax5, ax6, x_min, x_max, y_min, y_max, z_min, z_max = vis_param + + c_pose = all_poses[-1] + points_3d = all_points_3d[-1] + # transform local points to world + points_3d_world = (c_pose[:, :3] @ points_3d.T + c_pose[:, 3:]).T + + ax1.clear() + ax1.imshow(image1, cmap='gray') + ax1.axis('off') + + outimg1 = cv2.drawKeypoints(image1, kp1, None) + ax2.clear() + ax2.imshow(outimg1) + ax2.axis('off') + + matched_img = cv2.drawMatches(image1, kp1, image2, kp2, matches, None, + flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS) + ax3.clear() + ax3.imshow(matched_img) + ax3.axis('off') + + ax4.clear() + ax4.set_xlim([x_min + c_pose[0, -1], x_max + c_pose[0, -1]]) + ax4.set_ylim([y_min + c_pose[1, -1], y_max + c_pose[1, -1]]) + ax4.set_zlim([z_min + c_pose[2, -1], z_max + c_pose[2, -1]]) + ax4.scatter(points_3d_world[:,0], points_3d_world[:,1], points_3d_world[:,2], s=10, c='green') + ax4.scatter(c_pose[0, -1], c_pose[1, -1], c_pose[2, -1], c='red', label='Camera') + ax4.legend() + + ax5.clear() + ax5.grid() + ax5.set_xlim([x_min + c_pose[0, -1], x_max + c_pose[0, -1]]) + ax5.set_ylim([z_min + c_pose[2, -1], z_max + c_pose[2, -1]]) + arr_poses = np.array(all_poses) + ax5.scatter(arr_poses[:, 0, -1], arr_poses[:, 2, -1], s=10, c='red', label='Estimated camera') + + # if GT exists + if self.gt is not None and i < self.gt.shape[0]: + ax5.scatter(self.gt[:i, 0, -1], self.gt[:i, 2, -1], s=10, c='black', label='GT camera') + + ax5.scatter(points_3d_world[:,0], points_3d_world[:,2], s=10, c='green', alpha=0.5) + ax5.legend() + + ax6.clear() + ax6.plot(range(len(all_points_3d)), [p.shape[0] for p in all_points_3d], label='num matched keypoints') + ax6.grid() + ax6.legend() + + plt.tight_layout() + if show: + # pass + plt.pause(0.01) + plt.show() + + # --------------------------------------------------------------------- + # LOSSES + # --------------------------------------------------------------------- + def compute_ate(self, est_pose, gt_pose): + t_est = est_pose[:, 3] + t_gt = gt_pose[:, 3] + return np.linalg.norm(t_est - t_gt) + + def compute_rte(self, est_pose_old, est_pose_new, gt_pose_old, gt_pose_new): + old_est_t = est_pose_old[:, 3] + new_est_t = est_pose_new[:, 3] + old_gt_t = gt_pose_old[:, 3] + new_gt_t = 
gt_pose_new[:, 3] + rel_est = new_est_t - old_est_t + rel_gt = new_gt_t - old_gt_t + return np.linalg.norm(rel_est - rel_gt) + + # --------------------------------------------------------------------- + # INITIALIZE (KEYFRAMES) + # --------------------------------------------------------------------- + def initialize(self, keyframe_filename): + if os.path.exists(keyframe_filename): + return + + c_pose = np.hstack([np.eye(3), np.zeros((3,1))]) # 3x4 + all_poses = [c_pose] + keyframes = [0] + + print("Finding keyframes") + initial_frame = True + for id_n_keyframe in tqdm(range(1, len(self.image_list))): + id_c_keyframe = keyframes[-1] + pose_c_keyframe = all_poses[id_c_keyframe] + + img1 = self.read_image(id_c_keyframe) + img2 = self.read_image(id_n_keyframe) + + # Use LightGlue if enabled, otherwise use OpenCV pipeline. + if self.use_lightglue: + pos_m_kp1, pos_m_kp2, matches, kp1, kp2 = self.lightglue_match(img1, img2) + else: + kp1, des1 = self.feature_detection(img1) + kp2, des2 = self.feature_detection(img2) + m_kp1, m_kp2, matches = self.feature_matching(des1, des2, kp1, kp2) + pos_m_kp1 = np.array([kp.pt for kp in m_kp1]) + pos_m_kp2 = np.array([kp.pt for kp in m_kp2]) + + # Pose estimation, triangulation, refinement, etc. (unchanged) + if initial_frame or not self.isStereo: + R, t = self.pose_estimation(pos_m_kp1, pos_m_kp2) + initial_frame = False + else: + # stereo approach (unchanged) + q1_l, q1_r, q2_l, q2_r, mask_stereo = self.calculate_right_features( + pos_m_kp1, pos_m_kp2, + disparity1, disparity2 + ) + Q1, Q2 = self.get_stereo_3d_pts(q1_l, q1_r, q2_l, q2_r) + success, rvec, tvec, inliers = cv2.solvePnPRansac( + objectPoints=Q1.reshape(-1,1,3).astype(np.float32), + imagePoints=q2_l.reshape(-1,1,2).astype(np.float32), + cameraMatrix=self.K_3x3, + distCoeffs=None, + iterationsCount=500, + reprojectionError=5.0, + confidence=0.99, + flags=cv2.SOLVEPNP_ITERATIVE + ) + if success: + R, _ = cv2.Rodrigues(rvec) + t = tvec.reshape(3,1) + else: + print("PnP RANSAC failed... 
(Stereo approach)") + + pts_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + R, t = self.refine(pos_m_kp1, pos_m_kp2, pts_3d, R, t, verbose=False) + pts_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + inv_pose = np.linalg.inv(self.get_4by4_homo_proj(R, t)) + c_pose = c_pose @ inv_pose + all_poses.append(c_pose) + + dist_from_keyframe = np.linalg.norm(c_pose[:3, -1] - pose_c_keyframe[:3, -1]) + good_depth_pts = pts_3d[pts_3d[:,2] > 0, 2] + avg_depth = np.mean(good_depth_pts) if len(good_depth_pts) else 1e-6 + + rel_R = np.linalg.inv(pose_c_keyframe[:3, :3]) @ c_pose[:3, :3] + rod = cv2.Rodrigues(rel_R)[0] + deg = np.abs(rod * 180.0 / np.pi) + ratio = dist_from_keyframe / avg_depth + + if ratio > self.THUMPUP_POS_THRESHOLD or np.any(deg > self.THUMPUP_ROT_THRESHOLD): + print(f" [Keyframe] Dist: {dist_from_keyframe:.3f}, AvgDepth: {avg_depth:.3f}, ratio={ratio:.3f}, deg={deg.squeeze()}, frame={id_n_keyframe}") + keyframes.append(id_n_keyframe) + + np.save(keyframe_filename, np.array(keyframes)) + + + + # --------------------------------------------------------------------- + # MAIN RUN + # --------------------------------------------------------------------- + def run(self, keyframe_filename, checkpoints=20, dynamic_vis=True): + import matplotlib.pyplot as plt + + if os.path.exists(keyframe_filename): + keyframes = np.load(keyframe_filename) + print(f"loaded keyframes from file: {keyframes}") + else: + keyframes = np.arange(len(self.image_list)) + print("keyframe file not found, using all frames") + + vis_param = self.vis_init() if dynamic_vis else None + + c_pose = np.hstack((np.eye(3), np.zeros((3, 1)))) + all_poses = [c_pose] + all_points_3d = [] + initial_frame = True + cumulative_loss_loc = 0 + + iter_count = len(keyframes) + for i in tqdm(range(1, iter_count)): + idx1 = keyframes[i - 1] + idx2 = keyframes[i] + img1 = self.read_image(idx1) + img2 = self.read_image(idx2) + + if self.isStereo: + img1_r = cv2.imread(self.image_list_r[idx1]) + img2_r = cv2.imread(self.image_list_r[idx2]) + disparity1 = self.compute_stereo_disparity(img1, img1_r) + disparity2 = self.compute_stereo_disparity(img2, img2_r) + + if self.use_lightglue: + pos_m_kp1, pos_m_kp2, matches, kp1, kp2 = self.lightglue_match(img1, img2) + else: + kp1, des1 = self.feature_detection(img1) + kp2, des2 = self.feature_detection(img2) + m_kp1, m_kp2, matches = self.feature_matching(des1, des2, kp1, kp2) + pos_m_kp1 = np.array([kp.pt for kp in m_kp1]) + pos_m_kp2 = np.array([kp.pt for kp in m_kp2]) + + if initial_frame or not self.isStereo: + R, t = self.pose_estimation(pos_m_kp1, pos_m_kp2) + initial_frame = False + else: + q1_l, q1_r, q2_l, q2_r, in_bounds = self.calculate_right_features( + pos_m_kp1, pos_m_kp2, + disparity1, disparity2 + ) + Q1, Q2 = self.get_stereo_3d_pts(q1_l, q1_r, q2_l, q2_r) + success, rvec, tvec, inliers = cv2.solvePnPRansac( + Q1.reshape(-1,1,3).astype(np.float32), + q2_l.reshape(-1,1,2).astype(np.float32), + self.K_3x3, None, + iterationsCount=2000, + reprojectionError=5.0, + confidence=0.99, + flags=cv2.SOLVEPNP_ITERATIVE + ) + if success: + R, _ = cv2.Rodrigues(rvec) + t = tvec.reshape(3,1) + else: + print("[Warning] Stereo PnP RANSAC failed to find a valid pose.") + + points_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + R, t = self.refine(pos_m_kp1, pos_m_kp2, points_3d, R, t) + points_3d = self.triangulate(R, t, pos_m_kp1, pos_m_kp2) + all_points_3d.append(points_3d) + + inv_pose = np.linalg.inv(self.get_4by4_homo_proj(R, t)) + c_pose = c_pose @ inv_pose + all_poses.append(c_pose) + + 
ate_loss = -1 + rte_loss = -1 + if (self.gt is not None) and (idx2 < self.gt.shape[0]): + gt_pose_current = self.gt[idx2] + diff_mean = np.mean(np.abs(gt_pose_current[:, 3] - c_pose[:, 3])) + cumulative_loss_loc += diff_mean + ate_loss = self.compute_ate(c_pose, gt_pose_current) + + # --- Visualization + if dynamic_vis: + self.vis(vis_param, img1, img2, kp1, kp2, all_points_3d, all_poses, matches, idx1, show=True) + + # --- Save screenshots every 'checkpoints' frames + if (i % checkpoints == 0) or (i == iter_count - 1): + if dynamic_vis: + self.vis(vis_param, img1, img2, kp1, kp2, all_points_3d, all_poses, matches, idx1, show=False) + + out_dir = os.path.join(output_dir, self.dataset) + if not os.path.exists(out_dir): + os.mkdir(out_dir) + + if ate_loss == -1: + fname = f"{idx1}_noGT.png" + else: + fname = f"{idx1}_{(cumulative_loss_loc / i):.2f}_{ate_loss:.2f}.png" + + plt.savefig(os.path.join(out_dir, fname), dpi=100) + + # ----------------------- NEW HELPER FUNCTIONS ----------------------- + + def numpy_to_tensor(self, image): + """Converts a BGR numpy image (as read by cv2) into a torch tensor of shape (1, 3, H, W) + normalized in [0,1] and moved to GPU if available.""" + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image_rgb = image_rgb.astype(np.float32) / 255.0 + tensor = torch.from_numpy(image_rgb).permute(2, 0, 1).unsqueeze(0) + return tensor.cuda() if torch.cuda.is_available() else tensor + + def convert_lightglue_to_opencv(self, keypoints0, keypoints1, matches): + """ + Convert filtered LightGlue keypoints and matches into OpenCV-compatible KeyPoint and DMatch objects. + Here, keypoints0 and keypoints1 are assumed to already be filtered (only matched keypoints). + + Returns: + opencv_kp0: List of cv2.KeyPoint objects for image0. + opencv_kp1: List of cv2.KeyPoint objects for image1. + opencv_matches: List of cv2.DMatch objects where each match is simply (i,i). + """ + n_matches = keypoints0.shape[0] # number of matched keypoints + opencv_kp0 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints0] + opencv_kp1 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints1] + opencv_matches = [] + for i in range(n_matches): + # Here, we assume that the i-th keypoint in keypoints0 matches the i-th in keypoints1. + dmatch = cv2.DMatch(i, i, 0, 0.0) + opencv_matches.append(dmatch) + return opencv_kp0, opencv_kp1, opencv_matches + + + + def lightglue_match(self, img1, img2): + """ + Uses ALIKED and LightGlue to extract features and match them, + then converts the outputs to OpenCV-compatible formats. + Returns: + pos_kp0: numpy array of keypoint coordinates for image0 (shape (K,2)). + pos_kp1: numpy array of keypoint coordinates for image1 (shape (K,2)). + opencv_matches: list of cv2.DMatch objects. + opencv_kp0: list of cv2.KeyPoint objects for image0. + opencv_kp1: list of cv2.KeyPoint objects for image1. + """ + # Convert image to torch tensor + img1_tensor = self.numpy_to_tensor(img1) + img2_tensor = self.numpy_to_tensor(img2) + feats0 = self.extractor.extract(img1_tensor) + feats1 = self.extractor.extract(img2_tensor) + matches_out = self.matcher({'image0': feats0, 'image1': feats1}) + # Remove batch dimension using rbd from lightglue.utils + feats0 = rbd(feats0) + feats1 = rbd(feats1) + matches_out = rbd(matches_out) + + # Get matches and keypoints as numpy arrays. 
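+        # Note: 'matches' and the keypoints indexed below are still torch
+        # tensors (possibly on the GPU). They are converted to float32 numpy
+        # arrays inside pose_estimation()/triangulate()/refine() before any
+        # OpenCV call, and convert_lightglue_to_opencv() casts the per-point
+        # values with float().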
+ matches = matches_out['matches'] # shape (K,2) + # keypoints returned by LightGlue are numpy arrays of (x,y) + keypoints0 = feats0['keypoints'][matches[..., 0]] + keypoints1 = feats1['keypoints'][matches[..., 1]] + + # Convert the keypoints and matches to OpenCV formats. + opencv_kp0, opencv_kp1, opencv_matches = self.convert_lightglue_to_opencv(keypoints0, keypoints1, matches) + + return keypoints0, keypoints1, opencv_matches, opencv_kp0, opencv_kp1 + + + # ----------------------- OVERRIDDEN METHODS IN initialize() and run() ----------------------- + + + +# ================================ +# Example main script usage +# ================================ +if __name__ == "__main__": + thumpup_pos_thresh = 0.015 + thumpup_rot_thresh = 1.5 + dataset = "kitti" # can be "kitti", "parking", "malaga", or "custom" + + # Create output directory if it doesn't exist + output_dir = "output" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + + if dataset == "malaga": + keyframe_filename = f"./output/malaga_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/malaga-urban-dataset-extract-07", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "parking": + keyframe_filename = f"./output/parking_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/parking", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "kitti": + keyframe_filename = f"./output/kitti_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/kitti", thumpup_pos_thresh, thumpup_rot_thresh) + elif dataset == "custom": + keyframe_filename = f"./output/custom_keyframes_{thumpup_pos_thresh}_{thumpup_rot_thresh}.npy" + sfm = StructureFromMotion("./Dataset/custom", thumpup_pos_thresh, thumpup_rot_thresh) + else: + raise ValueError("Unknown dataset") + + sfm.initialize(keyframe_filename) + sfm.run(keyframe_filename, checkpoints=20, dynamic_vis=True) + + # If you want to block at the end (so the figure remains): + # plt.show(block=True) diff --git a/modules/SimpleSLAM/requirements.txt b/modules/SimpleSLAM/requirements.txt new file mode 100644 index 0000000000..4dcb596437 --- /dev/null +++ b/modules/SimpleSLAM/requirements.txt @@ -0,0 +1,12 @@ +lightglue==0.0 +matplotlib==3.10.3 +numpy==2.2.6 +opencv_python==4.11.0.86 +pandas==2.2.3 +scipy==1.15.3 +torch==2.7.0 +tqdm==4.67.1 +open3d==0.18.0 +pyceres==2.3 +pycolmap==3.10.0 +lz4==4.4.4 \ No newline at end of file diff --git a/modules/SimpleSLAM/scripts/run_tracker_visualization.sh b/modules/SimpleSLAM/scripts/run_tracker_visualization.sh new file mode 100644 index 0000000000..52605b8831 --- /dev/null +++ b/modules/SimpleSLAM/scripts/run_tracker_visualization.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -e + +# ============================================== +# 1) Run with OpenCV SIFT detector + BF matcher: +# ============================================== +# python3 -m slam.monocular.main \ +# --dataset kitti \ +# --base_dir ./Dataset \ +# --detector akaze \ +# --matcher bf \ +# --fps 10 \ +# --ransac_thresh 1.0 \ +# --no_viz3d + +# ============================================== +# 2) (Alternative) Run with ALIKED + LightGlue: +# ============================================== +python3 -m slam.monocular.main4 \ + --dataset tum-rgbd \ + --base_dir ./Dataset \ + --use_lightglue \ + --ransac_thresh 1.0 + # --fps 10 \ + # --no_viz3d diff --git a/modules/SimpleSLAM/setup.py b/modules/SimpleSLAM/setup.py new file mode 100644 index 
0000000000..143cd325a2 --- /dev/null +++ b/modules/SimpleSLAM/setup.py @@ -0,0 +1,18 @@ +# setup.py +from setuptools import setup, find_packages + +setup( + name = "opencv_simple_slam", + version = "0.1.0", + description = "A simple feature-based SLAM using OpenCV and LightGlue", + packages = find_packages(), # this will pick up your `slam/` package + install_requires=[ + "opencv-python", + "numpy", + "lz4", + "tqdm", + "torch", # if you intend to support LightGlue + "lightglue @ git+https://github.com/cvg/LightGlue.git", + ], +) + diff --git a/modules/SimpleSLAM/slam/__init__.py b/modules/SimpleSLAM/slam/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/core/__init__.py b/modules/SimpleSLAM/slam/core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/core/archieve/visualization_keyframes.py b/modules/SimpleSLAM/slam/core/archieve/visualization_keyframes.py new file mode 100644 index 0000000000..c3b94f42d1 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/archieve/visualization_keyframes.py @@ -0,0 +1,492 @@ +import os +import glob +import argparse +import time +from tqdm import tqdm +from dataclasses import dataclass +import lz4.frame +import cv2 +import numpy as np + + +# Optional LightGlue imports +try: + import torch + from lightglue import LightGlue, ALIKED + from lightglue.utils import rbd , load_image +except ImportError: + raise ImportError('LightGlue unavailable, please install it using instructions on https://github.com/cvg/LightGlue') + + +def get_detector(detector_type, max_features=6000): + if detector_type == 'orb': + return cv2.ORB_create(max_features) + elif detector_type == 'sift': + return cv2.SIFT_create() + elif detector_type == 'akaze': + return cv2.AKAZE_create() + raise ValueError(f"Unsupported detector: {detector_type}") + +def get_matcher(matcher_type, detector_type=None): + if matcher_type == 'bf': + norm = cv2.NORM_HAMMING if detector_type in ['orb','akaze'] else cv2.NORM_L2 + return cv2.BFMatcher(norm, crossCheck=True) + raise ValueError(f"Unsupported matcher: {matcher_type}") + +def opencv_detector_and_matcher(img1,img2,detector,matcher): + kp1,des1=detector.detectAndCompute(img1,None) + kp2,des2=detector.detectAndCompute(img2,None) + if des1 is None or des2 is None: + return [],[],[] + matches=matcher.match(des1,des2) + return kp1, kp2, des1, des2, sorted(matches,key=lambda m:m.distance) + +def bgr_to_tensor(image): + """Convert a OpenCV‐style BGR uint8 into (3,H,W) torch tensor in [0,1] .""" + img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)/255.0 + tensor = torch.from_numpy(img_rgb).permute(2,0,1).unsqueeze(0) + return tensor.cuda() if torch.cuda.is_available() else tensor + +def tensor_to_bgr(img_tensor): + """Convert a (1,3,H,W) or (3,H,W) torch tensor in [0,1] to RGB → OpenCV‐style BGR uint8 image""" + if img_tensor.dim() == 4: + img_tensor = img_tensor[0] + img_np = img_tensor.permute(1,2,0).cpu().numpy() + img_np = (img_np * 255).clip(0,255).astype(np.uint8) + return cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) + +def convert_lightglue_to_opencv(keypoints0, keypoints1, matches): + """ + Convert filtered LightGlue keypoints and matches into OpenCV-compatible KeyPoint and DMatch objects. + Here, keypoints0 and keypoints1 are assumed to already be filtered (only matched keypoints). + + Returns: + opencv_kp0: List of cv2.KeyPoint objects for image0. + opencv_kp1: List of cv2.KeyPoint objects for image1. 
+ opencv_matches: List of cv2.DMatch objects where each match is simply (i,i). + """ + # TODO: convert descriptors as well + n_matches = keypoints0.shape[0] # number of matched keypoints + opencv_kp0 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints0] + opencv_kp1 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints1] + + opencv_matches = [ + cv2.DMatch(int(q), int(t), 0, 0.0) for q, t in matches + ] + + return opencv_kp0, opencv_kp1, opencv_matches + +def lightglue_match(img1, img2, extractor, matcher, min_conf=0.0): + """ + Uses ALIKED and LightGlue to extract features and match them, + then converts the outputs to OpenCV-compatible formats. + Returns: + pos_kp0: numpy array of keypoint coordinates for image0 (shape (K,2)). + pos_kp1: numpy array of keypoint coordinates for image1 (shape (K,2)). + opencv_matches: list of cv2.DMatch objects. + opencv_kp0: list of cv2.KeyPoint objects for image0. + opencv_kp1: list of cv2.KeyPoint objects for image1. + """ + # TODO: add min_conf filtering + # TODO: return descriptors as well + # Convert image to torch tensor + img1_tensor = bgr_to_tensor(img1) + img2_tensor = bgr_to_tensor(img2) + feats0 = extractor.extract(img1_tensor) + feats1 = extractor.extract(img2_tensor) + matches_out = matcher({'image0': feats0, 'image1': feats1}) + # Remove batch dimension using rbd from lightglue.utils + feats0 = rbd(feats0) + feats1 = rbd(feats1) + matches_out = rbd(matches_out) + + # Get matches and keypoints as numpy arrays. + matches = matches_out['matches'] + # keypoints returned by LightGlue are numpy arrays of (x,y) + keypoints0 = feats0['keypoints'] + keypoints1 = feats1['keypoints'] + descriptors0 = feats0['descriptors'] + descriptors1 = feats1['descriptors'] + # print("discriptor types:", type(descriptors0), type(descriptors1)) + + + # Convert the keypoints and matches to OpenCV formats. + opencv_kp0, opencv_kp1, opencv_matches = convert_lightglue_to_opencv(keypoints0, keypoints1, matches) + + return opencv_kp0, opencv_kp1, descriptors0, descriptors1, opencv_matches + +def update_and_prune_tracks(matches, prev_map, tracks, kp_curr, frame_idx, next_track_id, prune_age=30): + """ + Update feature tracks given a list of matches, then prune stale ones. + + Args: + matches : List[cv2.DMatch] between previous and current keypoints. + prev_map : Dict[int, int] mapping prev-frame kp index -> track_id. + tracks : Dict[int, List[Tuple[int, int, int]]], + each track_id → list of (frame_idx, x, y). + kp_curr : List[cv2.KeyPoint] for current frame. + frame_idx : int, index of the current frame (1-based or 0-based). + next_track_id : int, the next unused track ID. + prune_age : int, max age (in frames) before a track is discarded. + + Returns: + curr_map : Dict[int, int] mapping curr-frame kp index → track_id. + tracks : Updated tracks dict. + next_track_id : Updated next unused track ID. 
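+    Example (minimal illustrative sketch; the keypoint and match values are
+    hypothetical):
+        >>> kp_curr = [cv2.KeyPoint(10.0, 20.0, 1)]
+        >>> matches = [cv2.DMatch(0, 0, 0, 0.0)]   # prev kp 0 -> curr kp 0
+        >>> curr_map, tracks, next_id = update_and_prune_tracks(
+        ...     matches, prev_map={}, tracks={}, kp_curr=kp_curr,
+        ...     frame_idx=1, next_track_id=0)
+        >>> curr_map   # the current keypoint was assigned new track id 0
+        {0: 0}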
+ """ + curr_map = {} + + # 1) Assign each match to an existing or new track_id + for m in matches: + q = m.queryIdx # keypoint idx in previous frame + t = m.trainIdx # keypoint idx in current frame + + # extract integer pixel coords + x, y = int(kp_curr[t].pt[0]), int(kp_curr[t].pt[1]) + + if q in prev_map: + # continue an existing track + tid = prev_map[q] + else: + # start a brand-new track + tid = next_track_id + tracks[tid] = [] + next_track_id += 1 + + # map this current keypoint to the track + curr_map[t] = tid + # append the new observation + tracks[tid].append((frame_idx, x, y)) + + # 2) Prune any track not seen in the last `prune_age` frames + for tid, pts in list(tracks.items()): + last_seen_frame = pts[-1][0] + if (frame_idx - last_seen_frame) > prune_age: + del tracks[tid] + + return curr_map, tracks, next_track_id + +def filter_matches_ransac(kp1, kp2, matches, thresh): + """ + Filter a list of cv2.DMatch objects using RANSAC on the fundamental matrix. + + Args: + kp1 (List[cv2.KeyPoint]): KeyPoints from the first image. + kp2 (List[cv2.KeyPoint]): KeyPoints from the second image. + matches (List[cv2.DMatch]) : Initial matches between kp1 and kp2. + thresh (float) : RANSAC inlier threshold (pixels). + + Returns: + List[cv2.DMatch]: Only those matches deemed inliers by RANSAC. + """ + # Need at least 8 points for a fundamental matrix + if len(matches) < 8: + return matches + + # Build corresponding point arrays + pts1 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts2 = np.float32([kp2[m.trainIdx].pt for m in matches]) + + # Run RANSAC to find inlier mask + F, mask = cv2.findFundamentalMat( + pts1, pts2, + method=cv2.FM_RANSAC, + ransacReprojThreshold=thresh, + confidence=0.99 + ) + mask = mask.ravel().astype(bool) + + # Keep only the inlier matches + inliers = [m for m, ok in zip(matches, mask) if ok] + return inliers + +def draw_tracks(vis, tracks, current_frame, max_age=10, sample_rate=5, max_tracks=1000): + """ + Draw each track's path with color decaying from green (new) to red (old). + Only draw up to max_tracks most recent tracks, and sample every sample_rate-th track. + """ + recent=[(tid,pts) for tid,pts in tracks.items() if pts and current_frame-pts[-1][0]<=max_age] + recent.sort(key=lambda x:x[1][-1][0],reverse=True) + drawn=0 + for tid,pts in recent: + if drawn>=max_tracks: break + if tid%sample_rate!=0: continue + pts=[p for p in pts if current_frame-p[0]<=max_age] + for j in range(1,len(pts)): + frame_idx,x0,y0=pts[j-1] + _,x1,y1=pts[j] + age=current_frame-frame_idx + ratio=age/max_age + b=0 + g=int(255*(1-ratio)) + r=int(255*ratio) + cv2.line(vis,(int(x0),int(y0)),(int(x1),int(y1)),(b,g,r),2) + drawn+=1 + return vis + +def dataloader(args): + """ + Load the dataset and return the detector, matcher, and sequence of images. 
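+    Returns:
+        detector: ALIKED extractor when --use_lightglue is set, otherwise an
+            OpenCV detector from get_detector().
+        matcher: LightGlue matcher or a cv2.BFMatcher from get_matcher().
+        seq: sorted list of image file paths, or a list of decoded frames
+            when the dataset is 'custom'.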
+ """ + # init modules once + if args.use_lightglue: + detector= ALIKED(max_num_keypoints=2048).eval().cuda() + matcher= LightGlue(features='aliked').eval().cuda() + else: + detector=get_detector(args.detector) + matcher=get_matcher(args.matcher,args.detector) + + # load sequence + prefix=os.path.join(args.base_dir, args.dataset) + is_custom=False + if args.dataset=='kitti': img_dir,pat=os.path.join(prefix,'05','image_0'),'*.png' + elif args.dataset=='parking': img_dir,pat=os.path.join(prefix,'images'),'*.png' + elif args.dataset=='malaga': img_dir,pat =os.path.join(prefix,'malaga-urban-dataset-extract-07_rectified_800x600_Images'),'*_left.jpg' + elif args.dataset=='tum-rgbd': img_dir,pat =os.path.join(prefix,'rgbd_dataset_freiburg1_room', 'rgb'),'*.png' + else: + vid=os.path.join(prefix,'custom_compress.mp4') + cap=cv2.VideoCapture(vid) + seq=[] + while True: + ok,fr=cap.read() + if not ok: break; seq.append(fr) + cap.release(); is_custom=True + + # load images + if not is_custom: seq=sorted(glob.glob(os.path.join(img_dir,pat))) + + return detector, matcher, seq + +def load_frame_pair(args,seq, i): + """ + Load a pair of consecutive frames from the sequence. + Args: + seq (list): List of image paths or frames. + i (int): Index of the current frame. + Returns: + img1 (numpy.ndarray): First image. + img2 (numpy.ndarray): Second image. + """ + # load images + if args.dataset=='custom': + img1, img2 = seq[i], seq[i+1] + else: + img1 = cv2.imread(seq[i]) + img2=cv2.imread(seq[i+1]) + + return img1, img2 + +def detect_and_match(args, img1, img2, detector, matcher): + """ + Load two consecutive images from the sequence and match features using the specified detector and matcher. + Args: + args (argparse.Namespace): Command line arguments. + img1 (numpy.ndarray): First image. + img2 (numpy.ndarray): Second image. + detector: Feature detector object. + matcher: Feature matcher object. + Returns: + kp_map1 (list): Keypoints from the first image. + kp_map2 (list): Keypoints from the second image. + matches (list): List of matched features. + """ + # match features + if args.use_lightglue: + kp_map1, kp_map2, des1, des2, matches = lightglue_match(img1,img2,detector,matcher) + else: + kp_map1, kp_map2, des1, des2, matches = opencv_detector_and_matcher(img1, img2, detector, matcher) + + return kp_map1, kp_map2, des1, des2, matches + +def is_new_keyframe(frame_idx: int, + matches_to_kf: list[cv2.DMatch], + n_kf_features: int, + kp_curr: list[cv2.KeyPoint], + kp_kf: list[cv2.KeyPoint], + kf_max_disp: float = 30.0, + kf_min_inliers: int = 125, + kf_cooldown: int = 5, + last_kf_idx: int = -999) -> bool: + """ + Decide whether the *current* frame should become a new key-frame. + + Parameters + ---------- + frame_idx : index of the current frame in the sequence + matches_to_kf : list of inlier cv2.DMatch between LAST KF and current frame + n_kf_features : total number of keypoints that existed in the last KF + kp_curr : keypoints detected in the current frame + kp_kf : keypoints of the last key-frame + kf_max_disp : pixel-space parallax threshold (avg. 
L2) to trigger a new KF + kf_min_inliers : minimum *surviving-match* ratio; below → new KF + kf_cooldown : minimum #frames to wait after last KF before creating another + last_kf_idx : frame index of the most recent key-frame + + Returns + ------- + bool : True ⇒ promote current frame to key-frame + """ + + # 0) Cool-down guard # + if frame_idx - last_kf_idx < kf_cooldown: + return False # too soon to spawn another KF + + # 1) Overlap / track-survival test # + # If the key-frame had zero features (should never happen) → force new KF + if not matches_to_kf or not n_kf_features: # no matches or features at all ⇒ we must create a new KF + return True + + if len(matches_to_kf) < kf_min_inliers: + return True # too few matches survived, so we must create a new KF + + # OVERLAP RATIO TEST (not used anymore, see below) # This Logic is not working well, so we use a different metric + # overlap_ratio = len(matches_to_kf) / float(n_kf_features) + # print(f"[KF] Overlap ratio: {overlap_ratio:.2f} (matches={len(matches_to_kf)})") + # if overlap_ratio < kf_min_ratio: + # return True + + # 2) Parallax test (average pixel displacement) #TODO replace with a more robust metric + # Compute mean Euclidean displacement between matched pairs + disp = [ + np.hypot( + kp_curr[m.trainIdx].pt[0] - kp_kf[m.queryIdx].pt[0], + kp_curr[m.trainIdx].pt[1] - kp_kf[m.queryIdx].pt[1] + ) + for m in matches_to_kf + ] + + if np.mean(disp) > kf_max_disp: + return True + return False + +def make_thumb(bgr, hw=(854,480)): + """Return lz4-compressed JPEG thumbnail (bytes).""" + th = cv2.resize(bgr, hw) + ok, enc = cv2.imencode('.jpg', th, [int(cv2.IMWRITE_JPEG_QUALITY), 70]) + return lz4.frame.compress(enc.tobytes()) if ok else b'' + + +@dataclass +class Keyframe: + idx: int # global frame index + path: str # on-disk image file OR "" if custom video + kps: list[cv2.KeyPoint] # keypoints (needed for geometric checks) + desc: np.ndarray # descriptors (uint8/float32) + thumb: bytes # lz4-compressed thumbnail for UI + + +def main(): + parser=argparse.ArgumentParser("Feature tracking with RANSAC filtering") + parser.add_argument('--dataset',choices=['kitti','malaga','tum-rgbd','custom'],required=True) + parser.add_argument('--base_dir',default='.././Dataset') + parser.add_argument('--detector',choices=['orb','sift','akaze'],default='orb') + parser.add_argument('--matcher',choices=['bf'],default='bf') + parser.add_argument('--use_lightglue',action='store_true') + parser.add_argument('--fps',type=float,default=10) + # --- RANSAC related CLI flags ------------------------------------------- + parser.add_argument('--ransac_thresh',type=float,default=1.0, help='RANSAC threshold for fundamental matrix') + # --- Key-frame related CLI flags ----------------------------------------- + parser.add_argument('--kf_max_disp', type=float, default=45, + help='Min avg. 
pixel displacement wrt last keyframe') + parser.add_argument('--kf_min_inliers', type=float, default=150, + help='Min surviving-match features wrt last keyframe') + parser.add_argument('--kf_cooldown', type=int, default=3, + help='Frames to wait before next keyframe check') + parser.add_argument('--kf_thumb_hw', type=int, nargs=2, default=[640,360], + help='Width Height of key-frame thumbnail') + args=parser.parse_args() + + # initialize detector and matcher + detector, matcher, seq = dataloader(args) + + # tracking data + track_id=0; prev_map={}; tracks={} + cv2.namedWindow('Feature Tracking',cv2.WINDOW_NORMAL) + cv2.resizeWindow('Feature Tracking',1200,600) + + total=len(seq)-1; prev_time=time.time(); achieved_fps=0.0 + for i in tqdm(range(total),desc='Tracking'): + # load frame pair + img1, img2 = load_frame_pair(args, seq, i) + + # Detect and match features + kp_map1, kp_map2, des1, des2, matches = detect_and_match(args, img1, img2, detector, matcher) + + # filter with RANSAC + matches = filter_matches_ransac(kp_map1, kp_map2, matches, args.ransac_thresh) + + # update & prune tracks + frame_no = i + 1 + prev_map, tracks, track_id = update_and_prune_tracks(matches, prev_map, tracks, kp_map2, frame_no, track_id, prune_age=30) + + # ---------------------------------------------------------------------- + # Key-frame subsystem (light-weight, zero full-image retention) + # ---------------------------------------------------------------------- + if i == 0: # bootstrap KF0 + thumb = make_thumb(img1, tuple(args.kf_thumb_hw)) + kfs = [Keyframe(0, seq[0] if isinstance(seq[0],str) else "", + kp_map1, des1, thumb)] + last_kf_idx = 0 + else: + # 1) match *stored* KF descriptors ↔ current frame descriptors + kf = kfs[-1] + kf_matches = matcher.match(kf.desc, des2) if len(des2) and len(kf.desc) else [] + kf_matches = filter_matches_ransac(kf.kps, kp_map2, kf_matches, args.ransac_thresh) + + # 2) decide + if is_new_keyframe(frame_no, + kf_matches, + n_kf_features=len(kf.kps), + kp_curr=kp_map2, + kp_kf=kf.kps, + kf_max_disp=args.kf_max_disp, + kf_min_inliers=args.kf_min_inliers, + kf_cooldown=args.kf_cooldown, + last_kf_idx=last_kf_idx): + thumb = make_thumb(img2, tuple(args.kf_thumb_hw)) + kfs.append(Keyframe(frame_no, + seq[i+1] if isinstance(seq[i+1],str) else "", + kp_map2, des2, thumb)) + last_kf_idx = frame_no + # print(f"[KF] {frame_no:4d} total={len(kfs)}") + + + # draw + vis= img2.copy() + vis=draw_tracks(vis,tracks,i+1) + for t,tid in prev_map.items(): + cv2.circle(vis,tuple(map(int, kp_map2[t].pt)),3,(0,255,0),-1) + + + # -------------------------------------------------------- + # overlay active KF id + cv2.putText(vis, f"KF index:{last_kf_idx} , len: {len(kfs)}", (10,60), + cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,0), 2) + + # assemble strip + thumbs = [cv2.imdecode(np.frombuffer(lz4.frame.decompress(kf.thumb),np.uint8), + cv2.IMREAD_COLOR) for kf in kfs[-4:]] + bar = np.hstack(thumbs) if thumbs else np.zeros((args.kf_thumb_hw[1],args.kf_thumb_hw[0],3), np.uint8) + cv2.imshow('Keyframes', bar) + + MAX_MEM_KF = 500 + if len(kfs) > MAX_MEM_KF: + dump = kfs.pop(0) # oldest KF + # -------------------------------------------------------- + + text=f"Frame {i+1}/{total} | Tracks: {len(tracks)} | FPS: {achieved_fps:.1f}" + cv2.putText(vis,text,(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2) + + # show + interval_ms=int(1000/args.fps) if args.fps>0 else 0 + key=cv2.waitKey(interval_ms) + cv2.imshow('Feature Tracking',vis) + # measure fps + now=time.time();dt=now-prev_time + if dt>0: 
achieved_fps=1.0/dt + prev_time=now + if key&0xFF==27: break + cv2.destroyAllWindows() + +if __name__=='__main__': + main() diff --git a/modules/SimpleSLAM/slam/core/archieve/visualize_tracking.py b/modules/SimpleSLAM/slam/core/archieve/visualize_tracking.py new file mode 100644 index 0000000000..e1d89c72b3 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/archieve/visualize_tracking.py @@ -0,0 +1,375 @@ +import os +import glob +import argparse +import time +from tqdm import tqdm +from dataclasses import dataclass + +import cv2 +import numpy as np + + +# Optional LightGlue imports +try: + import torch + from lightglue import LightGlue, ALIKED + from lightglue.utils import rbd , load_image +except ImportError: + raise ImportError('LightGlue unavailable, please install it using instructions on https://github.com/cvg/LightGlue') + + +def get_detector(detector_type, max_features=6000): + if detector_type == 'orb': + return cv2.ORB_create(max_features) + elif detector_type == 'sift': + return cv2.SIFT_create() + elif detector_type == 'akaze': + return cv2.AKAZE_create() + raise ValueError(f"Unsupported detector: {detector_type}") + +def get_matcher(matcher_type, detector_type=None): + if matcher_type == 'bf': + norm = cv2.NORM_HAMMING if detector_type in ['orb','akaze'] else cv2.NORM_L2 + return cv2.BFMatcher(norm, crossCheck=True) + raise ValueError(f"Unsupported matcher: {matcher_type}") + +def opencv_detector_and_matcher(img1,img2,detector,matcher): + kp1,des1=detector.detectAndCompute(img1,None) + kp2,des2=detector.detectAndCompute(img2,None) + if des1 is None or des2 is None: + return [],[],[] + matches=matcher.match(des1,des2) + return kp1, kp2, des1, des2, sorted(matches,key=lambda m:m.distance) + +def bgr_to_tensor(image): + """Convert a OpenCV‐style BGR uint8 into (3,H,W) torch tensor in [0,1] .""" + img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)/255.0 + tensor = torch.from_numpy(img_rgb).permute(2,0,1).unsqueeze(0) + return tensor.cuda() if torch.cuda.is_available() else tensor + +def tensor_to_bgr(img_tensor): + """Convert a (1,3,H,W) or (3,H,W) torch tensor in [0,1] to RGB → OpenCV‐style BGR uint8 image""" + if img_tensor.dim() == 4: + img_tensor = img_tensor[0] + img_np = img_tensor.permute(1,2,0).cpu().numpy() + img_np = (img_np * 255).clip(0,255).astype(np.uint8) + return cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) + +def convert_lightglue_to_opencv(keypoints0, keypoints1, matches): + """ + Convert filtered LightGlue keypoints and matches into OpenCV-compatible KeyPoint and DMatch objects. + Here, keypoints0 and keypoints1 are assumed to already be filtered (only matched keypoints). + + Returns: + opencv_kp0: List of cv2.KeyPoint objects for image0. + opencv_kp1: List of cv2.KeyPoint objects for image1. + opencv_matches: List of cv2.DMatch objects where each match is simply (i,i). + """ + # TODO: convert descriptors as well + n_matches = keypoints0.shape[0] # number of matched keypoints + opencv_kp0 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints0] + opencv_kp1 = [cv2.KeyPoint(float(pt[0]), float(pt[1]), 1) for pt in keypoints1] + + opencv_matches = [ + cv2.DMatch(int(q), int(t), 0, 0.0) for q, t in matches + ] + + return opencv_kp0, opencv_kp1, opencv_matches + +def lightglue_match(img1, img2, extractor, matcher, min_conf=0.0): + """ + Uses ALIKED and LightGlue to extract features and match them, + then converts the outputs to OpenCV-compatible formats. + Returns: + pos_kp0: numpy array of keypoint coordinates for image0 (shape (K,2)). 
+ pos_kp1: numpy array of keypoint coordinates for image1 (shape (K,2)). + opencv_matches: list of cv2.DMatch objects. + opencv_kp0: list of cv2.KeyPoint objects for image0. + opencv_kp1: list of cv2.KeyPoint objects for image1. + """ + # TODO: add min_conf filtering + # TODO: return descriptors as well + # Convert image to torch tensor + img1_tensor = bgr_to_tensor(img1) + img2_tensor = bgr_to_tensor(img2) + feats0 = extractor.extract(img1_tensor) + feats1 = extractor.extract(img2_tensor) + matches_out = matcher({'image0': feats0, 'image1': feats1}) + # Remove batch dimension using rbd from lightglue.utils + feats0 = rbd(feats0) + feats1 = rbd(feats1) + matches_out = rbd(matches_out) + + # Get matches and keypoints as numpy arrays. + matches = matches_out['matches'] # shape (K,2) + # keypoints returned by LightGlue are numpy arrays of (x,y) + keypoints0 = feats0['keypoints'] + keypoints1 = feats1['keypoints'] + descriptors0 = feats0['descriptors'] # shape (K,D) + descriptors1 = feats1['descriptors'] # shape (K,D) + print("discriptor types:", type(descriptors0), type(descriptors1)) + + + # Convert the keypoints and matches to OpenCV formats. + opencv_kp0, opencv_kp1, opencv_matches = convert_lightglue_to_opencv(keypoints0, keypoints1, matches) + + return opencv_kp0, opencv_kp1, descriptors0, descriptors1, opencv_matches + +def update_and_prune_tracks(matches, prev_map, tracks, kp_curr, frame_idx, next_track_id, prune_age=30): + """ + Update feature tracks given a list of matches, then prune stale ones. + + Args: + matches : List[cv2.DMatch] between previous and current keypoints. + prev_map : Dict[int, int] mapping prev-frame kp index -> track_id. + tracks : Dict[int, List[Tuple[int, int, int]]], + each track_id → list of (frame_idx, x, y). + kp_curr : List[cv2.KeyPoint] for current frame. + frame_idx : int, index of the current frame (1-based or 0-based). + next_track_id : int, the next unused track ID. + prune_age : int, max age (in frames) before a track is discarded. + + Returns: + curr_map : Dict[int, int] mapping curr-frame kp index → track_id. + tracks : Updated tracks dict. + next_track_id : Updated next unused track ID. + """ + curr_map = {} + + # 1) Assign each match to an existing or new track_id + for m in matches: + q = m.queryIdx # keypoint idx in previous frame + t = m.trainIdx # keypoint idx in current frame + + # extract integer pixel coords + x, y = int(kp_curr[t].pt[0]), int(kp_curr[t].pt[1]) + + if q in prev_map: + # continue an existing track + tid = prev_map[q] + else: + # start a brand-new track + tid = next_track_id + tracks[tid] = [] + next_track_id += 1 + + # map this current keypoint to the track + curr_map[t] = tid + # append the new observation + tracks[tid].append((frame_idx, x, y)) + + # 2) Prune any track not seen in the last `prune_age` frames + for tid, pts in list(tracks.items()): + last_seen_frame = pts[-1][0] + if (frame_idx - last_seen_frame) > prune_age: + del tracks[tid] + + return curr_map, tracks, next_track_id + +def filter_matches_ransac(kp1, kp2, matches, thresh): + """ + Filter a list of cv2.DMatch objects using RANSAC on the fundamental matrix. + + Args: + kp1 (List[cv2.KeyPoint]): KeyPoints from the first image. + kp2 (List[cv2.KeyPoint]): KeyPoints from the second image. + matches (List[cv2.DMatch]) : Initial matches between kp1 and kp2. + thresh (float) : RANSAC inlier threshold (pixels). + + Returns: + List[cv2.DMatch]: Only those matches deemed inliers by RANSAC. 
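+
+    Example (illustrative sketch; the detector/matcher variables are
+    assumptions, not part of this function):
+
+        kp1, des1 = detector.detectAndCompute(img1, None)
+        kp2, des2 = detector.detectAndCompute(img2, None)
+        raw = sorted(matcher.match(des1, des2), key=lambda m: m.distance)
+        inliers = filter_matches_ransac(kp1, kp2, raw, thresh=1.0)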
+ """ + # Need at least 8 points for a fundamental matrix + if len(matches) < 8: + return matches + + # Build corresponding point arrays + pts1 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts2 = np.float32([kp2[m.trainIdx].pt for m in matches]) + + # Run RANSAC to find inlier mask + F, mask = cv2.findFundamentalMat( + pts1, pts2, + method=cv2.FM_RANSAC, + ransacReprojThreshold=thresh, + confidence=0.99 + ) + mask = mask.ravel().astype(bool) + + # Keep only the inlier matches + inliers = [m for m, ok in zip(matches, mask) if ok] + return inliers + +def draw_tracks(vis, tracks, current_frame, max_age=10, sample_rate=5, max_tracks=1000): + """ + Draw each track's path with color decaying from green (new) to red (old). + Only draw up to max_tracks most recent tracks, and sample every sample_rate-th track. + """ + recent=[(tid,pts) for tid,pts in tracks.items() if pts and current_frame-pts[-1][0]<=max_age] + recent.sort(key=lambda x:x[1][-1][0],reverse=True) + drawn=0 + for tid,pts in recent: + if drawn>=max_tracks: break + if tid%sample_rate!=0: continue + pts=[p for p in pts if current_frame-p[0]<=max_age] + for j in range(1,len(pts)): + frame_idx,x0,y0=pts[j-1] + _,x1,y1=pts[j] + age=current_frame-frame_idx + ratio=age/max_age + b=0 + g=int(255*(1-ratio)) + r=int(255*ratio) + cv2.line(vis,(int(x0),int(y0)),(int(x1),int(y1)),(b,g,r),2) + drawn+=1 + return vis + +def dataloader(args): + """ + Load the dataset and return the detector, matcher, and sequence of images. + """ + # init modules once + if args.use_lightglue: + detector=ALIKED(max_num_keypoints=2048).eval().cuda() + matcher=LightGlue(features='aliked').eval().cuda() + else: + detector=get_detector(args.detector) + matcher=get_matcher(args.matcher,args.detector) + + # load sequence + prefix=os.path.join(args.base_dir, args.dataset) + is_custom=False + if args.dataset=='kitti': img_dir,pat=os.path.join(prefix,'05','image_0'),'*.png' + elif args.dataset=='parking': img_dir,pat=os.path.join(prefix,'images'),'*.png' + elif args.dataset=='malaga': img_dir,pat =os.path.join(prefix,'malaga-urban-dataset-extract-07_rectified_800x600_Images'),'*_left.jpg' + elif args.dataset=='tum-rgbd': img_dir,pat =os.path.join(prefix,'rgbd_dataset_freiburg1_room', 'rgb'),'*.png' + else: + vid=os.path.join(prefix,'custom_compress.mp4') + cap=cv2.VideoCapture(vid) + seq=[] + while True: + ok,fr=cap.read() + if not ok: break; seq.append(fr) + cap.release(); is_custom=True + + # load images + if not is_custom: seq=sorted(glob.glob(os.path.join(img_dir,pat))) + + return detector, matcher, seq + +def load_frame_pair(args,seq, i): + """ + Load a pair of consecutive frames from the sequence. + Args: + seq (list): List of image paths or frames. + i (int): Index of the current frame. + Returns: + img1 (numpy.ndarray): First image. + img2 (numpy.ndarray): Second image. + """ + # load images + if args.dataset=='custom': + img1, img2 = seq[i], seq[i+1] + else: + img1 = cv2.imread(seq[i]) + img2=cv2.imread(seq[i+1]) + + return img1, img2 + +def detect_and_match(args, img1, img2, detector, matcher): + """ + Load two consecutive images from the sequence and match features using the specified detector and matcher. + Args: + args (argparse.Namespace): Command line arguments. + img1 (numpy.ndarray): First image. + img2 (numpy.ndarray): Second image. + detector: Feature detector object. + matcher: Feature matcher object. + Returns: + kp_map1 (list): Keypoints from the first image. + kp_map2 (list): Keypoints from the second image. 
+ matches (list): List of matched features. + """ + # match features + if args.use_lightglue: + kp_map1, kp_map2, des1, des2, matches = lightglue_match(img1,img2,detector,matcher) + else: + kp_map1, kp_map2, des1, des2, matches = opencv_detector_and_matcher(img1, img2, detector, matcher) + + return kp_map1, kp_map2, des1, des2, matches + + +@dataclass +class Keyframe: + idx: int # global frame index + path: str # on-disk image file OR "" if custom video + kps: list[cv2.KeyPoint] # keypoints (needed for geometric checks) + desc: np.ndarray # descriptors (uint8/float32) + thumb: bytes # lz4-compressed thumbnail for UI + + +def main(): + parser=argparse.ArgumentParser("Feature tracking with RANSAC filtering") + parser.add_argument('--dataset',choices=['kitti','malaga','tum-rgb','custom'],required=True) + parser.add_argument('--base_dir',default='.././Dataset') + parser.add_argument('--detector',choices=['orb','sift','akaze'],default='orb') + parser.add_argument('--matcher',choices=['bf'],default='bf') + parser.add_argument('--use_lightglue',action='store_true') + parser.add_argument('--fps',type=float,default=10) + # --- RANSAC related CLI flags ------------------------------------------- + parser.add_argument('--ransac_thresh',type=float,default=1.0, help='RANSAC threshold for fundamental matrix') + # --- Key-frame related CLI flags ----------------------------------------- + parser.add_argument('--kf_max_disp', type=float, default=30, + help='Min avg. pixel displacement wrt last keyframe') + parser.add_argument('--kf_min_ratio', type=float, default=0.5, + help='Min surviving-match ratio wrt last keyframe') + parser.add_argument('--kf_cooldown', type=int, default=5, + help='Frames to wait before next keyframe check') + + args=parser.parse_args() + + # initialize detector and matcher + detector, matcher, seq = dataloader(args) + + # tracking data + track_id=0; prev_map={}; tracks={} + cv2.namedWindow('Feature Tracking',cv2.WINDOW_NORMAL) + cv2.resizeWindow('Feature Tracking',1200,600) + + total=len(seq)-1; prev_time=time.time(); achieved_fps=0.0 + for i in tqdm(range(total),desc='Tracking'): + # load frame pair + img1, img2 = load_frame_pair(args, seq, i) + + # Detect and match features + kp_map1, kp_map2, des1, des2, matches = detect_and_match(args, img1, img2, detector, matcher) + + # filter with RANSAC + matches = filter_matches_ransac(kp_map1, kp_map2, matches, args.ransac_thresh) + + # update & prune tracks + frame_no = i + 1 + prev_map, tracks, track_id = update_and_prune_tracks(matches, prev_map, tracks, kp_map2, frame_no, track_id, prune_age=30) + + # draw + vis= img2.copy() + vis=draw_tracks(vis,tracks,i+1) + for t,tid in prev_map.items(): + cv2.circle(vis,tuple(map(int, kp_map2[t].pt)),3,(0,255,0),-1) + + text=f"Frame {i+1}/{total} | Tracks: {len(tracks)} | FPS: {achieved_fps:.1f}" + cv2.putText(vis,text,(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2) + + # show + interval_ms=int(1000/args.fps) if args.fps>0 else 0 + key=cv2.waitKey(interval_ms) + cv2.imshow('Feature Tracking',vis) + # measure fps + now=time.time();dt=now-prev_time + if dt>0: achieved_fps=1.0/dt + prev_time=now + if key&0xFF==27: break + cv2.destroyAllWindows() + +if __name__=='__main__': + main() diff --git a/modules/SimpleSLAM/slam/core/ba_utils.py b/modules/SimpleSLAM/slam/core/ba_utils.py new file mode 100644 index 0000000000..544ad225c9 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/ba_utils.py @@ -0,0 +1,282 @@ +# -*- coding: utf-8 -*- +""" +ba_utils.py +=========== + +Light-weight Bundle-Adjustment 
helpers built on **pyceres**. +Implements + • two_view_ba(...) + • pose_only_ba(...) + • local_bundle_adjustment(...) +and keeps the older run_bundle_adjustment (full BA) for +back-compatibility. + +A *blue-print* for global_bundle_adjustment is included but not wired. +""" +from __future__ import annotations +import cv2 +import numpy as np +import pyceres +from pycolmap import cost_functions, CameraModelId +import math + +# TODO: uses camera-to-world pose convention T_wc (camera→world) should ideally be using world-to-camera T_cw (world→camera) +# --------------------------------------------------------------------- # +# Small pose ⇄ parameter converters +# --------------------------------------------------------------------- # +def _pose_to_quat_trans(T_wc: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + # Convert T_wc (camera→world) ➜ (quat_cw , t_cw) with X_c = R_cw·X_w + t_cw + R_wc, t_wc = T_wc[:3, :3], T_wc[:3, 3] + R_cw = R_wc.T + t_cw = -R_cw @ t_wc + aa, _ = cv2.Rodrigues(R_cw) # axis-angle + theta = np.linalg.norm(aa) + if theta < 1e-8: + quat = np.array([1.0, 0.0, 0.0, 0.0], np.float64) + else: + axis = aa.flatten() / theta # normalised axis + s = math.sin(theta / 2.0) + quat = np.array([math.cos(theta / 2.0), axis[0]*s, axis[1]*s, axis[2]*s], np.float64) + return quat, t_cw.copy() + + +def _quat_trans_to_pose(quat_cw: np.ndarray, t_cw: np.ndarray) -> np.ndarray: + # Inverse of the above: (quat_cw , t_cw) ➜ T_wc + qw, qx, qy, qz = quat_cw + R_cw = np.array([ + [1-2*(qy*qy+qz*qz), 2*(qx*qy-qz*qw), 2*(qx*qz+qy*qw)], + [2*(qx*qy+qz*qw), 1-2*(qx*qx+qz*qz), 2*(qy*qz-qx*qw)], + [2*(qx*qz-qy*qw), 2*(qy*qz+qx*qw), 1-2*(qx*qx+qy*qy)] + ], dtype=np.float64) + R_wc = R_cw.T + t_wc = -R_wc @ t_cw + T = np.eye(4, dtype=np.float64) + T[:3, :3], T[:3, 3] = R_wc, t_wc + # C = -R_cw.T @ t_cw # camera-centre in world coords + # T[:3, :3] = R_cw.T + # T[:3, 3] = C + + return T + + +# --------------------------------------------------------------------- # +# Shared helper to add one reprojection residual +# --------------------------------------------------------------------- # +def _add_reproj_edge(problem, loss_fn, + kp_uv: tuple[float, float], + quat_param, trans_param, + point_param, intr_param): + """Create a pyceres residual block for one observation.""" + cost = cost_functions.ReprojErrorCost( + CameraModelId.PINHOLE, + np.asarray(kp_uv, np.float64) + ) + problem.add_residual_block( + cost, loss_fn, + [quat_param, trans_param, point_param, intr_param] + ) + +# --------------------------------------------------------------------- # +# 1) Two-view BA (bootstrap refinement) +# --------------------------------------------------------------------- # +def two_view_ba(world_map, K, keypoints, max_iters: int = 20): + """ + Refine the two initial camera poses + all bootstrap landmarks. + + Assumes `world_map` has exactly *two* poses (T_0w, T_1w) and that + each MapPoint already stores **two** observations (frame-0 and + frame-1). Called once right after initialisation. 
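+
+    Usage sketch (not taken verbatim from the pipeline; assumes `world_map`,
+    the 3x3 intrinsics `K` and the per-frame keypoint lists from the
+    bootstrap step are in scope):
+
+        # keypoints[i] is the List[cv2.KeyPoint] detected in frame i
+        two_view_ba(world_map, K, keypoints, max_iters=20)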
+ """ + assert len(world_map.poses) == 2, "two_view_ba expects exactly 2 poses" + + _core_ba(world_map, K, keypoints, + opt_kf_idx=[0, 1], + fix_kf_idx=[], + max_iters=max_iters, + info_tag="[2-view BA]") + + +# --------------------------------------------------------------------- # +# 2) Pose-only BA (current frame refinement) +# --------------------------------------------------------------------- # +def pose_only_ba(world_map, K, keypoints, + frame_idx: int, max_iters: int = 8, + huber_thr: float = 2.0): + """ + Optimise **only one pose** (SE3) while keeping all 3-D points fixed. + Mimics ORB-SLAM's `Optimizer::PoseOptimization`. + """ + fx, fy, cx, cy = float(K[0, 0]), float(K[1, 1]), float(K[0, 2]), float(K[1, 2]) + intr = np.array([fx, fy, cx, cy], np.float64) + + quat, trans = _pose_to_quat_trans(world_map.poses[frame_idx]) + + problem = pyceres.Problem() + problem.add_parameter_block(quat, 4) + problem.add_parameter_block(trans, 3) + problem.set_manifold(quat, pyceres.EigenQuaternionManifold()) + problem.add_parameter_block(intr, 4) + problem.set_parameter_block_constant(intr) + + loss_fn = pyceres.HuberLoss(huber_thr) + + # for pid, mp in world_map.points.items(): + # for f_idx, kp_idx in mp.observations: + # if f_idx != frame_idx: + # continue + # u, v = keypoints[f_idx][kp_idx].pt + # _add_reproj_edge(problem, loss_fn, + # (u, v), quat, trans, mp.position, intr) + for mp in world_map.points.values(): + problem.add_parameter_block(mp.position, 3) + problem.set_parameter_block_constant(mp.position) + for f_idx, kp_idx in mp.observations: + if f_idx != frame_idx: + continue + u, v = keypoints[f_idx][kp_idx].pt + _add_reproj_edge(problem, loss_fn, + (u, v), quat, trans, mp.position, intr) + + + if problem.num_residual_blocks() < 10: + print(f"POSE-ONLY BA skipped – not enough residuals") + return # too few observations + + opts = pyceres.SolverOptions() + opts.max_num_iterations = max_iters + summary = pyceres.SolverSummary() + pyceres.solve(opts, problem, summary) + + world_map.poses[frame_idx][:] = _quat_trans_to_pose(quat, trans) + # print(f"[Pose-only BA] iters={summary.iterations_used}" + # f" inliers={problem.num_residual_blocks()}") + print(f"[Pose-only BA] iters={summary.num_successful_steps}" + f" inliers={problem.num_residual_blocks()}") + + +# --------------------------------------------------------------------- # +# 3) Local BA (sliding window) +# --------------------------------------------------------------------- # +def local_bundle_adjustment(world_map, K, keypoints, + center_kf_idx: int, + window_size: int = 8, + max_points : int = 3000, + max_iters : int = 15): + """ + Optimise the *last* `window_size` key-frames around + `center_kf_idx` plus all landmarks they observe. + Older poses are kept fixed (gauge). + """ + first_opt = max(0, center_kf_idx - window_size + 1) + opt_kf = list(range(first_opt, center_kf_idx + 1)) + fix_kf = list(range(0, first_opt)) + + _core_ba(world_map, K, keypoints, + opt_kf_idx=opt_kf, + fix_kf_idx=fix_kf, + max_points=max_points, + max_iters=max_iters, + info_tag=f"[Local BA (kf {center_kf_idx})]") + + +# --------------------------------------------------------------------- # +# 4) Global BA (blue-print only) +# --------------------------------------------------------------------- # +def global_bundle_adjustment_blueprint(world_map, K, keypoints): + """ + *** NOT WIRED YET *** + + Outline: + • opt_kf_idx = all key-frames + • fix_kf_idx = [] (maybe fix the very first to anchor gauge) + • run _core_ba(...) 
with a robust kernel + • run asynchronously (thread) and allow early termination + if tracking thread needs the map + """ + raise NotImplementedError + + +# --------------------------------------------------------------------- # +# Shared low-level BA engine +# --------------------------------------------------------------------- # +def _core_ba(world_map, K, keypoints, + *, + opt_kf_idx: list[int], + fix_kf_idx: list[int], + max_points: int | None = None, + max_iters : int = 20, + info_tag : str = ""): + """ + Generic sparse BA over a **subset** of poses + points. + """ + fx, fy, cx, cy = float(K[0, 0]), float(K[1, 1]), float(K[0, 2]), float(K[1, 2]) + intr = np.array([fx, fy, cx, cy], np.float64) + + problem = pyceres.Problem() + # TODO DONT add intrinsics if they are fixed + problem.add_parameter_block(intr, 4) + problem.set_parameter_block_constant(intr) + + # --- pose blocks --------------------------------------------------- + #TODO why for looop, optimize relative poses instead of absolute, + quat_params, trans_params = {}, {} + for k in opt_kf_idx: + quat, tr = _pose_to_quat_trans(world_map.poses[k]) + quat_params[k] = quat + trans_params[k] = tr + problem.add_parameter_block(quat, 4) + problem.set_manifold(quat, pyceres.EigenQuaternionManifold()) + problem.add_parameter_block(tr, 3) + + for k in fix_kf_idx: + quat, tr = _pose_to_quat_trans(world_map.poses[k]) + quat_params[k] = quat + trans_params[k] = tr + problem.add_parameter_block(quat, 4) + problem.set_manifold(quat, pyceres.EigenQuaternionManifold()) + problem.add_parameter_block(tr, 3) + problem.set_parameter_block_constant(quat) + problem.set_parameter_block_constant(tr) + + # --- point blocks -------------------------------------------------- + loss_fn = pyceres.HuberLoss(2.0) + added_pts = 0 + for mp in world_map.points.values(): + # keep only points seen by at least one optimisable KF + if not any(f in opt_kf_idx for f, _ in mp.observations): + continue + if max_points and added_pts >= max_points: + continue + problem.add_parameter_block(mp.position, 3) + added_pts += 1 + + for f_idx, kp_idx in mp.observations: + if f_idx not in opt_kf_idx and f_idx not in fix_kf_idx: + continue + u, v = keypoints[f_idx][kp_idx].pt + _add_reproj_edge(problem, loss_fn, + (u, v), + quat_params[f_idx], + trans_params[f_idx], + mp.position, + intr) + print(problem.num_residual_blocks(), "residuals added") + if problem.num_residual_blocks() < 10: + print(f"{info_tag} skipped – not enough residuals") + return + + # --- solve --------------------------------------------------------- + opts = pyceres.SolverOptions() + opts.max_num_iterations = max_iters + summary = pyceres.SolverSummary() + pyceres.solve(opts, problem, summary) + + # --- write poses back --------------------------------------------- + for k in opt_kf_idx: + world_map.poses[k][:] = _quat_trans_to_pose( + quat_params[k], trans_params[k]) + + iters = (getattr(summary, "iterations_used", getattr(summary, "num_successful_steps", getattr(summary, "num_iterations", None)))) + print(f"{info_tag} iters={iters} " + f"χ²={summary.final_cost:.2f} " + f"res={problem.num_residual_blocks()}") diff --git a/modules/SimpleSLAM/slam/core/ba_utils_OG.py b/modules/SimpleSLAM/slam/core/ba_utils_OG.py new file mode 100644 index 0000000000..3d642593c2 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/ba_utils_OG.py @@ -0,0 +1,137 @@ +# slam/core/ba_utils.py + +import cv2 +import numpy as np +import pyceres +from pycolmap import cost_functions, CameraModelId + +def _pose_to_quat_trans(T: np.ndarray) -> 
tuple[np.ndarray, np.ndarray]: + """ + Convert a 4×4 camera-to-world pose matrix into + a quaternion (w, x, y, z) and a translation vector (3,). + """ + R = T[:3, :3] + aa, _ = cv2.Rodrigues(R) # rotation vector = axis * angle + theta = np.linalg.norm(aa) + if theta < 1e-8: + qw, qx, qy, qz = 1.0, 0.0, 0.0, 0.0 + else: + axis = aa.flatten() / theta + qw = np.cos(theta / 2.0) + sin_half = np.sin(theta / 2.0) + qx, qy, qz = axis * sin_half + quat = np.array([qw, qx, qy, qz], dtype=np.float64) + trans = T[:3, 3].astype(np.float64).copy() + return quat, trans + +def _quat_trans_to_pose(quat: np.ndarray, t: np.ndarray) -> np.ndarray: + """ + Reconstruct a 4×4 pose matrix from a quaternion (w, x, y, z) + and translation vector (3,). + """ + qw, qx, qy, qz = quat + # build rotation matrix from quaternion + R = np.array([ + [1 - 2*(qy*qy + qz*qz), 2*(qx*qy - qz*qw), 2*(qx*qz + qy*qw)], + [2*(qx*qy + qz*qw), 1 - 2*(qx*qx + qz*qz), 2*(qy*qz - qx*qw)], + [2*(qx*qz - qy*qw), 2*(qy*qz + qx*qw), 1 - 2*(qx*qx + qy*qy)] + ], dtype=np.float64) + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = t + return T + +def run_bundle_adjustment( + world_map, + K: np.ndarray, + keypoints: list[list[cv2.KeyPoint]], + *, + fix_first_pose: bool = True, + loss: str = 'huber', + huber_thr: float = 1.0, + max_iters: int = 40 +): + """ + Jointly refine all camera poses (world_map.poses) and 3D points + (world_map.points) using a simple PINHOLE model. `keypoints` + is a list of length N_poses, each entry a List[cv2.KeyPoint]. + """ + # -- 1) Intrinsics block ------------------------------------------------ + fx, fy = K[0, 0], K[1, 1] + cx, cy = K[0, 2], K[1, 2] + intr = np.array([fx, fy, cx, cy], dtype=np.float64) + + problem = pyceres.Problem() + problem.add_parameter_block(intr, 4) + problem.set_parameter_block_constant(intr) + + # -- 2) Pose parameter blocks (quaternion + translation) ----------------- + quat_params: list[np.ndarray] = [] + trans_params: list[np.ndarray] = [] + for T in world_map.poses: + quat, trans = _pose_to_quat_trans(T) + quat_params.append(quat) + trans_params.append(trans) + + # add quaternion blocks with manifold + for q in quat_params: + problem.add_parameter_block(q, 4) + problem.set_manifold(q, pyceres.EigenQuaternionManifold()) + # add translation blocks + for t in trans_params: + problem.add_parameter_block(t, 3) + + if fix_first_pose: + problem.set_parameter_block_constant(quat_params[0]) + problem.set_parameter_block_constant(trans_params[0]) + + # -- 3) 3D point parameter blocks ---------------------------------------- + point_ids = world_map.point_ids() + point_params = [ + world_map.points[pid].position.copy().astype(np.float64) + for pid in point_ids + ] + for X in point_params: + problem.add_parameter_block(X, 3) + + # -- 4) Loss function ---------------------------------------------------- + loss_fn = pyceres.HuberLoss(huber_thr) if loss.lower() == 'huber' else None + + # -- 5) Add reprojection residuals -------------------------------------- + for j, pid in enumerate(point_ids): + X_block = point_params[j] + mp = world_map.points[pid] + for frame_idx, kp_idx in mp.observations: + u, v = keypoints[frame_idx][kp_idx].pt + uv = np.array([u, v], dtype=np.float64) + + cost = cost_functions.ReprojErrorCost( + CameraModelId.PINHOLE, + uv + ) + # order: [quat, translation, point3D, intrinsics] + problem.add_residual_block( + cost, + loss_fn, + [ + quat_params[frame_idx], + trans_params[frame_idx], + X_block, + intr + ] + ) + + # -- 6) Solve 
------------------------------------------------------------ + options = pyceres.SolverOptions() + options.max_num_iterations = max_iters + summary = pyceres.SolverSummary() + pyceres.solve(options, problem, summary) + print(summary.BriefReport()) + + # -- 7) Write optimized values back into world_map ----------------------- + # Update poses + for i, (q, t) in enumerate(zip(quat_params, trans_params)): + world_map.poses[i][:] = _quat_trans_to_pose(q, t) + # Update points + for pid, X in zip(point_ids, point_params): + world_map.points[pid].position[:] = X diff --git a/modules/SimpleSLAM/slam/core/ba_utils_V1.py b/modules/SimpleSLAM/slam/core/ba_utils_V1.py new file mode 100644 index 0000000000..8b762e8589 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/ba_utils_V1.py @@ -0,0 +1,263 @@ +# -*- coding: utf-8 -*- +""" +ba_utils.py +=========== + +Light-weight Bundle-Adjustment helpers built on **pyceres**. +Implements + • two_view_ba(...) + • pose_only_ba(...) + • local_bundle_adjustment(...) +and keeps the older run_bundle_adjustment (full BA) for +back-compatibility. + +A *blue-print* for global_bundle_adjustment is included but not wired. +""" +from __future__ import annotations +import cv2 +import numpy as np +import pyceres +from pycolmap import cost_functions, CameraModelId + +# --------------------------------------------------------------------- # +# Small pose ⇄ parameter converters +# --------------------------------------------------------------------- # +def _pose_to_quat_trans(T: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + R = T[:3, :3] + aa, _ = cv2.Rodrigues(R) + theta = np.linalg.norm(aa) + if theta < 1e-8: + quat = np.array([1.0, 0.0, 0.0, 0.0], np.float64) + else: + axis = aa.flatten() / theta + s = np.sin(theta / 2.0) + quat = np.array([np.cos(theta / 2.0), + axis[0] * s, + axis[1] * s, + axis[2] * s], np.float64) + trans = T[:3, 3].astype(np.float64).copy() + return quat, trans + + +def _quat_trans_to_pose(quat: np.ndarray, t: np.ndarray) -> np.ndarray: + qw, qx, qy, qz = quat + R = np.array([ + [1 - 2*(qy*qy + qz*qz), 2*(qx*qy - qz*qw), 2*(qx*qz + qy*qw)], + [2*(qx*qy + qz*qw), 1 - 2*(qx*qx + qz*qz), 2*(qy*qz - qx*qw)], + [2*(qx*qz - qy*qw), 2*(qy*qz + qx*qw), 1 - 2*(qx*qx + qy*qy)] + ], dtype=np.float64) + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = t + return T + +# --------------------------------------------------------------------- # +# Shared helper to add one reprojection residual +# --------------------------------------------------------------------- # +def _add_reproj_edge(problem, loss_fn, + kp_uv: tuple[float, float], + quat_param, trans_param, + point_param, intr_param): + """Create a pyceres residual block for one observation.""" + cost = cost_functions.ReprojErrorCost( + CameraModelId.PINHOLE, + np.asarray(kp_uv, np.float64) + ) + problem.add_residual_block( + cost, loss_fn, + [quat_param, trans_param, point_param, intr_param] + ) + +# --------------------------------------------------------------------- # +# 1) Two-view BA (bootstrap refinement) +# --------------------------------------------------------------------- # +def two_view_ba(world_map, K, keypoints, max_iters: int = 20): + """ + Refine the two initial camera poses + all bootstrap landmarks. + + Assumes `world_map` has exactly *two* poses (T_0w, T_1w) and that + each MapPoint already stores **two** observations (frame-0 and + frame-1). Called once right after initialisation. 
+ """ + assert len(world_map.poses) == 2, "two_view_ba expects exactly 2 poses" + + _core_ba(world_map, K, keypoints, + opt_kf_idx=[0, 1], + fix_kf_idx=[], + max_iters=max_iters, + info_tag="[2-view BA]") + + +# --------------------------------------------------------------------- # +# 2) Pose-only BA (current frame refinement) +# --------------------------------------------------------------------- # +def pose_only_ba(world_map, K, keypoints, + frame_idx: int, max_iters: int = 8, + huber_thr: float = 2.0): + """ + Optimise **only one pose** (SE3) while keeping all 3-D points fixed. + Mimics ORB-SLAM's `Optimizer::PoseOptimization`. + """ + fx, fy, cx, cy = float(K[0, 0]), float(K[1, 1]), float(K[0, 2]), float(K[1, 2]) + intr = np.array([fx, fy, cx, cy], np.float64) + + quat, trans = _pose_to_quat_trans(world_map.poses[frame_idx]) + + problem = pyceres.Problem() + problem.add_parameter_block(quat, 4) + problem.add_parameter_block(trans, 3) + problem.set_manifold(quat, pyceres.EigenQuaternionManifold()) + problem.add_parameter_block(intr, 4) + problem.set_parameter_block_constant(intr) + + loss_fn = pyceres.HuberLoss(huber_thr) + + for pid, mp in world_map.points.items(): + for f_idx, kp_idx in mp.observations: + if f_idx != frame_idx: + continue + u, v = keypoints[f_idx][kp_idx].pt + _add_reproj_edge(problem, loss_fn, + (u, v), quat, trans, mp.position, intr) + + if problem.num_residual_blocks() < 10: + print(f"POSE-ONLY BA skipped – not enough residuals") + return # too few observations + + opts = pyceres.SolverOptions() + opts.max_num_iterations = max_iters + summary = pyceres.SolverSummary() + pyceres.solve(opts, problem, summary) + + world_map.poses[frame_idx][:] = _quat_trans_to_pose(quat, trans) + # print(f"[Pose-only BA] iters={summary.iterations_used}" + # f" inliers={problem.num_residual_blocks()}") + print(f"[Pose-only BA] iters={summary.num_successful_steps}" + f" inliers={problem.num_residual_blocks()}") + + +# --------------------------------------------------------------------- # +# 3) Local BA (sliding window) +# --------------------------------------------------------------------- # +def local_bundle_adjustment(world_map, K, keypoints, + center_kf_idx: int, + window_size: int = 8, + max_points : int = 3000, + max_iters : int = 15): + """ + Optimise the *last* `window_size` key-frames around + `center_kf_idx` plus all landmarks they observe. + Older poses are kept fixed (gauge). + """ + first_opt = max(0, center_kf_idx - window_size + 1) + opt_kf = list(range(first_opt, center_kf_idx + 1)) + fix_kf = list(range(0, first_opt)) + + _core_ba(world_map, K, keypoints, + opt_kf_idx=opt_kf, + fix_kf_idx=fix_kf, + max_points=max_points, + max_iters=max_iters, + info_tag=f"[Local BA (kf {center_kf_idx})]") + + +# --------------------------------------------------------------------- # +# 4) Global BA (blue-print only) +# --------------------------------------------------------------------- # +def global_bundle_adjustment_blueprint(world_map, K, keypoints): + """ + *** NOT WIRED YET *** + + Outline: + • opt_kf_idx = all key-frames + • fix_kf_idx = [] (maybe fix the very first to anchor gauge) + • run _core_ba(...) 
with a robust kernel + • run asynchronously (thread) and allow early termination + if tracking thread needs the map + """ + raise NotImplementedError + + +# --------------------------------------------------------------------- # +# Shared low-level BA engine +# --------------------------------------------------------------------- # +def _core_ba(world_map, K, keypoints, + *, + opt_kf_idx: list[int], + fix_kf_idx: list[int], + max_points: int | None = None, + max_iters : int = 20, + info_tag : str = ""): + """ + Generic sparse BA over a **subset** of poses + points. + """ + fx, fy, cx, cy = float(K[0, 0]), float(K[1, 1]), float(K[0, 2]), float(K[1, 2]) + intr = np.array([fx, fy, cx, cy], np.float64) + + problem = pyceres.Problem() + # TODO DONT add intrinsics if they are fixed + problem.add_parameter_block(intr, 4) + problem.set_parameter_block_constant(intr) + + # --- pose blocks --------------------------------------------------- + #TODO why for looop, optimize relative poses instead of absolute, + quat_params, trans_params = {}, {} + for k in opt_kf_idx: + quat, tr = _pose_to_quat_trans(world_map.poses[k]) + quat_params[k] = quat + trans_params[k] = tr + problem.add_parameter_block(quat, 4) + problem.set_manifold(quat, pyceres.EigenQuaternionManifold()) + problem.add_parameter_block(tr, 3) + + for k in fix_kf_idx: + quat, tr = _pose_to_quat_trans(world_map.poses[k]) + quat_params[k] = quat + trans_params[k] = tr + problem.add_parameter_block(quat, 4) + problem.add_parameter_block(tr, 3) + problem.set_parameter_block_constant(quat) + problem.set_parameter_block_constant(tr) + + # --- point blocks -------------------------------------------------- + loss_fn = pyceres.HuberLoss(1.0) + added_pts = 0 + for pid, mp in world_map.points.items(): + # keep only points seen by at least one optimisable KF + if not any(f in opt_kf_idx for f, _ in mp.observations): + continue + if max_points and added_pts >= max_points: + continue + problem.add_parameter_block(mp.position, 3) + added_pts += 1 + + for f_idx, kp_idx in mp.observations: + if f_idx not in opt_kf_idx and f_idx not in fix_kf_idx: + continue + u, v = keypoints[f_idx][kp_idx].pt + _add_reproj_edge(problem, loss_fn, + (u, v), + quat_params[f_idx], + trans_params[f_idx], + mp.position, + intr) + print(problem.num_residual_blocks(), "residuals added") + if problem.num_residual_blocks() < 10: + print(f"{info_tag} skipped – not enough residuals") + return + + # --- solve --------------------------------------------------------- + opts = pyceres.SolverOptions() + opts.max_num_iterations = max_iters + summary = pyceres.SolverSummary() + pyceres.solve(opts, problem, summary) + + # --- write poses back --------------------------------------------- + for k in opt_kf_idx: + world_map.poses[k][:] = _quat_trans_to_pose( + quat_params[k], trans_params[k]) + + iters = (getattr(summary, "iterations_used", getattr(summary, "num_successful_steps", getattr(summary, "num_iterations", None)))) + print(f"{info_tag} iters={iters} " + f"χ²={summary.final_cost:.2f} " + f"res={problem.num_residual_blocks()}") diff --git a/modules/SimpleSLAM/slam/core/dataloader.py b/modules/SimpleSLAM/slam/core/dataloader.py new file mode 100644 index 0000000000..5991f02063 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/dataloader.py @@ -0,0 +1,337 @@ +# dataloader.py + +import os +import glob +import cv2 +import numpy as np +import pickle +from typing import List, Optional, Dict, Tuple +import pandas as pd + +def _parse_tum_rgb_list(txt_path: str, seq_dir: str) -> Tuple[List[str], 
List[float]]: + """Parse TUM rgb.txt returning (paths, timestamps).""" + paths, stamps = [], [] + with open(txt_path, "r") as f: + for line in f: + if line.startswith("#") or not line.strip(): + continue + ts_str, rel = line.strip().split() + stamps.append(float(ts_str)) + paths.append(os.path.join(seq_dir, rel)) + return paths, stamps + +def load_sequence(args) -> List[str]: + """ + Return a list `seq` of either on-disk image paths or in-memory BGR frames. + (Exactly as before—only left / monocular images.) + """ + # TODO: also have to add the ability to load a video file + # TODO: add the ability to load an entire dataset various sequences + prefix = os.path.join(args.base_dir, args.dataset) + + if args.dataset == 'kitti': + img_dir, pat = os.path.join(prefix, '05', 'image_0'), '*.png' + seq = sorted(glob.glob(os.path.join(img_dir, pat))) + + elif args.dataset == 'parking': + img_dir, pat = os.path.join(prefix, 'images'), '*.png' + seq = sorted(glob.glob(os.path.join(img_dir, pat))) + + elif args.dataset == 'malaga': + img_dir = os.path.join(prefix, + 'malaga-urban-dataset-extract-07_rectified_800x600_Images') + seq = sorted(glob.glob(os.path.join(img_dir, '*_left.jpg'))) + + elif args.dataset == 'tum-rgbd': + img_dir = os.path.join(prefix, 'rgbd_dataset_freiburg1_room', 'rgb') + seq = sorted(glob.glob(os.path.join(img_dir, '*.png'))) + + elif args.dataset == 'custom': + vid = os.path.join(prefix, 'custom_compress.mp4') + cap = cv2.VideoCapture(vid) + seq = [] + while True: + ok, fr = cap.read() + if not ok: + break + seq.append(fr) + cap.release() + + else: + raise ValueError(f"Unknown dataset: {args.dataset}") + + if len(seq) < 2: + raise RuntimeError("Dataset must contain at least two frames.") + return seq + + +def load_frame_pair(args, seq, i): + """ + Convenience wrapper: returns BGR frame i and i+1. + Works for both path-based and in-memory sequences. + """ + if args.dataset == 'custom': # frames already in‐memory + return seq[i], seq[i + 1] + return cv2.imread(seq[i]), cv2.imread(seq[i + 1]) + + +# --------------------------------------------------------------------------- # +# New: stereo image paths +# --------------------------------------------------------------------------- # +# USAGE: right_seq = load_stereo_paths(args) # unused for now +def load_stereo_paths(args) -> List[str]: + """ + Return sorted list of right-camera image paths + if the dataset provides them; else an empty list. 
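+
+    Usage sketch (assumes the KITTI / Malaga directory layout used above):
+
+        left_seq  = load_sequence(args)
+        right_seq = load_stereo_paths(args)
+        # Both lists are sorted by filename, so when right_seq is non-empty,
+        # index i in each list is expected to refer to the same stereo capture.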
+ """ + prefix = os.path.join(args.base_dir, args.dataset) + if args.dataset == 'kitti': + right_dir, pat = os.path.join(prefix, '05', 'image_1'), '*.png' + return sorted(glob.glob(os.path.join(right_dir, pat))) + if args.dataset == 'malaga': + right_dir = os.path.join(prefix, + 'malaga-urban-dataset-extract-07_rectified_800x600_Images') + return sorted(glob.glob(os.path.join(right_dir, '*_right.jpg'))) + # parking, tum‐rgbd, custom have no right camera + return [] + + +# --------------------------------------------------------------------------- # +# New: calibration loaders +# --------------------------------------------------------------------------- # +def load_calibration(args) -> Dict[str, np.ndarray]: + """ + Returns a dict with keys: + 'K_l', 'P_l', ← left intrinsics 3×3 and 3×4 + 'K_r', 'P_r' ← right intrinsics & proj (if available) + """ + # TODO: add TUM RGB-D calibration + prefix = os.path.join(args.base_dir, args.dataset) + + if args.dataset == 'kitti': + return _calib_kitti() + if args.dataset == 'malaga': + return _calib_malaga() + if args.dataset == "tum-rgbd": + return _calib_tum() + if args.dataset == 'custom': + calib_path = os.path.join(prefix, 'calibration.pkl') + return _calib_custom(calib_path) + + raise ValueError(f"No calibration loader for {args.dataset}") + + +def _calib_kitti() -> Dict[str, np.ndarray]: + params_l = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 0.000000e+00 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + sep=' ', dtype=np.float64).reshape(3, 4) + K_l = params_l[:3, :3] + + params_r = np.fromstring( + "7.070912e+02 0.000000e+00 6.018873e+02 -3.798145e+02 " + "0.000000e+00 7.070912e+02 1.831104e+02 0.000000e+00 " + "0.000000e+00 0.000000e+00 1.000000e+00 0.000000e+00", + sep=' ', dtype=np.float64).reshape(3, 4) + K_r = params_r[:3, :3] + + return {'K_l': K_l, 'P_l': params_l, + 'K_r': K_r, 'P_r': params_r} + +def _calib_malaga() -> Dict[str, np.ndarray]: + # left intrinsics & proj (from your rough code) + K_l = np.array([ + [795.11588, 0.0, 517.12973], + [0.0, 795.11588, 395.59665], + [0.0, 0.0, 1.0 ] + ]) + P_l = np.hstack([K_l, np.zeros((3,1))]) + # Right camera: assume identity extrinsics for now + return {'K_l': K_l, 'P_l': P_l, 'K_r': K_l.copy(), 'P_r': P_l.copy()} + +def _calib_tum(): + K = np.array([[517.3, 0., 318.6], + [0., 516.5, 255.3], + [0., 0., 1. 
]]) + D = np.array([ 0.2624, -0.9531, 0.0054, 0.0026, -1.1633 ]) # d0-d4 + P = np.hstack([K, np.zeros((3,1))]) + return {"K_l": K, "P_l": P, "D_l": D, "K_r":None, "P_r":None, "D_r":None} + + +def _calib_custom(calib_path: str) -> Dict[str, np.ndarray]: + with open(calib_path, 'rb') as f: + camera_matrix, *_ = pickle.load(f) + P = np.hstack([camera_matrix, np.zeros((3,1))]) + return {'K_l': camera_matrix, 'P_l': P, 'K_r': None, 'P_r': None} + + +# --------------------------------------------------------------------------- # +# New: ground‐truth loaders +# --------------------------------------------------------------------------- # + +def _quat_to_rot(qx: float, qy: float, qz: float, qw: float) -> np.ndarray: + """Convert unit quaternion to 3×3 rotation matrix (TUM order).""" + xx, yy, zz = qx * qx, qy * qy, qz * qz + xy, xz, yz = qx * qy, qx * qz, qy * qz + wx, wy, wz = qw * qx, qw * qy, qw * qz + + return np.array( + [ + [1.0 - 2.0 * (yy + zz), 2.0 * (xy - wz), 2.0 * (xz + wy)], + [2.0 * (xy + wz), 1.0 - 2.0 * (xx + zz), 2.0 * (yz - wx)], + [2.0 * (xz - wy), 2.0 * (yz + wx), 1.0 - 2.0 * (xx + yy)], + ], + dtype=np.float64, + ) + +def _align_tum_gt(rgb_ts: List[float], gt_ts: List[float], poses: List[np.ndarray]) -> List[np.ndarray]: + """Align ground‑truth poses to rgb timestamps via nearest‑neighbour search.""" + aligned = [] + j = 0 + for t in rgb_ts: + # advance j until gt_ts[j] >= t + while j + 1 < len(gt_ts) and gt_ts[j] < t: + j += 1 + # choose closer of j and j‑1 + if j == 0: + aligned.append(poses[0]) + else: + if abs(gt_ts[j] - t) < abs(gt_ts[j - 1] - t): + aligned.append(poses[j]) + else: + aligned.append(poses[j - 1]) + return aligned + + +def load_groundtruth(args) -> Optional[np.ndarray]: + """ + Returns an array of shape [N×3×4] with ground‐truth camera poses, + or None if no GT is available for this dataset. 
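+
+    Example (illustrative only): extracting the ground-truth camera centres
+    from the returned array for a trajectory comparison:
+
+        gt = load_groundtruth(args)
+        if gt is not None:
+            centres = gt[:, :, 3]   # (N, 3) translation column of each pose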
+ """ + # TODO: add the ability to load the GT generally from a file - ie just give a path of dataset and the function will parse it + prefix = os.path.join(args.base_dir, args.dataset) + + if args.dataset == 'kitti': + poses = np.loadtxt(os.path.join(prefix, 'poses/05.txt')) + return poses.reshape(-1, 3, 4) + + if args.dataset == 'malaga': + seq = load_sequence(args) + filepath = os.path.join( + prefix, + 'malaga-urban-dataset-extract-07_all-sensors_GPS.txt' + ) + return _malaga_get_gt(filepath, seq) + + if args.dataset == "tum-rgbd": + seq_dir = os.path.join(prefix, "rgbd_dataset_freiburg1_room") + rgb_list = os.path.join(seq_dir, "rgb.txt") + gt_file = os.path.join(seq_dir, "groundtruth.txt") + + # --- read rgb timestamps --- # + _, rgb_ts = _parse_tum_rgb_list(rgb_list, seq_dir) + + # --- read ground‑truth trajectory --- # + gt_ts, poses = [], [] + with open(gt_file, "r") as f: + for line in f: + if line.startswith("#") or not line.strip(): + continue + items = line.strip().split() + ts = float(items[0]) + tx, ty, tz = map(float, items[1:4]) + qx, qy, qz, qw = map(float, items[4:8]) + R = _quat_to_rot(qx, qy, qz, qw) + P = np.hstack([R, np.array([[tx], [ty], [tz]], dtype=np.float64)]) + gt_ts.append(ts) + poses.append(P) + + aligned = _align_tum_gt(rgb_ts, gt_ts, poses) + return np.stack(aligned, axis=0) # [N×3×4] + + else: + print(f"No ground truth available for dataset: {args.dataset}") + pass + + return None + +# --------------------------------------------------------------------------- # +# Malaga-urban ground truth helper functions +# --------------------------------------------------------------------------- # + +def _malaga_get_gt( + filepath: str, + seq: List[str] +) -> np.ndarray: + """ + Load Malaga-urban GPS/INS ground truth and align it to the left-image sequence. + Returns [N×3×4] pose array (no file writes). + """ + # 1) read and trim the GPS log + col_names = [ + "Time","Lat","Lon","Alt","fix","sats","speed","dir", + "LocalX","LocalY","LocalZ","rawlogID","GeocenX","GeocenY","GeocenZ", + "GPSX","GPSY","GPSZ","GPSVX","GPSVY","GPSVZ","LocalVX","LocalVY","LocalVZ","SATTime" + ] + df = pd.read_csv( + filepath, + sep=r'\s+', + comment='%', + header=None, + names=col_names + ) + df = df[["Time","LocalX","LocalY","LocalZ"]].sort_values(by="Time").reset_index(drop=True) + times = df["Time"].values + t0, t1 = times[0], times[-1] + + # 2) keep only images whose timestamp falls within the GT interval + valid_seq = [] + for img in seq: + ts = extract_file_timestamp(img) + if t0 <= ts <= t1: + valid_seq.append(img) + seq[:] = valid_seq # trim the sequence in place + + # 3) build the pose list + poses = [] + for img in seq: + ts = extract_file_timestamp(img) + position = get_position_at_time(ts, df) # [-y, z, x] + P4 = np.eye(4, dtype=np.float64) + P4[:3, 3] = position + poses.append(P4[:3,:4]) + + return np.stack(poses, axis=0) # [N×3×4] + + +def extract_file_timestamp(filepath: str) -> float: + """ + Extract the floating‐point timestamp from a Malaga filename: + e.g. '..._1234567890.123_left.jpg' → 1234567890.123 + """ + name = os.path.basename(filepath) + parts = name.split("_") + return float(parts[2]) + + +def get_position_at_time(timestamp: float, df: pd.DataFrame) -> np.ndarray: + """ + Linearly interpolate the (LocalX,LocalY,LocalZ) at the given timestamp. + Returns a length-3 vector [-y, z, x] to match camera axes. 
+ """ + times = df["Time"].values + idx = np.searchsorted(times, timestamp) + idx = np.clip(idx, 1, len(times)-1) + t_prev, t_next = times[idx-1], times[idx] + row_prev, row_next = df.iloc[idx-1], df.iloc[idx] + x0, y0, z0 = row_prev[["LocalX","LocalY","LocalZ"]] + x1, y1, z1 = row_next[["LocalX","LocalY","LocalZ"]] + + # use ASCII variable name 'alpha' instead of a Greek symbol + alpha = (timestamp - t_prev) / (t_next - t_prev) if (t_next != t_prev) else 0.0 + + x = x0 + alpha * (x1 - x0) + y = y0 + alpha * (y1 - y0) + z = z0 + alpha * (z1 - z0) + return np.array([-y, z, x], dtype=np.float64) \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/core/features_utils.py b/modules/SimpleSLAM/slam/core/features_utils.py new file mode 100644 index 0000000000..a20154d0f5 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/features_utils.py @@ -0,0 +1,206 @@ +# features_utils.py +import cv2 +import numpy as np +from typing import List, Tuple +# optional LightGlue imports guarded by try/except +try: + import torch + from lightglue import LightGlue, ALIKED + from lightglue.utils import rbd , load_image +except ImportError: + raise ImportError('LightGlue unavailable, please install it using instructions on https://github.com/cvg/LightGlue') + + + +# --------------------------------------------------------------------------- # +# Initialisation helpers +# --------------------------------------------------------------------------- # +def init_feature_pipeline(args): + """ + Instantiate detector & matcher according to CLI arguments. + Returns (detector, matcher) + """ + if args.use_lightglue: + device = "cuda" if torch.cuda.is_available() else "cpu" + detector = ALIKED(max_num_keypoints=2048).eval().to(device) + matcher = LightGlue(features='aliked').eval().to(device) + else: + detector = _get_opencv_detector(args.detector) + matcher = _get_opencv_matcher(args.matcher, args.detector) + return detector, matcher + + +def _get_opencv_detector(detector_type, max_features=6000): + if detector_type == 'orb': + return cv2.ORB_create(max_features) + if detector_type == 'sift': + return cv2.SIFT_create() + if detector_type == 'akaze': + return cv2.AKAZE_create() + raise ValueError(f"Unsupported detector: {detector_type}") + + +def _get_opencv_matcher(matcher_type, detector_type): + if matcher_type != 'bf': + raise ValueError(f"Unsupported matcher: {matcher_type}") + norm = cv2.NORM_HAMMING if detector_type in ['orb', 'akaze'] else cv2.NORM_L2 + return cv2.BFMatcher(norm, crossCheck=True) + +# --------------------------------------------------------------------------- # +# Individual feature extraction and matching functions +# --------------------------------------------------------------------------- # + +def _convert_lg_kps_to_opencv(kp0: torch.Tensor) -> List[cv2.KeyPoint]: + cv_kp0 = [cv2.KeyPoint(float(x), float(y), 1) for x, y in kp0] + return cv_kp0 + +def _convert_opencv_to_lg_kps(cv_kp: List[cv2.KeyPoint]) -> torch.Tensor: + """ + Convert a list of cv2.KeyPoint into a tensor of shape [M×2] + containing (x, y) coordinates for LightGlue. 
+ Args: + cv_kp: list of cv2.KeyPoint + Returns: + Tensor of shape [M, 2], dtype=torch.float32 + """ + coords = [(kp.pt[0], kp.pt[1]) for kp in cv_kp] + if not coords: + return torch.empty((0, 2), dtype=torch.float32) + return torch.tensor(coords, dtype=torch.float32) + + +def _convert_lg_matches_to_opencv(matches_raw: torch.Tensor) -> List[cv2.DMatch]: + pairs = matches_raw.cpu().numpy().tolist() + cv_matches = [cv2.DMatch(int(i), int(j), 0, 0.0) for i, j in pairs] + return cv_matches + +def feature_extractor(args, img: np.ndarray, detector): + """ + Extract features from a single image. + Returns a FrameFeatures with OpenCV-compatible keypoints and descriptors, + and optional LightGlue tensors if using LightGlue. + """ + + if args.use_lightglue: + t0 = _bgr_to_tensor(img) + feats = detector.extract(t0) + feats = rbd(feats) + lg_kps = feats['keypoints'] + kp0 = _convert_lg_kps_to_opencv(lg_kps) + des0 = feats['descriptors'] + return kp0, des0 + + else: + kp0, des0 = detector.detectAndCompute(img, None) + if des0 is None: + return [], [] + return kp0, des0 + + +def feature_matcher(args, kp0, kp1, des0, des1, matcher): + """ Match features between two FrameFeatures and return OpenCV-compatible matches.""" + # optionally filter matches from LightGlue by confidence + if args.use_lightglue: + # LightGlue matching + # kp0, kp1: List[cv2.KeyPoint] + # des0, des1: torch.Tensor [M×D], [N×D] + device = des0.device + lg_kp0 = _convert_opencv_to_lg_kps(kp0).to(device).unsqueeze(0) # [1×M×2] + lg_kp1 = _convert_opencv_to_lg_kps(kp1).to(device).unsqueeze(0) # [1×N×2] + lg_desc0 = des0.unsqueeze(0) # [1×M×D] + lg_desc1 = des1.unsqueeze(0) # [1×N×D] + + raw = matcher({ + 'image0': {'keypoints': lg_kp0, 'descriptors': lg_desc0}, + 'image1': {'keypoints': lg_kp1, 'descriptors': lg_desc1} + }) + raw = rbd(raw) + matches_raw = raw['matches'] + # LightGlue may return confidence either as 'scores' or 'confidence'. + # Avoid `or` with tensors to prevent ambiguity errors. + conf = raw['scores'] if 'scores' in raw else raw.get('confidence') + if conf is not None: + mask = conf > args.min_conf + matches_raw = matches_raw[mask] + # convert index pairs to cv2.DMatch list + cv_matches = _convert_lg_matches_to_opencv(matches_raw) + return cv_matches + else: + # OpenCV matching + matches = matcher.match(des0, des1) + return sorted(matches, key=lambda m: m.distance) + + +# --------------------------------------------------------------------------- # +# Public API +# --------------------------------------------------------------------------- # + +def filter_matches_ransac(kp1, kp2, matches, thresh=1.0): + """ + Drop outliers using the fundamental matrix + RANSAC. + """ + if len(matches) < 8: + return matches + + pts1 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts2 = np.float32([kp2[m.trainIdx].pt for m in matches]) + + _, mask = cv2.findFundamentalMat(pts1, pts2, + cv2.FM_RANSAC, thresh, 0.99) + mask = mask.ravel().astype(bool) + return [m for m, ok in zip(matches, mask) if ok] + + +# --------------------------------------------------------------------------- # +# Convenience method for LightGlue/OpenCV pipeline to detect and match features in two images +# NO LONGER USED IN THE MAIN SLAM PIPELINE, BUT LEFT FOR REFERENCE +# (e.g. for testing purposes). 
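+# Example (sketch):
+#     kp1, kp2, des1, des2, matches = detect_and_match(img1, img2, detector, matcher, args)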
+# --------------------------------------------------------------------------- # +def _opencv_detect_and_match(img1, img2, detector, matcher): + kp1, des1 = detector.detectAndCompute(img1, None) + kp2, des2 = detector.detectAndCompute(img2, None) + + if des1 is None or des2 is None: + return [], [], [], [], [] # gracefully handle empty images + + matches = sorted(matcher.match(des1, des2), key=lambda m: m.distance) + + return kp1, kp2, des1, des2, matches + +def _bgr_to_tensor(image): + img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0 + tensor = torch.from_numpy(img_rgb).permute(2, 0, 1).unsqueeze(0) + return tensor.cuda() if torch.cuda.is_available() else tensor + + +def _convert_lightglue_to_opencv(kp0, kp1, matches): + n = kp0.shape[0] + cv_kp0 = [cv2.KeyPoint(float(x), float(y), 1) for x, y in kp0] + cv_kp1 = [cv2.KeyPoint(float(x), float(y), 1) for x, y in kp1] + cv_matches = [cv2.DMatch(int(i), int(j), 0, 0.0) for i, j in matches] + return cv_kp0, cv_kp1, cv_matches + + +def _lightglue_detect_and_match(img1, img2, extractor, matcher): + t0, t1 = _bgr_to_tensor(img1), _bgr_to_tensor(img2) + + f0, f1 = extractor.extract(t0), extractor.extract(t1) + matches = matcher({'image0': f0, 'image1': f1}) + + # remove batch dimension + f0, f1 = rbd(f0), rbd(f1) + matches = rbd(matches) + + kp0, kp1 = f0['keypoints'], f1['keypoints'] + des0, des1 = f0['descriptors'], f1['descriptors'] + cv_kp0, cv_kp1, cv_matches = _convert_lightglue_to_opencv(kp0, kp1, + matches['matches']) + return cv_kp0, cv_kp1, des0, des1, cv_matches + +def detect_and_match(img1, img2, detector, matcher, args): + """ + Front-end entry. Chooses OpenCV or LightGlue depending on CLI flag. + """ + if args.use_lightglue: + return _lightglue_detect_and_match(img1, img2, detector, matcher) + return _opencv_detect_and_match(img1, img2, detector, matcher) diff --git a/modules/SimpleSLAM/slam/core/keyframe_utils.py b/modules/SimpleSLAM/slam/core/keyframe_utils.py new file mode 100644 index 0000000000..37270b6541 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/keyframe_utils.py @@ -0,0 +1,143 @@ +# keyframe.py +from dataclasses import dataclass +import cv2 +import numpy as np +import lz4.frame +from typing import List, Tuple +from slam.core.features_utils import feature_matcher, filter_matches_ransac + +# --------------------------------------------------------------------------- # +# Dataclass +# --------------------------------------------------------------------------- # +@dataclass +class Keyframe: + idx: int # global frame index + path: str # "" for in-memory frames + kps: list[cv2.KeyPoint] + desc: np.ndarray + pose: np.ndarray # 4×4 camera-to-world pose + thumb: bytes # lz4-compressed JPEG + + +# --------------------------------------------------------------------------- # +# Helpers +# --------------------------------------------------------------------------- # +def make_thumb(bgr, hw=(640, 360)): + th = cv2.resize(bgr, hw) + ok, enc = cv2.imencode('.jpg', th, + [int(cv2.IMWRITE_JPEG_QUALITY), 70]) + return lz4.frame.compress(enc.tobytes()) if ok else b'' + +def is_new_keyframe(frame_idx, + matches_to_kf, + kp_curr, + kp_kf, + kf_max_disp=30.0, + kf_min_inliers=125, + kf_cooldown=5, + last_kf_idx=-999): + """ + Decide whether current frame should be promoted to a Keyframe. 
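+
+    A frame is promoted when any of these holds: the cool-down window since
+    the last key-frame has elapsed, there are no (or fewer than
+    ``kf_min_inliers``) matches to the last key-frame, or the mean pixel
+    displacement of the matches exceeds ``kf_max_disp``.
+
+    Illustrative example (hypothetical values, default thresholds):
+
+        >>> is_new_keyframe(42, matches_to_kf=[], kp_curr=[], kp_kf=[],
+        ...                 last_kf_idx=40)
+        True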
+ """ + if frame_idx - last_kf_idx > kf_cooldown: + return True # enough time has passed since last KF + if not matches_to_kf: + return True + if len(matches_to_kf) < kf_min_inliers: + return True + # TODO as a ratio of frame rather than pixels + disp = [np.hypot(kp_curr[m.trainIdx].pt[0] - kp_kf[m.queryIdx].pt[0], + kp_curr[m.trainIdx].pt[1] - kp_kf[m.queryIdx].pt[1]) + for m in matches_to_kf] + return np.mean(disp) > kf_max_disp + +# TODO add pose +def select_keyframe( + args, + seq: List[str], + frame_idx: int, + img2, kp2, des2, + pose2, + matcher, + kfs: List[Keyframe], + last_kf_idx: int +) -> Tuple[List[Keyframe], int]: + """ + Decide whether to add a new Keyframe at this iteration. + + Parameters + ---------- + args + CLI namespace (provides use_lightglue, ransac_thresh, kf_* params). + seq + Original sequence list (so we can grab file paths if needed). + frame_idx + Zero-based index of the *first* of the pair. Keyframes use i+1 as frame number. + img2 + BGR image for frame i+1 (for thumbnail if we promote). + kp2, des2 + KPs/descriptors of frame i+1. + pose2 + Current camera-to-world pose estimate for frame i+1 (4×4). May be None. + matcher + Either the OpenCV BF/FLANN matcher or the LightGlue matcher. + kfs + Current list of Keyframe objects. + last_kf_idx + Frame number (1-based) of the last keyframe added; or -inf if none. + + Returns + ------- + kfs + Possibly-extended list of Keyframe objects. + last_kf_idx + Updated last keyframe index (still the same if we didn’t add one). + """ + frame_no = frame_idx + 1 + prev_kf = kfs[-1] + # 1) match descriptors from the old KF to the new frame + raw_matches = feature_matcher( + args, prev_kf.kps, kp2, prev_kf.desc, des2, matcher + ) + # 2) drop outliers + matches = filter_matches_ransac( + prev_kf.kps, kp2, raw_matches, args.ransac_thresh + ) + + # 3) promotion test + if is_new_keyframe(frame_no, matches, kp2, prev_kf.kps, args.kf_max_disp, args.kf_min_inliers, + args.kf_cooldown, last_kf_idx): + thumb = make_thumb(img2, tuple(args.kf_thumb_hw)) + path = seq[frame_idx + 1] if isinstance(seq[frame_idx + 1], str) else "" + kfs.append(Keyframe(frame_no, path, kp2, des2, pose2, thumb)) + last_kf_idx = frame_no + + return kfs, last_kf_idx + + +# --------------------------------------------------------------------------- # +# Track maintenance +# --------------------------------------------------------------------------- # +def update_and_prune_tracks(matches, prev_map, tracks, + kp_curr, frame_idx, next_track_id, + prune_age=30): + """ + Continuation of simple 2-D point tracks across frames. + """ + curr_map = {} + + for m in matches: + q, t = m.queryIdx, m.trainIdx + x, y = map(int, kp_curr[t].pt) + tid = prev_map.get(q, next_track_id) + if tid == next_track_id: + tracks[tid] = [] + next_track_id += 1 + curr_map[t] = tid + tracks[tid].append((frame_idx, x, y)) + + # prune dead tracks + for tid, pts in list(tracks.items()): + if frame_idx - pts[-1][0] > prune_age: + del tracks[tid] + return curr_map, tracks, next_track_id diff --git a/modules/SimpleSLAM/slam/core/landmark_utils.py b/modules/SimpleSLAM/slam/core/landmark_utils.py new file mode 100644 index 0000000000..d99fbadca0 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/landmark_utils.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +""" +landmark_utils.py +~~~~~~~~~~~~~~~~~ +Classes and helper functions for managing 3‑D landmarks and camera poses +in an incremental VO / SLAM pipeline. + +* MapPoint ─ encapsulates a single 3‑D landmark. 
+* Map ─ container for all landmarks + camera trajectory. +* triangulate_points ─ convenience wrapper around OpenCV triangulation. + +The module is intentionally lightweight and free of external dependencies +beyond NumPy + OpenCV; this makes it easy to unit‑test without a heavy +visualisation stack. +""" + +from dataclasses import dataclass, field +from typing import Dict, List, Tuple, Optional + +import cv2 +import numpy as np +from scipy.spatial import cKDTree + +# --------------------------------------------------------------------------- # +# MapPoint +# --------------------------------------------------------------------------- # +@dataclass +class MapPoint: + """A single triangulated 3‑D landmark.""" + id: int + position: np.ndarray # shape (3,) + keyframe_idx: int = -1 + colour: np.ndarray = field(default_factory=lambda: np.ones(3, dtype=np.float32)) # (3,) in **linear** RGB 0-1 + observations: List[Tuple[int, int]] = field(default_factory=list) # (frame_idx, kp_idx) + + def add_observation(self, frame_idx: int, kp_idx: int) -> None: + """Register that *kp_idx* in *frame_idx* observes this landmark.""" + self.observations.append((frame_idx, kp_idx)) + + +# --------------------------------------------------------------------------- # +# Map container +# --------------------------------------------------------------------------- # +class Map: + """A minimalistic map: 3‑D points + camera trajectory.""" + + def __init__(self) -> None: + self.points: Dict[int, MapPoint] = {} + self.poses: List[np.ndarray] = [] # List of 4×4 camera‑to‑world matrices + self._next_pid: int = 0 + + # ---------------- Camera trajectory ---------------- # + def add_pose(self, pose_w_c: np.ndarray) -> None: + """Append a 4×4 *pose_w_c* (camera‑to‑world) to the trajectory.""" + assert pose_w_c.shape == (4, 4), "Pose must be 4×4 homogeneous matrix" + self.poses.append(pose_w_c.copy()) + + # ---------------- Landmarks ------------------------ # + def add_points(self, pts3d: np.ndarray, colours: Optional[np.ndarray] = None, + keyframe_idx: int = -1) -> List[int]: + """Add a set of 3‑D points and return the list of newly assigned ids.""" + if pts3d.ndim != 2 or pts3d.shape[1] != 3: + raise ValueError("pts3d must be (N,3)") + new_ids: List[int] = [] + + colours = colours if colours is not None else np.ones_like(pts3d) + for p, c in zip(pts3d, colours): + pid = self._next_pid + self.points[pid] = MapPoint( + pid, + p.astype(np.float64), + keyframe_idx, + c.astype(np.float32), + ) + new_ids.append(pid) + self._next_pid += 1 + return new_ids + + # def add_points(self, + # xyz : np.ndarray, # (M,3) float32/64 + # rgb : np.ndarray | None = None # (M,3) float32 in [0,1] + # ) -> list[int]: + # """ + # Insert M new landmarks and return their integer ids. + # `rgb` may be omitted – we then default to light-grey. 
+ # """ + # xyz = np.asarray(xyz, dtype=np.float32).reshape(-1, 3) + # if rgb is None: + # rgb = np.full_like(xyz, 0.8, dtype=np.float32) # light grey + # else: + # rgb = np.asarray(rgb, dtype=np.float32).reshape(-1, 3) + + # assert xyz.shape == rgb.shape, \ + # "xyz and rgb must have the same length" + + # ids = list(range(self._next_pid, self._next_pid + len(xyz))) + # self._next_pid += len(xyz) + + # for pid, p, c in zip(ids, xyz, rgb): + # self.points[pid] = MapPoint(pid, p, c) + + # return ids + + + # ---------------- Convenience accessors ------------ # + def get_point_array(self) -> np.ndarray: + """Return all landmark positions as an (N,3) array (N may be 0).""" + if not self.points: + return np.empty((0, 3)) + return np.stack([mp.position for mp in self.points.values()], axis=0) + + def get_color_array(self) -> np.ndarray: + if not self.points: + return np.empty((0, 3), np.float32) + return np.stack([mp.colour for mp in self.points.values()]) + + def point_ids(self) -> List[int]: + return list(self.points.keys()) + + def __len__(self) -> int: + return len(self.points) + + # ---------------- Merging landmarks ---------------- # + def fuse_closeby_duplicate_landmarks(self, radius: float = 0.05) -> None: + """Average-merge landmarks whose centres are closer than ``radius``.""" + + if len(self.points) < 2: + return + + ids = list(self.points.keys()) + pts = np.stack([self.points[i].position for i in ids]) + tree = cKDTree(pts) + pairs = sorted(tree.query_pairs(radius)) + + removed: set[int] = set() + for i, j in pairs: + ida, idb = ids[i], ids[j] + if idb in removed or ida in removed: + continue + pa = self.points[ida].position + pb = self.points[idb].position + self.points[ida].position = (pa + pb) * 0.5 + removed.add(idb) + + for idx in removed: + self.points.pop(idx, None) + + + def align_points_to_map(self, pts: np.ndarray, radius: float = 0.05) -> np.ndarray: + """Rigidly align ``pts`` to existing landmarks using nearest neighbours.""" + map_pts = self.get_point_array() + if len(map_pts) < 3 or len(pts) < 3: + return pts + + tree = cKDTree(map_pts) + src, dst = [], [] + for p in pts: + idxs = tree.query_ball_point(p, radius) + if idxs: + src.append(p) + dst.append(map_pts[idxs[0]]) + + if len(src) < 3: + return pts + + from .pnp_utils import align_point_clouds + R, t = align_point_clouds(np.asarray(src), np.asarray(dst)) + pts_aligned = (R @ pts.T + t[:, None]).T + return pts_aligned + + +# --------------------------------------------------------------------------- # +# Geometry helpers (stay here to avoid cyclic imports) +# --------------------------------------------------------------------------- # +def triangulate_points( + K: np.ndarray, + R: np.ndarray, + t: np.ndarray, + pts1: np.ndarray, + pts2: np.ndarray, +) -> np.ndarray: + """Triangulate corresponding *pts1* ↔ *pts2* given (R, t). + + Parameters + ---------- + K + 3×3 camera intrinsic matrix. + R, t + Rotation + translation from *view‑1* to *view‑2*. + pts1, pts2 + Nx2 arrays of pixel coordinates (dtype float32/float64). + Returns + ------- + pts3d + Nx3 array in *view‑1* camera coordinates (not yet in world frame). 
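+
+    Example
+    -------
+    A minimal sketch of lifting the result into the world frame, assuming
+    ``pose_w_c1`` is the 4×4 camera-to-world pose of view 1::
+
+        pts3d_c1 = triangulate_points(K, R, t, pts1, pts2)
+        pts_h = np.hstack([pts3d_c1, np.ones((len(pts3d_c1), 1))])
+        pts3d_w = (pose_w_c1 @ pts_h.T).T[:, :3]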
+ """ + if pts1.shape != pts2.shape: + raise ValueError("pts1 and pts2 must be the same shape") + if pts1.ndim != 2 or pts1.shape[1] != 2: + raise ValueError("pts1/pts2 must be (N,2)") + + proj1 = K @ np.hstack((np.eye(3), np.zeros((3, 1)))) + proj2 = K @ np.hstack((R, t.reshape(3, 1))) # Equivalent to proj1 @ get_homo_from_pose_rt(R, t) + + pts4d_h = cv2.triangulatePoints(proj1, proj2, pts1.T, pts2.T) # TODO: do triangulation from scratch for N observations + pts3d = (pts4d_h[:3] / pts4d_h[3]).T # → (N,3) + return pts3d diff --git a/modules/SimpleSLAM/slam/core/multi_view_utils.py b/modules/SimpleSLAM/slam/core/multi_view_utils.py new file mode 100644 index 0000000000..d4cfd0b1ee --- /dev/null +++ b/modules/SimpleSLAM/slam/core/multi_view_utils.py @@ -0,0 +1,168 @@ +# slam/core/multi_view_utils.py +"""Utilities for deferred (multi-view) triangulation in the SLAM pipeline.""" + +from __future__ import annotations +from dataclasses import dataclass +from typing import Dict, List, Tuple, Optional + +import numpy as np +from .landmark_utils import Map + + +# --------------------------------------------------------------------------- # +# Robust linear triangulation across ≥ 2 views +# --------------------------------------------------------------------------- # +def multi_view_triangulation( + K: np.ndarray, + poses_w_c: List[np.ndarray], # M × 4×4 (cam→world) + pts2d: np.ndarray, # M × 2 (pixels) + *, + min_depth: float, + max_depth: float, + max_rep_err: float, + eps: float = 1e-6 +) -> Optional[np.ndarray]: + """Return xyz _w or **None** if cheirality / depth / reprojection checks fail.""" + assert len(poses_w_c) == len(pts2d) >= 2, "Need ≥ 2 consistent views" + + # Build A (2 M × 4) + A = [] + for T_w_c, (u, v) in zip(poses_w_c, pts2d): + P = K @ np.linalg.inv(T_w_c)[:3, :4] + A.append(u * P[2] - P[0]) + A.append(v * P[2] - P[1]) + A = np.stack(A) + + _, _, Vt = np.linalg.svd(A) + X_h = Vt[-1] + if abs(X_h[3]) < eps: # degenerate solution + return None + X = X_h[:3] / X_h[3] + + # Cheats: cheirality, depth & reprojection + reproj, depths = [], [] + for T_w_c, (u, v) in zip(poses_w_c, pts2d): + pc = (np.linalg.inv(T_w_c) @ np.append(X, 1.0))[:3] # TODO pc = (np.linalg.inv(T_w_c) @ np.append(X, 1.0))[:3] Not sure if this is correct + if pc[2] <= 0: # behind the camera + print("Cheirality check failed:", pc) + return None + depths.append(pc[2]) + + uv_hat = (K @ pc)[:2] / pc[2] + reproj.append(np.linalg.norm(uv_hat - (u, v))) + + if not (min_depth <= np.mean(depths) <= max_depth): + print(f"Depth check failed: {np.mean(depths)} not in [{min_depth}, {max_depth}]") + return None + if np.mean(reproj) > max_rep_err: + print(f"Reprojection error check failed: {np.mean(reproj)} > {max_rep_err}") + return None + return X + + +# --------------------------------------------------------------------------- # +# Track manager – accumulates 2-D key-frame observations +# --------------------------------------------------------------------------- # +@dataclass +class _Obs: + kf_idx: int + kp_idx: int + uv: Tuple[float, float] + + +class MultiViewTriangulator: + """ + Accumulate feature tracks (key-frames only) and triangulate once a track + appears in ≥ `min_views` distinct key-frames. + """ + + def __init__(self, + K: np.ndarray, + *, + min_views: int, + merge_radius: float, + max_rep_err: float, + min_depth: float, + max_depth: float): + # All thresholds come from the caller – no magic numbers inside. 
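+        # min_views      : key-frames a track must be seen in before triangulation
+        # merge_radius   : radius (map units) used when aligning new points to the map
+        # max_rep_err    : maximum mean reprojection error (pixels)
+        # min/max_depth  : accepted range for the mean depth of a triangulated point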
+ self.K = K + self.min_views = max(2, min_views) + self.merge_radius = merge_radius + self.max_rep_err = max_rep_err + self.min_depth = min_depth + self.max_depth = max_depth + + self._track_obs: Dict[int, List[_Obs]] = {} + self._kf_poses: Dict[int, np.ndarray] = {} + self._kf_imgs: Dict[int, np.ndarray] = {} # BGR uint8 + self._triangulated: set[int] = set() + + # ------------------------------------------------------------------ # + def add_keyframe(self, + frame_idx: int, + pose_w_c: np.ndarray, + kps: List, # List[cv2.KeyPoint] + track_map: Dict[int, int], + img_bgr: np.ndarray) -> None: + """Register observations (and keep the *full-res* image for colour sampling).""" + self._kf_poses[frame_idx] = pose_w_c.copy() + self._kf_imgs[frame_idx] = img_bgr # shallow copy is fine + for kp_idx, tid in track_map.items(): + u, v = kps[kp_idx].pt + self._track_obs.setdefault(tid, []).append(_Obs(frame_idx, kp_idx, (u, v))) + + # ------------------------------------------------------------------ # + def triangulate_ready_tracks(self, world_map: Map) -> List[int]: + """Triangulate mature tracks, insert them into the map, and return new ids.""" + new_ids: List[int] = [] + + for tid, obs in list(self._track_obs.items()): + if tid in self._triangulated or len(obs) < self.min_views: + continue + + obs_sorted = sorted(obs, key=lambda o: o.kf_idx) + poses, pts2d = [], [] + for o in obs_sorted: + pose = self._kf_poses.get(o.kf_idx) + if pose is None: + break + poses.append(pose) + pts2d.append(o.uv) + else: + # print(f"Triangulating track {tid} with {len(obs)} observations") + X = multi_view_triangulation( + self.K, poses, np.float32(pts2d), + min_depth=self.min_depth, + max_depth=self.max_depth, + max_rep_err=self.max_rep_err, + ) + # print(" Triangulated 3D point:", X) + if X is None: + continue + + # --------- colour sampling (pick first obs with an image) ------- + rgb = (1.0, 1.0, 1.0) # default white + for o in obs_sorted: + img = self._kf_imgs.get(o.kf_idx) + if img is None: + continue + h, w, _ = img.shape + x, y = int(round(o.uv[0])), int(round(o.uv[1])) + if 0 <= x < w and 0 <= y < h: + b, g, r = img[y, x] + rgb = (r / 255.0, g / 255.0, b / 255.0) + break + + # --------------- map insertion (+ optional merging) ------------- + X = world_map.align_points_to_map( + X[None, :], radius=self.merge_radius + )[0] + pid = world_map.add_points(X[None, :], np.float32([[*rgb]]))[0] + for o in obs_sorted: + world_map.points[pid].add_observation(o.kf_idx, o.kp_idx) + + new_ids.append(pid) + self._triangulated.add(tid) + self._track_obs.pop(tid, None) # free memory + + return new_ids diff --git a/modules/SimpleSLAM/slam/core/pnp_utils.py b/modules/SimpleSLAM/slam/core/pnp_utils.py new file mode 100644 index 0000000000..e72b84b866 --- /dev/null +++ b/modules/SimpleSLAM/slam/core/pnp_utils.py @@ -0,0 +1,138 @@ +""" +pnp_utils.py +~~~~~~~~~~~~ +Light-weight helpers for + +* projecting world landmarks into an image, +* finding 3-D ↔ 2-D correspondences, +* robust PnP pose refinement, and +* optional landmark ↔ key-point bookkeeping. + +All functions are Numpy-only apart from OpenCV calls; no extra deps. 
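+
+Typical usage (a sketch mirroring slam/monocular/main.py; variable names
+are illustrative):
+
+    pts3d, pts2d, kp_ids = associate_landmarks(K, pose_pred, map_pts, kps)
+    R, t = refine_pose_pnp(K, pts3d, pts2d)   # world -> camera, or (None, None)
+    if R is not None:
+        T_w_c = np.eye(4)
+        T_w_c[:3, :3] = R.T
+        T_w_c[:3, 3] = -R.T @ t               # camera -> world pose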
+""" +from __future__ import annotations +import cv2 +import numpy as np +from typing import List, Tuple, Sequence + +# --------------------------------------------------------------- # +# Projection helper +# --------------------------------------------------------------- # +def project_points( + K: np.ndarray, + pose_w_c: np.ndarray, # 4×4 camera-to-world + pts_w: np.ndarray # N×3 (world) +) -> np.ndarray: # N×2 float32 (pixels) + """Project *pts_w* into the camera given `pose_w_c`.""" + T_c_w = np.linalg.inv(pose_w_c) # world → camera + P = K @ T_c_w[:3, :4] # 3×4 + pts_h = np.hstack([pts_w, np.ones((len(pts_w), 1))]).T # 4×N + uvw = P @ pts_h # 3×N + return (uvw[:2] / uvw[2]).T.astype(np.float32) # N×2 + +# --------------------------------------------------------------- # +# 3-D ↔ 2-D association +# --------------------------------------------------------------- # +def associate_landmarks( + K: np.ndarray, + pose_w_c: np.ndarray, + pts_w: np.ndarray, # N×3 (world coords of map) + kps: Sequence[cv2.KeyPoint], + search_rad: float = 10.0 +) -> Tuple[np.ndarray, np.ndarray, List[int]]: + """ + Simple projection-based data association: + + 1. Project every landmark into the current image. + 2. Take the *closest* key-point within `search_rad` px. + (no descriptor needed → fast, deterministic) + + Returns + ------- + pts3d : M×3 world points that found a match + pts2d : M×2 pixel coords of the matched key-points + kp_idx : list of length *M* with the matching cv2.KeyPoint indices + """ + if len(pts_w) == 0 or len(kps) == 0: + return np.empty((0, 3)), np.empty((0, 2)), [] + + proj = project_points(K, pose_w_c, pts_w) # N×2 + kp_xy = np.float32([kp.pt for kp in kps]) # K×2 + + pts3d, pts2d, kp_ids = [], [], [] + used = set() + for i, (u, v) in enumerate(proj): + d2 = np.sum((kp_xy - (u, v)) ** 2, axis=1) + j = np.argmin(d2) + if d2[j] < search_rad ** 2 and j not in used: + pts3d.append(pts_w[i]) + pts2d.append(kp_xy[j]) + kp_ids.append(j) + used.add(j) + + if not pts3d: + return np.empty((0, 3)), np.empty((0, 2)), [] + return np.float32(pts3d), np.float32(pts2d), kp_ids + +# TODO: Try to use EPnP +# --------------------------------------------------------------- # +# Robust PnP RANSAC + LM refinement +# --------------------------------------------------------------- # +def refine_pose_pnp( + K: np.ndarray, + pts3d: np.ndarray, # M×3 + pts2d: np.ndarray # M×2 +) -> Tuple[np.ndarray, np.ndarray] | Tuple[None, None]: + """AP3P + RANSAC → LM-refined Returns (R, t) that map **world → camera**.""" + if len(pts3d) < 4: + return None, None + + ok, rvec, tvec, inl = cv2.solvePnPRansac( + pts3d, pts2d, K, None, + reprojectionError=3.0, + iterationsCount=100, + flags=cv2.SOLVEPNP_AP3P + ) + if not ok or inl is None or len(inl) < 4: + return None, None + + # final bundle-free local optimisation + cv2.solvePnPRefineLM( + pts3d[inl[:, 0]], pts2d[inl[:, 0]], + K, None, rvec, tvec + ) + R, _ = cv2.Rodrigues(rvec) + return R, tvec.ravel() # R: 3×3, t: 3×1, # 1-D (3,) instead of (3,1) + +# --------------------------------------------------------------- # +# 3-D ↔ 3-D alignment +# --------------------------------------------------------------- # +def align_point_clouds( + src: np.ndarray, # N×3 + dst: np.ndarray # N×3 +) -> Tuple[np.ndarray, np.ndarray]: + """Least-squares rigid alignment **src → dst** (no scale). 
+ + Returns rotation ``R`` and translation ``t`` such that + + ``dst ≈ (R @ src.T + t).T`` + """ + + if src.shape != dst.shape or src.ndim != 2 or src.shape[1] != 3: + raise ValueError("src/dst must both be (N,3)") + + centroid_src = src.mean(axis=0) + centroid_dst = dst.mean(axis=0) + + src_centered = src - centroid_src + dst_centered = dst - centroid_dst + + H = src_centered.T @ dst_centered + U, _, Vt = np.linalg.svd(H) + R = Vt.T @ U.T + if np.linalg.det(R) < 0: + Vt[2, :] *= -1 + R = Vt.T @ U.T + + t = centroid_dst - R @ centroid_src + return R, t diff --git a/modules/SimpleSLAM/slam/core/trajectory_utils.py b/modules/SimpleSLAM/slam/core/trajectory_utils.py new file mode 100644 index 0000000000..f4852d276a --- /dev/null +++ b/modules/SimpleSLAM/slam/core/trajectory_utils.py @@ -0,0 +1,52 @@ +""" +trajectory_utils.py +~~~~~~~~~~~~~~~~~~~ +Small utilities for aligning, transforming and plotting camera +trajectories. + +This file contains *no* project-specific imports, so it can be reused +from notebooks or quick scripts without pulling the whole SLAM stack. +""" +from __future__ import annotations +from typing import Tuple, Optional + +import numpy as np +import matplotlib.pyplot as plt + + +# ------------------------------------------------------------------ # +# Alignment helpers +# ------------------------------------------------------------------ # +def compute_gt_alignment(gt_T: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """ + Return (R_align, t_align) such that + + p_slam = R_align @ p_gt + t_align + + maps *ground-truth* positions into the SLAM world used in this + project (camera-0 frame at t=0). + + Parameters + ---------- + gt_T : (N,4,4) homogeneous ground-truth poses, first one defines the + reference frame. + + Notes + ----- + *KITTI* supplies (3×4) poses whose *first* matrix is *already* + camera-0 wrt world, therefore the alignment is an identity – but we + keep it generic so other datasets work too. + """ + if gt_T.ndim != 3 or gt_T.shape[1:] != (4, 4): + raise ValueError("gt_T must be (N,4,4)") + R0, t0 = gt_T[0, :3, :3], gt_T[0, :3, 3] + R_align = np.eye(3) @ R0.T + t_align = -R_align @ t0 + return R_align, t_align + + +def apply_alignment(p_gt: np.ndarray, + R_align: np.ndarray, + t_align: np.ndarray) -> np.ndarray: + """Transform *one* 3-D point from GT to SLAM coordinates.""" + return R_align @ p_gt + t_align diff --git a/modules/SimpleSLAM/slam/core/visualization_utils.py b/modules/SimpleSLAM/slam/core/visualization_utils.py new file mode 100644 index 0000000000..8f6b735abd --- /dev/null +++ b/modules/SimpleSLAM/slam/core/visualization_utils.py @@ -0,0 +1,324 @@ +# visualization_utils.py +from __future__ import annotations +""" +visualization_utils.py +~~~~~~~~~~~~~~~~~~~~~~ +Clean, modular utilities for +* drawing 2‑D track overlays with OpenCV, and +* rendering a coloured 3‑D map in an **Open3D** window. + +Main entry‑points +----------------- +``draw_tracks(img, tracks, frame_no)`` + Overlay recent feature tracks. + +``Visualizer3D`` + Live window that shows + • the SLAM point cloud, colour‑coded along an axis or PCA‑auto; + • the camera trajectory (blue line); + • new landmarks highlighted (default bright‑green). + Supports WASDQE first‑person navigation when the Open3D build exposes + key‑callback APIs. 
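+
+``TrajectoryPlotter``
+    Lightweight interactive Matplotlib plot of the estimated path against
+    (optionally aligned) ground truth.
+
+Typical per-frame usage (sketch):
+
+    viz3d = Visualizer3D(color_axis="y")
+    viz3d.update(world_map, new_ids)   # refresh point cloud + trajectory
+    viz3d.close()                      # on shutdown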
+""" + +from typing import Dict, List, Tuple, Optional, Literal +import warnings, threading, cv2, numpy as np + +import cv2 +import numpy as np + +try: + import open3d as o3d # type: ignore +except Exception as exc: # pragma: no cover + o3d = None # type: ignore + _OPEN3D_ERR = exc +else: + _OPEN3D_ERR = None + +from slam.core.landmark_utils import Map +import numpy as np + +ColourAxis = Literal["x", "y", "z", "auto"] + +# --------------------------------------------------------------------------- # +# 3‑D visualiser (Open3D only) # +# --------------------------------------------------------------------------- # + +class Visualizer3D: + """Open3D window that shows the coloured point-cloud and camera path.""" + + def __init__( + self, + color_axis: ColourAxis = "z", + *, + new_colour: Tuple[float, float, float] = (0.0, 1.0, 0.0), + window_size: Tuple[int, int] = (1280, 720), + nav_step: float = 0.25, + ) -> None: + self.backend = "none" + self._closed = False + self._lock = threading.Lock() + self.paused = False + + self.color_axis = color_axis + self.new_colour = np.asarray(new_colour, dtype=np.float32) + self.nav_step = nav_step + + # scalar-to-colour normalisation + self._v_min: Optional[float] = None + self._v_max: Optional[float] = None + self._pc_vec: Optional[np.ndarray] = None # PCA axis for "auto" + + # ------------------------------------------------------------------ # + # Open3D init + # ------------------------------------------------------------------ # + if o3d is None: + warnings.warn(f"[Visualizer3D] Open3D missing → window disabled ({_OPEN3D_ERR})") + return + + vis_cls = ( + o3d.visualization.VisualizerWithKeyCallback + if hasattr(o3d.visualization, "VisualizerWithKeyCallback") + else o3d.visualization.Visualizer + ) + self.vis = vis_cls() + self.vis.create_window("SLAM Map", width=window_size[0], height=window_size[1]) + self.backend = "open3d" + + self.pcd = o3d.geometry.PointCloud() + self.lines = o3d.geometry.LineSet() + self._first = True + + if isinstance(self.vis, o3d.visualization.VisualizerWithKeyCallback): + self._bind_nav_keys() + + print(f"[Visualizer3D] ready | colour_axis={self.color_axis}") + + # ------------------------------------------------------------------ # + # WASDQE first-person navigation + # ------------------------------------------------------------------ # + def _bind_nav_keys(self): + vc = self.vis.get_view_control() + + def translate(delta: np.ndarray): + cam = vc.convert_to_pinhole_camera_parameters() + T = np.eye(4); T[:3, 3] = delta * self.nav_step + cam.extrinsic = T @ cam.extrinsic + vc.convert_from_pinhole_camera_parameters(cam) + return False + + key_map = { + ord("W"): np.array([0, 0, -1]), + ord("S"): np.array([0, 0, 1]), + ord("A"): np.array([-1, 0, 0]), + ord("D"): np.array([ 1, 0, 0]), + ord("Q"): np.array([0, 1, 0]), + ord("E"): np.array([0, -1, 0]), + } + for k, v in key_map.items(): + self.vis.register_key_callback(k, lambda _v, vec=v: translate(vec)) + + # ------------------------------------------------------------------ # + # Helpers for colour mapping + # ------------------------------------------------------------------ # + def _compute_scalar(self, pts: np.ndarray) -> np.ndarray: + if self.color_axis in ("x", "y", "z"): + return pts[:, {"x": 0, "y": 1, "z": 2}[self.color_axis]] + if self._pc_vec is None: # first call → PCA axis + centred = pts - pts.mean(0) + _, _, vh = np.linalg.svd(centred, full_matrices=False) + self._pc_vec = vh[0] + return pts @ self._pc_vec + + def _normalise(self, scalars: np.ndarray) -> 
np.ndarray: + if self._v_min is None: # initialise 5th–95th perc. + self._v_min, self._v_max = np.percentile(scalars, [5, 95]) + else: # expand running min / max + self._v_min = min(self._v_min, scalars.min()) + self._v_max = max(self._v_max, scalars.max()) + return np.clip((scalars - self._v_min) / (self._v_max - self._v_min + 1e-6), 0, 1) + + def _colormap(self, norm: np.ndarray) -> np.ndarray: + try: + import matplotlib.cm as cm + return cm.get_cmap("turbo")(norm)[:, :3] + except Exception: + # fall-back: simple HSV → RGB + h = (1 - norm) * 240 + c = np.ones_like(h); m = np.zeros_like(h) + x = c * (1 - np.abs((h / 60) % 2 - 1)) + return np.select( + [h < 60, h < 120, h < 180, h < 240, h < 300, h >= 300], + [ + np.stack([c, x, m], 1), np.stack([x, c, m], 1), + np.stack([m, c, x], 1), np.stack([m, x, c], 1), + np.stack([x, m, c], 1), np.stack([c, m, x], 1), + ]) + + # ------------------------------------------------------------------ # + # Public interface + # ------------------------------------------------------------------ # + def update(self, slam_map: Map, new_ids: Optional[List[int]] = None): + if self.backend != "open3d" or len(slam_map.points) == 0: + return + if self.paused: + with self._lock: + self.vis.poll_events(); self.vis.update_renderer() + return + + # -------------------------- build numpy arrays ------------------------- + pts = slam_map.get_point_array() + col = slam_map.get_color_array() if hasattr(slam_map, "get_color_array") else None + if col is None or len(col) == 0: # legacy maps without colour + scal = self._compute_scalar(pts) + col = self._colormap(self._normalise(scal)) + else: + col = col.astype(np.float32) + + # keep arrays in sync (pad / trim) + if len(col) < len(pts): + diff = len(pts) - len(col) + col = np.vstack([col, np.full((diff, 3), 0.8, np.float32)]) + elif len(col) > len(pts): + col = col[: len(pts)] + + # highlight newly-added landmarks + if new_ids: + id_to_i = {pid: i for i, pid in enumerate(slam_map.point_ids())} + for nid in new_ids: + if nid in id_to_i: + col[id_to_i[nid]] = self.new_colour + + # ----------------------- Open3D geometry update ------------------------ + with self._lock: + self.pcd.points = o3d.utility.Vector3dVector(pts) + self.pcd.colors = o3d.utility.Vector3dVector(col) + + self._update_lineset(slam_map) + + if self._first: + self.vis.add_geometry(self.pcd); self.vis.add_geometry(self.lines) + self._first = False + else: + self.vis.update_geometry(self.pcd); self.vis.update_geometry(self.lines) + + self.vis.poll_events(); self.vis.update_renderer() + + # ------------------------------------------------------------------ # + # Blue camera trajectory poly-line + # ------------------------------------------------------------------ # + def _update_lineset(self, slam_map: Map): + if len(slam_map.poses) < 2: + return + path = np.asarray([p[:3, 3] for p in slam_map.poses], np.float32) + self.lines.points = o3d.utility.Vector3dVector(path) + self.lines.lines = o3d.utility.Vector2iVector([[i, i + 1] for i in range(len(path) - 1)]) + self.lines.colors = o3d.utility.Vector3dVector(np.tile([[0, 0, 1]], (len(path) - 1, 1))) + + # ------------------------------------------------------------------ # + # Clean shutdown + # ------------------------------------------------------------------ # + def close(self): + if self.backend == "open3d" and not self._closed: + self.vis.destroy_window(); self._closed = True +# --------------------------------------------------------------------------- # +# 2‑D overlay helpers +# 
--------------------------------------------------------------------------- # + +def draw_tracks( + vis: np.ndarray, + tracks: Dict[int, List[Tuple[int, int, int]]], + current_frame: int, + max_age: int = 10, + sample_rate: int = 5, + max_tracks: int = 100, +) -> np.ndarray: + """Draw ageing feature tracks as fading polylines. + + Parameters + ---------- + vis : BGR uint8 image (modified *in‑place*) + tracks : {track_id: [(frame_idx,x,y), ...]} + current_frame: index of the frame being drawn + max_age : only show segments younger than this (#frames) + sample_rate : skip tracks where `track_id % sample_rate != 0` to avoid clutter + max_tracks : cap total rendered tracks for speed + """ + recent = [ + (tid, pts) + for tid, pts in tracks.items() + if current_frame - pts[-1][0] <= max_age + ] + recent.sort(key=lambda x: x[1][-1][0], reverse=True) + + drawn = 0 + for tid, pts in recent: + if drawn >= max_tracks: + break + if tid % sample_rate: + continue + pts = [p for p in pts if current_frame - p[0] <= max_age] + for j in range(1, len(pts)): + _, x0, y0 = pts[j - 1] + _, x1, y1 = pts[j] + ratio = (current_frame - pts[j - 1][0]) / max_age + colour = (0, int(255 * (1 - ratio)), int(255 * ratio)) + cv2.line(vis, (x0, y0), (x1, y1), colour, 2) + drawn += 1 + return vis + + +# --------------------------------------------------------------------------- # +# Lightweight 2-D trajectory plotter (Matplotlib) +# --------------------------------------------------------------------------- # +import matplotlib.pyplot as plt + +class TrajectoryPlotter: + """Interactive Matplotlib window showing estimate (blue) and GT (red).""" + + def __init__(self, figsize: Tuple[int, int] = (5, 5)) -> None: + plt.ion() + self.fig, self.ax = plt.subplots(figsize=figsize) + self.ax.set_aspect("equal", adjustable="datalim") + self.ax.grid(True, ls="--", lw=0.5) + self.line_est, = self.ax.plot([], [], "b-", lw=1.5, label="estimate") + self.line_gt, = self.ax.plot([], [], "r-", lw=1.0, label="ground-truth") + self.ax.legend(loc="upper right") + self._est_xy: list[tuple[float, float]] = [] + self._gt_xy: list[tuple[float, float]] = [] + + # ------------------------------------------------------------------ # + def append(self, + est_pos: np.ndarray, + gt_pos: Optional[np.ndarray] = None, + *, + swap_axes: bool = False, + mirror_x: bool = False) -> None: + """ + Add one position pair and refresh the plot. + + Parameters + ---------- + est_pos : (3,) SLAM position in world coords. + gt_pos : (3,) or None aligned ground-truth, same frame. + swap_axes : plot x<->z if your dataset convention differs. 
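+        mirror_x : negate the estimate's second plotted coordinate
+            (ground truth is left unchanged); useful when the estimated
+            path appears mirrored relative to GT.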
+ """ + x, z = (est_pos[2], est_pos[0]) if swap_axes else (est_pos[0], est_pos[2]) + if mirror_x: + z = -z + self._est_xy.append((x, z)) + ex, ez = zip(*self._est_xy) + self.line_est.set_data(ex, ez) + + if gt_pos is not None: + gx, gz = (gt_pos[2], gt_pos[0]) if swap_axes else (gt_pos[0], gt_pos[2]) + # if mirror_x: + # gx = -gx + self._gt_xy.append((gx, gz)) + gx_s, gz_s = zip(*self._gt_xy) + self.line_gt.set_data(gx_s, gz_s) + + self.ax.relim() + self.ax.autoscale_view() + self.fig.canvas.draw_idle() + self.fig.canvas.flush_events() diff --git a/modules/SimpleSLAM/slam/monocular/__init__.py b/modules/SimpleSLAM/slam/monocular/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/monocular/main.py b/modules/SimpleSLAM/slam/monocular/main.py new file mode 100644 index 0000000000..949fe6349a --- /dev/null +++ b/modules/SimpleSLAM/slam/monocular/main.py @@ -0,0 +1,366 @@ +# main.py +""" +WITHOUT MULTI-VIEW TRIANGULATION + +Entry-point: high-level processing loop +-------------------------------------- +$ python main.py --dataset kitti --base_dir ../Dataset + + +The core loop now performs: + 1) Feature detection + matching (OpenCV or LightGlue) + 2) Essential‑matrix estimation + pose recovery + 3) Landmarks triangulation (with Z‑filtering) + 4) Pose integration (camera trajectory in world frame) + 5) Optional 3‑D visualisation via Open3D + +The script shares most command‑line arguments with the previous version +but adds `--no_viz3d` to disable the 3‑D window. + +""" +import argparse +import cv2 +import lz4.frame +import numpy as np +from tqdm import tqdm +from typing import List + +from slam.core.dataloader import ( + load_sequence, + load_frame_pair, + load_calibration, + load_groundtruth) + +from slam.core.features_utils import ( + init_feature_pipeline, + feature_extractor, + feature_matcher, + filter_matches_ransac) + +from slam.core.keyframe_utils import ( + Keyframe, + update_and_prune_tracks, + select_keyframe, + make_thumb) + +from slam.core.visualization_utils import draw_tracks, Visualizer3D, TrajectoryPlotter +from slam.core.trajectory_utils import compute_gt_alignment, apply_alignment +from slam.core.landmark_utils import Map, triangulate_points +from slam.core.pnp_utils import associate_landmarks, refine_pose_pnp +from slam.core.ba_utils_OG import run_bundle_adjustment + + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser("Feature tracking with key-frames") + p.add_argument('--dataset', + choices=['kitti', 'malaga', 'tum-rgbd', 'custom'], + required=True) + p.add_argument('--base_dir', default='../Dataset') + # feature/detector settings + p.add_argument('--detector', choices=['orb', 'sift', 'akaze'], + default='orb') + p.add_argument('--matcher', choices=['bf'], default='bf') + p.add_argument('--use_lightglue', action='store_true') + p.add_argument('--min_conf', type=float, default=0.7, + help='Minimum LightGlue confidence for a match') + # runtime + p.add_argument('--fps', type=float, default=10) + # RANSAC + p.add_argument('--ransac_thresh', type=float, default=1.0) + # key-frame params + p.add_argument('--kf_max_disp', type=float, default=45) + p.add_argument('--kf_min_inliers', type=float, default=150) + p.add_argument('--kf_cooldown', type=int, default=3) + p.add_argument('--kf_thumb_hw', type=int, nargs=2, + default=[640, 360]) + + # 
3‑D visualisation toggle + p.add_argument("--no_viz3d", action="store_true", help="Disable 3‑D visualization window") + # triangulation depth filtering + p.add_argument("--min_depth", type=float, default=1.0) + p.add_argument("--max_depth", type=float, default=50.0) + + # PnP / map-maintenance + p.add_argument('--pnp_min_inliers', type=int, default=30) + p.add_argument('--proj_radius', type=float, default=3.0) + p.add_argument('--merge_radius', type=float, default=0.10) + + return p + + +# --------------------------------------------------------------------------- # +# Helper functions +# --------------------------------------------------------------------------- # + +def _pose_inv(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Return 4×4 inverse of a rigid‑body transform specified by R|t.""" + Rt = R.T + tinv = -Rt @ t + T = np.eye(4) + T[:3, :3] = Rt + T[:3, 3] = tinv.ravel() + return T + +def get_homo_from_pose_rt(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Homogeneous matrix **T_c1→c2** from the relative pose (R, t) + returned by `recoverPose`.""" + T = np.eye(4) + T[:3, :3] = R + T[:3, 3] = t.ravel() + return T + +# --------------------------------------------------------------------------- # +# Main processing loop +# --------------------------------------------------------------------------- # +def main(): + PAUSED = False + args = _build_parser().parse_args() + + # --- Data loading --- + seq = load_sequence(args) + calib = load_calibration(args) # dict with K_l, P_l, ... + groundtruth = load_groundtruth(args) # None or Nx3x4 array + K = calib["K_l"] # intrinsic matrix for left camera + P = calib["P_l"] # projection matrix for left camera + + # ------ build 4×4 GT poses + alignment matrix (once) ---------------- + gt_T = None + R_align = t_align = None + if groundtruth is not None: + gt_T = np.pad(groundtruth, ((0, 0), (0, 1), (0, 0)), constant_values=0.0) + gt_T[:, 3, 3] = 1.0 # homogeneous 1s + R_align, t_align = compute_gt_alignment(gt_T) + + # --- feature pipeline (OpenCV / LightGlue) --- + detector, matcher = init_feature_pipeline(args) + + # --- tracking state --- + prev_map, tracks = {}, {} + next_track_id = 0 + + world_map = Map() + cur_pose = np.eye(4) # camera‑to‑world (identity at t=0) + world_map.add_pose(cur_pose) + viz3d = None if args.no_viz3d else Visualizer3D(color_axis="y") + plot2d = TrajectoryPlotter() + + kfs: list[Keyframe] = [] + last_kf_idx = -999 + + frame_keypoints: List[List[cv2.KeyPoint]] = [] #CHANGE + # --- visualisation --- + achieved_fps = 0.0 + last_time = cv2.getTickCount() / cv2.getTickFrequency() + + # cv2.namedWindow('Feature Tracking', cv2.WINDOW_NORMAL) + # cv2.resizeWindow('Feature Tracking', 1200, 600) + + total = len(seq) - 1 + + for i in tqdm(range(total), desc='Tracking'): + # --- load image pair --- + img1, img2 = load_frame_pair(args, seq, i) + + # --- feature extraction / matching --- + kp1, des1 = feature_extractor(args, img1, detector) + kp2, des2 = feature_extractor(args, img2, detector) + matches = feature_matcher(args, kp1, kp2, des1, des2, matcher) + # --- filter matches with RANSAC --- + # matches = filter_matches_ransac(kp1, kp2, matches, args.ransac_thresh) + + if len(matches) < 12: + print(f"[WARN] Not enough matches at frame {i}. 
Skipping.") + continue + + # store keypoints for *every* new pose: + if i == 0: + # kp1 is the keypoints for the very first pose (index 0) + frame_keypoints.append(kp1) + # kp2 will correspond to the pose you'll add below at index i+1 + frame_keypoints.append(kp2) + + # ---------------- Key-frame decision -------------------------- # + if i == 0: + kfs.append(Keyframe(0, seq[0] if isinstance(seq[0], str) else "", + kp1, des1, make_thumb(img1, tuple(args.kf_thumb_hw)))) + last_kf_idx = 0 + is_kf = False + else: + prev_len = len(kfs) + kfs, last_kf_idx = select_keyframe( + args, seq, i, img2, kp2, des2, matcher, kfs, last_kf_idx) + is_kf = len(kfs) > prev_len + + + # --- #TODO: INITIALIZATION Relative pose from Essential matrix --- + # TODO missing triangulation step here, also should be in if statement only in beginning or loss of keyframe + pts1 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts2 = np.float32([kp2[m.trainIdx].pt for m in matches]) + + E, mask_E = cv2.findEssentialMat( + pts1, + pts2, + K, + method=cv2.RANSAC, + prob=0.999, + threshold=args.ransac_thresh, + ) + if E is None or E.shape != (3, 3): + print(f"[WARN] Essential matrix failed at frame {i}.") + continue + + _, R, t, mask_pose = cv2.recoverPose(E, pts1, pts2, K) + mask = (mask_E.ravel() & mask_pose.ravel()).astype(bool) + pts1_in, pts2_in = pts1[mask], pts2[mask] + + + # -- Predict pose from last step (VO) -- + pose_prev = cur_pose.copy() + pose_pred = cur_pose @ get_homo_from_pose_rt(R, t) + + + # -----------------------------PNP + cur_pose = pose_pred + used_kp2: list[int] = [] + new_ids: list[int] = [] + + if is_kf: + # -- PnP refinement using existing landmarks -- + pts_w = world_map.get_point_array() + pts3d, pts2d, used_kp2 = associate_landmarks(K, pose_pred, pts_w, kp2, args.proj_radius) + + if len(pts3d) >= args.pnp_min_inliers: + R_pnp, t_pnp = refine_pose_pnp(K, pts3d, pts2d) + if R_pnp is not None: + cur_pose = np.eye(4) + cur_pose[:3, :3] = R_pnp.T + cur_pose[:3, 3] = -R_pnp.T @ t_pnp + else: + cur_pose = pose_pred + else: + cur_pose = pose_pred + + # --- Triangulate new landmarks only for keyframes --- + fresh = [m for m in matches if m.trainIdx not in set(used_kp2)] + if fresh: + pts1_new = np.float32([kp1[m.queryIdx].pt for m in fresh]) + pts2_new = np.float32([kp2[m.trainIdx].pt for m in fresh]) + + rel = np.linalg.inv(pose_prev) @ cur_pose + R_rel, t_rel = rel[:3, :3], rel[:3, 3] + pts3d_cam = triangulate_points(K, R_rel, t_rel, pts1_new, pts2_new) + z = pts3d_cam[:, 2] + valid = (z > args.min_depth) & (z < args.max_depth) + pts3d_cam = pts3d_cam[valid] + + if len(pts3d_cam): + pts3d_world = ( + pose_prev + @ np.hstack([pts3d_cam, np.ones((len(pts3d_cam), 1))]).T + ).T[:, :3] + + # ------------ sample RGB from img2 ------------- + pix = [kp2[m.trainIdx].pt for m in np.asarray(fresh)[valid]] + cols = [] + h, w, _ = img2.shape + for (u, v) in pix: + x, y = int(round(u)), int(round(v)) + if 0 <= x < w and 0 <= y < h: + b, g, r = img2[y, x] # BGR uint8 + cols.append((r, g, b)) # keep RGB order + else: + cols.append((255, 255, 255)) # white fallback + cols = np.float32(cols) / 255.0 # → 0-1 + + # coarse rigid alignment of the *new* cloud to the map + pts3d_world = world_map.align_points_to_map(pts3d_world, radius=args.merge_radius * 2.0) # TODO: HYPER PARAMETER CHECK -MAGIC VARIABLE- radius can be loose here + frame_no = i + 1 + new_ids = world_map.add_points( + pts3d_world, cols, keyframe_idx=frame_no + ) + for pid, m in zip(new_ids, np.asarray(fresh)[valid]): + 
world_map.points[pid].add_observation(frame_no, m.trainIdx) + + if i % 10 == 0: + world_map.fuse_closeby_duplicate_landmarks(args.merge_radius) + + + world_map.add_pose(cur_pose) + + if is_kf and len(world_map.points) > 100: # light heuristic + run_bundle_adjustment(world_map, K, frame_keypoints, + fix_first_pose=True, + max_iters=20) + + + # --- 2-D path plot (cheap) ---------------------------------------------- + est_pos = cur_pose[:3, 3] + gt_pos = None + if gt_T is not None and i + 1 < len(gt_T): + p_gt = gt_T[i + 1, :3, 3] # raw GT + gt_pos = apply_alignment(p_gt, R_align, t_align) + plot2d.append(est_pos, gt_pos, mirror_x=True) + + + # # --- 2-D track maintenance (for GUI only) --- + # frame_no = i + 1 + # prev_map, tracks, next_track_id = update_and_prune_tracks( + # matches, prev_map, tracks, kp2, frame_no, next_track_id) + + + # --- 3-D visualisation --- + if viz3d is not None: + viz3d.update(world_map, new_ids) + + + key = cv2.waitKey(1) & 0xFF + if key == 27: + break + if key == ord('p') and viz3d is not None: + viz3d.paused = not viz3d.paused + # # ---------------------- GUI FOR 2D feature Tracking -------------------------- # + # vis = draw_tracks(img2.copy(), tracks, frame_no) + # for t in prev_map.keys(): + # cv2.circle(vis, tuple(map(int, kp2[t].pt)), 3, (0, 255, 0), -1) + + # cv2.putText(vis, f"KF idx: {last_kf_idx} | total KFs: {len(kfs)}", + # (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, + # (255, 255, 0), 2) + + # # thumb strip (last 4) + # thumbs = [cv2.imdecode( + # np.frombuffer(lz4.frame.decompress(k.thumb), np.uint8), + # cv2.IMREAD_COLOR) for k in kfs[-4:]] + # bar = (np.hstack(thumbs) if thumbs else + # np.zeros((*args.kf_thumb_hw[::-1], 3), np.uint8)) + # cv2.imshow('Keyframes', bar) + + # cv2.putText(vis, + # (f"Frame {frame_no}/{total} | " + # f"Tracks: {len(tracks)} | " + # f"FPS: {achieved_fps:.1f}"), + # (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, + # (255, 255, 255), 2) + + # cv2.imshow('Feature Tracking', vis) + # wait_ms = int(1000 / args.fps) if args.fps > 0 else 1 + # key = cv2.waitKey(0 if PAUSED else wait_ms) & 0xFF + # if key == 27: # ESC to exit + # break + # elif key == ord('p'): # 'p' to toggle pause + # PAUSED = not PAUSED + # continue + + # update FPS + # now = cv2.getTickCount() / cv2.getTickFrequency() + # achieved_fps = 1.0 / (now - last_time) + # last_time = now + + cv2.destroyAllWindows() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/monocular/main2.py b/modules/SimpleSLAM/slam/monocular/main2.py new file mode 100644 index 0000000000..c78dec94e6 --- /dev/null +++ b/modules/SimpleSLAM/slam/monocular/main2.py @@ -0,0 +1,332 @@ +# main.py + +""" +WITH MULTI-VIEW TRIANGULATION + +Entry-point: high-level processing loop +-------------------------------------- +$ python main.py --dataset kitti --base_dir ../Dataset + + +The core loop now performs: + 1) Feature detection + matching (OpenCV or LightGlue) + 2) Essential‑matrix estimation + pose recovery + 3) Landmarks triangulation (with Z‑filtering) + 4) Pose integration (camera trajectory in world frame) + 5) Optional 3‑D visualisation via Open3D + +The script shares most command‑line arguments with the previous version +but adds `--no_viz3d` to disable the 3‑D window. 
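+
+Compared to main.py, landmarks are triangulated lazily by
+``MultiViewTriangulator``: a track is triangulated only once it has been
+observed in at least ``--mv_views`` key-frames and passes the
+``--max_rep_err`` and depth gates.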
+ +""" +import argparse +import cv2 +import lz4.frame +import numpy as np +from tqdm import tqdm + + +from slam.core.dataloader import ( + load_sequence, + load_frame_pair, + load_calibration, + load_groundtruth) + +from slam.core.features_utils import ( + init_feature_pipeline, + feature_extractor, + feature_matcher, + filter_matches_ransac) + +from slam.core.keyframe_utils import ( + Keyframe, + update_and_prune_tracks, + select_keyframe, + make_thumb) + +from slam.core.visualization_utils import draw_tracks, Visualizer3D, TrajectoryPlotter +from slam.core.trajectory_utils import compute_gt_alignment, apply_alignment +from slam.core.landmark_utils import Map +from slam.core.multi_view_utils import MultiViewTriangulator +from slam.core.pnp_utils import associate_landmarks, refine_pose_pnp + + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser("Feature tracking with key-frames") + p.add_argument('--dataset', + choices=['kitti', 'malaga', 'tum-rgbd', 'custom'], + required=True) + p.add_argument('--base_dir', default='../Dataset') + # feature/detector settings + p.add_argument('--detector', choices=['orb', 'sift', 'akaze'], + default='orb') + p.add_argument('--matcher', choices=['bf'], default='bf') + p.add_argument('--use_lightglue', action='store_true') + p.add_argument('--min_conf', type=float, default=0.7, + help='Minimum LightGlue confidence for a match') + # runtime + p.add_argument('--fps', type=float, default=10) + # RANSAC + p.add_argument('--ransac_thresh', type=float, default=1.0) + # key-frame params + p.add_argument('--kf_max_disp', type=float, default=45) + p.add_argument('--kf_min_inliers', type=float, default=150) + p.add_argument('--kf_cooldown', type=int, default=3) + p.add_argument('--kf_thumb_hw', type=int, nargs=2, + default=[640, 360]) + + # 3‑D visualisation toggle + p.add_argument("--no_viz3d", action="store_true", help="Disable 3‑D visualization window") + # triangulation depth filtering + p.add_argument("--min_depth", type=float, default=1.0) + p.add_argument("--max_depth", type=float, default=50.0) + p.add_argument('--max_rep_err', type=float, default=2.5, + help='max mean reprojection error (px) for multi-view triangulation') + + # multi-view triangulation + p.add_argument('--mv_views', type=int, default=3, + help='minimum keyframes required before triangulating a track') + + # PnP / map-maintenance + p.add_argument('--pnp_min_inliers', type=int, default=30) + p.add_argument('--proj_radius', type=float, default=3.0) + p.add_argument('--merge_radius', type=float, default=0.10) + + return p + + +# --------------------------------------------------------------------------- # +# Helper functions +# --------------------------------------------------------------------------- # + +def _pose_inv(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Return 4×4 inverse of a rigid‑body transform specified by R|t.""" + Rt = R.T + tinv = -Rt @ t + T = np.eye(4) + T[:3, :3] = Rt + T[:3, 3] = tinv.ravel() + return T + +def _pose_rt(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Homogeneous matrix **T_c1→c2** from the relative pose (R, t) + returned by `recoverPose`.""" + T = np.eye(4) + T[:3, :3] = R + T[:3, 3] = t.ravel() + return T + +# --------------------------------------------------------------------------- # +# Main processing loop +# 
--------------------------------------------------------------------------- # +def main(): + PAUSED = False + args = _build_parser().parse_args() + + # --- Data loading --- + seq = load_sequence(args) + calib = load_calibration(args) # dict with K_l, P_l, ... + groundtruth = load_groundtruth(args) # None or Nx3x4 array + K = calib["K_l"] # intrinsic matrix for left camera + P = calib["P_l"] # projection matrix for left camera + + # ------ build 4×4 GT poses + alignment matrix (once) ---------------- + gt_T = None + R_align = t_align = None + if groundtruth is not None: + gt_T = np.pad(groundtruth, ((0, 0), (0, 1), (0, 0)), constant_values=0.0) + gt_T[:, 3, 3] = 1.0 # homogeneous 1s + R_align, t_align = compute_gt_alignment(gt_T) + + # --- feature pipeline (OpenCV / LightGlue) --- + detector, matcher = init_feature_pipeline(args) + + # --- tracking state --- + prev_map, tracks = {}, {} + next_track_id = 0 + mv_triangulator = MultiViewTriangulator( + K, + min_views = args.mv_views, + merge_radius = args.merge_radius, + max_rep_err = args.max_rep_err, + min_depth = args.min_depth, + max_depth = args.max_depth, +) + + world_map = Map() + cur_pose = np.eye(4) # camera‑to‑world (identity at t=0) + world_map.add_pose(cur_pose) + viz3d = None if args.no_viz3d else Visualizer3D(color_axis="y") + plot2d = TrajectoryPlotter() + + kfs: list[Keyframe] = [] + last_kf_idx = -999 + + # --- visualisation --- + achieved_fps = 0.0 + last_time = cv2.getTickCount() / cv2.getTickFrequency() + + # cv2.namedWindow('Feature Tracking', cv2.WINDOW_NORMAL) + # cv2.resizeWindow('Feature Tracking', 1200, 600) + + total = len(seq) - 1 + + for i in tqdm(range(total), desc='Tracking'): + # --- load image pair --- + img1, img2 = load_frame_pair(args, seq, i) + + # --- feature extraction / matching --- + kp1, des1 = feature_extractor(args, img1, detector) + kp2, des2 = feature_extractor(args, img2, detector) + matches = feature_matcher(args, kp1, kp2, des1, des2, matcher) + # --- filter matches with RANSAC --- + matches = filter_matches_ransac(kp1, kp2, matches, args.ransac_thresh) + + # # --- 2-D Feature track maintenance --- + frame_no = i + 1 + prev_map, tracks, next_track_id = update_and_prune_tracks( + matches, prev_map, tracks, kp2, frame_no, next_track_id) + + if len(matches) < 12: + print(f"[WARN] Not enough matches at frame {i}. 
Skipping.") + continue + + # --- Relative pose from Essential matrix --- + pts1 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts2 = np.float32([kp2[m.trainIdx].pt for m in matches]) + + E, mask_E = cv2.findEssentialMat( + pts1, + pts2, + K, + method=cv2.RANSAC, + prob=0.999, + threshold=args.ransac_thresh, + ) + if E is None or E.shape != (3, 3): + print(f"[WARN] Essential matrix failed at frame {i}.") + continue + + _, R, t, mask_pose = cv2.recoverPose(E, pts1, pts2, K) + mask = (mask_E.ravel() & mask_pose.ravel()).astype(bool) + pts1_in, pts2_in = pts1[mask], pts2[mask] + + + # -- Predict pose from last step (VO) -- + pose_prev = cur_pose.copy() + pose_pred = cur_pose @ _pose_rt(R, t) + + # ---------------- Key-frame decision -------------------------- # + if i == 0: + kfs.append(Keyframe(0, seq[0] if isinstance(seq[0], str) else "", + kp1, des1, make_thumb(img1, tuple(args.kf_thumb_hw)))) + last_kf_idx = 0 + is_kf = False + else: + prev_len = len(kfs) + kfs, last_kf_idx = select_keyframe( + args, seq, i, img2, kp2, des2, matcher, kfs, last_kf_idx) + is_kf = len(kfs) > prev_len + + cur_pose = pose_pred + used_kp2: list[int] = [] + new_ids: list[int] = [] + + if is_kf: + # -- PnP refinement using existing landmarks -- + pts_w = world_map.get_point_array() + pts3d, pts2d, used_kp2 = associate_landmarks(K, pose_pred, pts_w, kp2, args.proj_radius) + + if len(pts3d) >= args.pnp_min_inliers: + R_pnp, t_pnp = refine_pose_pnp(K, pts3d, pts2d) + if R_pnp is not None: + cur_pose = np.eye(4) + cur_pose[:3, :3] = R_pnp.T + cur_pose[:3, 3] = -R_pnp.T @ t_pnp + else: + cur_pose = pose_pred + else: + cur_pose = pose_pred + + # --- Triangulate Keyframe observations when ready (enough keyframes) --- + mv_triangulator.add_keyframe(frame_no, cur_pose, kp2, prev_map, img2) + new_ids = mv_triangulator.triangulate_ready_tracks(world_map) + + # if i % 10 == 0: + # world_map.fuse_closeby_duplicate_landmarks(args.merge_radius) + + + world_map.add_pose(cur_pose) + + # --- 2-D path plot (cheap) ---------------------------------------------- + est_pos = cur_pose[:3, 3] + gt_pos = None + if gt_T is not None and i + 1 < len(gt_T): + p_gt = gt_T[i + 1, :3, 3] # raw GT + gt_pos = apply_alignment(p_gt, R_align, t_align) + plot2d.append(est_pos, gt_pos, mirror_x=True) + + + # # --- 2-D track maintenance (for GUI only) --- + # frame_no = i + 1 + # prev_map, tracks, next_track_id = update_and_prune_tracks( + # matches, prev_map, tracks, kp2, frame_no, next_track_id) + + + # --- 3-D visualisation --- + if viz3d is not None: + viz3d.update(world_map, new_ids) + + + key = cv2.waitKey(1) & 0xFF + if key == 27: + break + if key == ord('p') and viz3d is not None: + viz3d.paused = not viz3d.paused + + # # ---------------------- GUI FOR 2D feature Tracking -------------------------- # + # vis = draw_tracks(img2.copy(), tracks, frame_no) + # for t in prev_map.keys(): + # cv2.circle(vis, tuple(map(int, kp2[t].pt)), 3, (0, 255, 0), -1) + + # cv2.putText(vis, f"KF idx: {last_kf_idx} | total KFs: {len(kfs)}", + # (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, + # (255, 255, 0), 2) + + # # thumb strip (last 4) + # thumbs = [cv2.imdecode( + # np.frombuffer(lz4.frame.decompress(k.thumb), np.uint8), + # cv2.IMREAD_COLOR) for k in kfs[-4:]] + # bar = (np.hstack(thumbs) if thumbs else + # np.zeros((*args.kf_thumb_hw[::-1], 3), np.uint8)) + # cv2.imshow('Keyframes', bar) + + # cv2.putText(vis, + # (f"Frame {frame_no}/{total} | " + # f"Tracks: {len(tracks)} | " + # f"FPS: {achieved_fps:.1f}"), + # (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, + # 
(255, 255, 255), 2) + + # cv2.imshow('Feature Tracking', vis) + # wait_ms = int(1000 / args.fps) if args.fps > 0 else 1 + # key = cv2.waitKey(0 if PAUSED else wait_ms) & 0xFF + # if key == 27: # ESC to exit + # break + # elif key == ord('p'): # 'p' to toggle pause + # PAUSED = not PAUSED + # continue + + # update FPS + # now = cv2.getTickCount() / cv2.getTickFrequency() + # achieved_fps = 1.0 / (now - last_time) + # last_time = now + + cv2.destroyAllWindows() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/monocular/main3.py b/modules/SimpleSLAM/slam/monocular/main3.py new file mode 100644 index 0000000000..e715d1aa65 --- /dev/null +++ b/modules/SimpleSLAM/slam/monocular/main3.py @@ -0,0 +1,523 @@ +# main.py +""" +WITHOUT MULTI-VIEW TRIANGULATION - Use 2 different stratergy for motion estimation + +Entry-point: high-level processing loop +-------------------------------------- +$ python main.py --dataset kitti --base_dir ../Dataset + + +The core loop now performs: + 1) Feature detection + matching (OpenCV or LightGlue) + 2) Essential‑matrix estimation + pose recovery + 3) Landmarks triangulation (with Z‑filtering) + 4) Pose integration (camera trajectory in world frame) + 5) Optional 3‑D visualisation via Open3D + +The script shares most command‑line arguments with the previous version +but adds `--no_viz3d` to disable the 3‑D window. + +""" +import argparse +import cv2 +import lz4.frame +import numpy as np +from tqdm import tqdm +from typing import List + +from slam.core.dataloader import ( + load_sequence, + load_frame_pair, + load_calibration, + load_groundtruth) + +from slam.core.features_utils import ( + init_feature_pipeline, + feature_extractor, + feature_matcher, + filter_matches_ransac) + +from slam.core.keyframe_utils import ( + Keyframe, + update_and_prune_tracks, + select_keyframe, + make_thumb) + +from slam.core.visualization_utils import draw_tracks, Visualizer3D, TrajectoryPlotter +from slam.core.trajectory_utils import compute_gt_alignment, apply_alignment +from slam.core.landmark_utils import Map, triangulate_points +from slam.core.pnp_utils import associate_landmarks, refine_pose_pnp +from slam.core.ba_utils_OG import run_bundle_adjustment +from slam.core.ba_utils import ( + two_view_ba, + pose_only_ba, + local_bundle_adjustment +) + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser("Feature tracking with key-frames") + p.add_argument('--dataset', + choices=['kitti', 'malaga', 'tum-rgbd', 'custom'], + required=True) + p.add_argument('--base_dir', default='../Dataset') + # feature/detector settings + p.add_argument('--detector', choices=['orb', 'sift', 'akaze'], + default='orb') + p.add_argument('--matcher', choices=['bf'], default='bf') + p.add_argument('--use_lightglue', action='store_true') + p.add_argument('--min_conf', type=float, default=0.7, + help='Minimum LightGlue confidence for a match') + # runtime + p.add_argument('--fps', type=float, default=10) + # RANSAC + p.add_argument('--ransac_thresh', type=float, default=1.0) + # key-frame params + p.add_argument('--kf_max_disp', type=float, default=45) + p.add_argument('--kf_min_inliers', type=float, default=150) + p.add_argument('--kf_cooldown', type=int, default=3) + p.add_argument('--kf_thumb_hw', type=int, nargs=2, + default=[640, 360]) + + # 3‑D visualisation 
toggle + p.add_argument("--no_viz3d", action="store_true", help="Disable 3‑D visualization window") + # triangulation depth filtering + p.add_argument("--min_depth", type=float, default=1.0) + p.add_argument("--max_depth", type=float, default=50.0) + + # PnP / map-maintenance + p.add_argument('--pnp_min_inliers', type=int, default=30) + p.add_argument('--proj_radius', type=float, default=3.0) + p.add_argument('--merge_radius', type=float, default=0.10) + + return p + + +# --------------------------------------------------------------------------- # +# Helper functions +# --------------------------------------------------------------------------- # + +def _pose_inv(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Return 4×4 inverse of a rigid‑body transform specified by R|t.""" + Rt = R.T + tinv = -Rt @ t + T = np.eye(4) + T[:3, :3] = Rt + T[:3, 3] = tinv.ravel() + return T + +def get_homo_from_pose_rt(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Homogeneous matrix **T_c1→c2** from the relative pose (R, t) + returned by `recoverPose`.""" + T = np.eye(4) + T[:3, :3] = R + T[:3, 3] = t.ravel() + return T + +def try_bootstrap(K, kp0, kp1, matches, args, world_map): + """Return (success, T_cam0_w, T_cam1_w) and add initial landmarks.""" + if len(matches) < 50: + return False, None, None + + # 1. pick a model: Essential (general) *or* Homography (planar) + pts0 = np.float32([kp0[m.queryIdx].pt for m in matches]) + pts1 = np.float32([kp1[m.trainIdx].pt for m in matches]) + + E, inl_E = cv2.findEssentialMat( + pts0, pts1, K, cv2.RANSAC, 0.999, args.ransac_thresh) + + H, inl_H = cv2.findHomography( + pts0, pts1, cv2.RANSAC, args.ransac_thresh) + + # score = #inliers (ORB-SLAM uses a more elaborate model-selection score) + use_E = (inl_E.sum() > inl_H.sum()) + + if use_E and E is None: + return False, None, None + if not use_E and H is None: + return False, None, None + + if use_E: + _, R, t, inl_pose = cv2.recoverPose(E, pts0, pts1, K) + mask = (inl_E.ravel() & inl_pose.ravel()).astype(bool) + else: + R, t = cv2.decomposeHomographyMat(H, K)[1:3] # take the best hypothesis + mask = inl_H.ravel().astype(bool) + + # 2. triangulate those inliers – exactly once + p0 = pts0[mask] + p1 = pts1[mask] + pts3d = triangulate_points(K, R, t, p0, p1) + z = pts3d[:, 2] + ok = (z > args.min_depth) & (z < args.max_depth) + pts3d = pts3d[ok] + + if len(pts3d) < 80: + return False, None, None + + # 3. 
fill the map + T0_w = np.eye(4) + T1_w = np.eye(4) + T1_w[:3, :3] = R.T + T1_w[:3, 3] = ( -R.T @ t ).ravel() + + # world_map.reset() # fresh map + # world_map.add_pose(T0_w) + world_map.add_pose(T1_w) + + cols = np.full((len(pts3d), 3), 0.7) # grey – colour is optional here + ids = world_map.add_points(pts3d, cols, keyframe_idx=1) + + # ----------------------------------------------- + # add (frame_idx , kp_idx) pairs for each new MP + # ----------------------------------------------- + inlier_kp_idx = np.where(mask)[0][ok] # kp indices that survived depth + for pid, kp_idx in zip(ids, inlier_kp_idx): + world_map.points[pid].add_observation(0, kp_idx) # img0 side + world_map.points[pid].add_observation(1, kp_idx) # img1 side + + + print(f"[BOOTSTRAP] Map initialised with {len(ids)} landmarks.") + return True, T0_w, T1_w + + +def solve_pnp_step(K, pose_pred, world_map, kp, args): + """Return (success, refined_pose, used_kp_indices).""" + pts_w = world_map.get_point_array() + pts3d, pts2d, used = associate_landmarks( + K, pose_pred, pts_w, kp, args.proj_radius) + + if len(pts3d) < args.pnp_min_inliers: + return False, pose_pred, used + + R, t = refine_pose_pnp(K, pts3d, pts2d) + if R is None: + return False, pose_pred, used + + T = np.eye(4) + T[:3, :3] = R.T + T[:3, 3] = -R.T @ t + print(f"[PNP] Pose refined with {len(pts3d)} inliers.") + return True, T, used + + +def triangulate_new_points(K, pose_prev, pose_cur, + kp_prev, kp_cur, matches, + used_cur_idx, args, world_map, img_cur): + """ + Triangulate *all* keypoints that are ❶ un-tracked in the current + key-frame and ❷ fall inside the [min_depth , max_depth] band. + + The former parallax (bearing-angle) gate is gone, so we return more + candidate landmarks per key-frame. Depth filtering is kept to + avoid the most obvious degeneracies (z≈0 or way too far). 
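+
+    On success the function returns the pair ``(new_ids, fresh)`` – the ids
+    of the landmarks just added to ``world_map`` and the matches that
+    survived the depth gate – so the caller can attach observations to the
+    new map points.  A usage sketch follows (it mirrors the key-frame branch
+    in ``main`` further below; ``kf_idx`` is a stand-in name for the
+    caller's current key-frame index, not a variable defined here):
+
+        new_ids, fresh = triangulate_new_points(
+            K, pose_prev, cur_pose, kp1, kp2, matches,
+            used_idx, args, world_map, img2)
+        for pid, m in zip(new_ids, fresh):
+            world_map.points[pid].add_observation(kf_idx - 1, m.queryIdx)
+            world_map.points[pid].add_observation(kf_idx,     m.trainIdx)
+
+    Note that the early-exit branches below return a bare ``[]``, so the
+    caller should only unpack two values when new candidates were produced.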
+ """ + # ------------------------------------------------------------------ + # 1) keep only keypoints that PnP did **not** consume + # ------------------------------------------------------------------ + fresh = [m for m in matches if m.trainIdx not in used_cur_idx] + if not fresh: + return [] # nothing new to add this time + + p0 = np.float32([kp_prev[m.queryIdx].pt for m in fresh]) + p1 = np.float32([kp_cur[m.trainIdx].pt for m in fresh]) + + # ------------------------------------------------------------------ + # 2) triangulate once per key-frame pair + # ------------------------------------------------------------------ + rel = np.linalg.inv(pose_prev) @ pose_cur + R_rel = rel[:3, :3] + t_rel = rel[:3, 3] + + pts3d_cam = triangulate_points(K, R_rel, t_rel, p0, p1) + + # ------------------------------------------------------------------ + # 3) depth sanity check (keep only z within user bounds) + # ------------------------------------------------------------------ + z = pts3d_cam[:, 2] + depth_ok = (z > args.min_depth) & (z < args.max_depth) + if not depth_ok.any(): + return [] # everything was outside bounds + + pts3d_cam = pts3d_cam[depth_ok] + fresh = [m for m, ok in zip(fresh, depth_ok) if ok] + + # ------------------------------------------------------------------ + # 4) colour sampling for the surviving points (optional but nice) + # ------------------------------------------------------------------ + h, w, _ = img_cur.shape + pix = [kp_cur[m.trainIdx].pt for m in fresh] + cols = [] + for (u, v) in pix: + x, y = int(round(u)), int(round(v)) + if 0 <= x < w and 0 <= y < h: + b, g, r = img_cur[y, x] + cols.append((r, g, b)) + else: + cols.append((255, 255, 255)) + cols = np.float32(cols) / 255.0 + + # ------------------------------------------------------------------ + # 5) transform to world frame and commit to the map + # ------------------------------------------------------------------ + pts3d_w = pose_prev @ np.hstack([pts3d_cam, np.ones((len(pts3d_cam), 1))]).T + pts3d_w = pts3d_w.T[:, :3] + + new_ids = world_map.add_points(pts3d_w, cols, + keyframe_idx=len(world_map.poses) - 1) + print(f"[TRIANGULATION] Added {len(new_ids)} new landmarks.") + return new_ids, fresh + +# def triangulate_new_points(K, pose_prev, pose_cur, +# kp_prev, kp_cur, matches, +# used_cur_idx, args, world_map, img_cur): +# """Triangulate only *unmatched* keypoints and add them if baseline is good.""" +# fresh = [m for m in matches if m.trainIdx not in used_cur_idx] +# if not fresh: +# return [] + +# p0 = np.float32([kp_prev[m.queryIdx].pt for m in fresh]) +# p1 = np.float32([kp_cur[m.trainIdx].pt for m in fresh]) + +# # -- baseline / parallax test (ORB-SLAM uses θ ≈ 1°) +# rel = np.linalg.inv(pose_prev) @ pose_cur +# R_rel, t_rel = rel[:3, :3], rel[:3, 3] +# cos_thresh = np.cos(np.deg2rad(1.0)) + +# rays0 = cv2.undistortPoints(p0.reshape(-1, 1, 2), K, None).reshape(-1, 2) +# rays1 = cv2.undistortPoints(p1.reshape(-1, 1, 2), K, None).reshape(-1, 2) +# rays0 = np.hstack([rays0, np.ones((len(rays0), 1))]) +# rays1 = np.hstack([rays1, np.ones((len(rays1), 1))]) + +# ang_ok = np.einsum('ij,ij->i', rays0, (R_rel @ rays1.T).T) < cos_thresh +# if not ang_ok.any(): +# return [] + +# p0, p1 = p0[ang_ok], p1[ang_ok] +# fresh = [m for m, keep in zip(fresh, ang_ok) if keep] + +# pts3d = triangulate_points(K, R_rel, t_rel, p0, p1) +# z = pts3d[:, 2] +# depth_ok = (z > args.min_depth) & (z < args.max_depth) +# pts3d = pts3d[depth_ok] +# fresh = [m for m, keep in zip(fresh, depth_ok) if keep] + +# if not len(pts3d): +# 
return [] + +# # colour sampling +# h, w, _ = img_cur.shape +# pix = [kp_cur[m.trainIdx].pt for m in fresh] +# cols = [] +# for (u, v) in pix: +# x, y = int(round(u)), int(round(v)) +# if 0 <= x < w and 0 <= y < h: +# b, g, r = img_cur[y, x] +# cols.append((r, g, b)) +# else: +# cols.append((255, 255, 255)) +# cols = np.float32(cols) / 255.0 + +# pts3d_w = pose_prev @ np.hstack([pts3d, np.ones((len(pts3d), 1))]).T +# pts3d_w = pts3d_w.T[:, :3] +# ids = world_map.add_points(pts3d_w, cols, keyframe_idx=len(world_map.poses)-1) +# return ids + + +# --------------------------------------------------------------------------- # +# Main processing loop +# --------------------------------------------------------------------------- # +def main(): + PAUSED = False + args = _build_parser().parse_args() + + # --- Data loading --- + seq = load_sequence(args) + calib = load_calibration(args) # dict with K_l, P_l, ... + groundtruth = load_groundtruth(args) # None or Nx3x4 array + K = calib["K_l"] # intrinsic matrix for left camera + P = calib["P_l"] # projection matrix for left camera + + # ------ build 4×4 GT poses + alignment matrix (once) ---------------- + gt_T = None + R_align = t_align = None + if groundtruth is not None: + gt_T = np.pad(groundtruth, ((0, 0), (0, 1), (0, 0)), constant_values=0.0) + gt_T[:, 3, 3] = 1.0 # homogeneous 1s + R_align, t_align = compute_gt_alignment(gt_T) + + # --- feature pipeline (OpenCV / LightGlue) --- + detector, matcher = init_feature_pipeline(args) + + # --- tracking state --- + prev_map, tracks = {}, {} + next_track_id = 0 + initialised = False + tracking_lost = False + + + world_map = Map() + # cur_pose = np.eye(4) # camera‑to‑world (identity at t=0) + # world_map.add_pose(cur_pose) #TODO CODE IS INITIALIZING SOMEWHERE ELSE IN BOOTSTRAP CHECK IT + viz3d = None if args.no_viz3d else Visualizer3D(color_axis="y") + plot2d = TrajectoryPlotter() + + kfs: list[Keyframe] = [] + last_kf_idx = -999 + + # TODO FOR BUNDLE ADJUSTMENT + frame_keypoints: List[List[cv2.KeyPoint]] = [] #CHANGE + frame_keypoints.append([]) # placeholder to keep indices aligned + + # --- visualisation --- + achieved_fps = 0.0 + last_time = cv2.getTickCount() / cv2.getTickFrequency() + + # cv2.namedWindow('Feature Tracking', cv2.WINDOW_NORMAL) + # cv2.resizeWindow('Feature Tracking', 1200, 600) + + total = len(seq) - 1 + + + + new_ids: list[int] = [] # CHANGE + + for i in tqdm(range(total), desc='Tracking'): + # --- load image pair --- + img1, img2 = load_frame_pair(args, seq, i) + + # --- feature extraction / matching --- + kp1, des1 = feature_extractor(args, img1, detector) + kp2, des2 = feature_extractor(args, img2, detector) + matches = feature_matcher(args, kp1, kp2, des1, des2, matcher) + # --- filter matches with RANSAC --- + matches = filter_matches_ransac(kp1, kp2, matches, args.ransac_thresh) + + if len(matches) < 12: + print(f"[WARN] Not enough matches at frame {i}. 
Skipping.") + continue + + # ---------------- Key-frame decision -------------------------- # TODO make every frame a keyframe + frame_keypoints.append(kp2) + if i == 0: + kfs.append(Keyframe(0, seq[0] if isinstance(seq[0], str) else "", + kp1, des1, make_thumb(img1, tuple(args.kf_thumb_hw)))) + last_kf_idx = 0 + prev_len = len(kfs) + is_kf = False + continue + else: + prev_len = len(kfs) + kfs, last_kf_idx = select_keyframe( + args, seq, i, img2, kp2, des2, matcher, kfs, last_kf_idx) + is_kf = len(kfs) > prev_len + + print("len(kfs) = ", len(kfs), "last_kf_idx = ", last_kf_idx) + # ------------------------------------------------ bootstrap ------------------------------------------------ + if not initialised: + if len(kfs) < 2: + continue + bootstrap_matches = feature_matcher(args, kfs[0].kps, kfs[-1].kps, kfs[0].desc, kfs[-1].desc, matcher) + bootstrap_matches = filter_matches_ransac(kfs[0].kps, kfs[-1].kps, bootstrap_matches, args.ransac_thresh) + ok, T0, T1 = try_bootstrap(K, kfs[0].kps, kfs[-1].kps, bootstrap_matches, args, world_map) + if ok: + frame_keypoints[0] = kfs[0].kps # BA (img1 is frame-0) + frame_keypoints[1] = kfs[1].kps # BA (img2 is frame-1) + # print("POSES: " ,world_map.poses) + two_view_ba(world_map, K, frame_keypoints, max_iters=25) # BA + + initialised = True + cur_pose = T1 # we are at frame i+1 + pose_prev = T0 + continue + else: + continue # keep trying with next frame + + # ------------------------------------------------ tracking ------------------------------------------------- + pose_pred = cur_pose.copy() # CV-predict could go here + ok_pnp, cur_pose, used_idx = solve_pnp_step( + K, pose_pred, world_map, kfs[-1].kps, args) # last keyframe + + if not ok_pnp: # fallback to 2-D-2-D if PnP failed + pts0 = np.float32([kp1[m.queryIdx].pt for m in matches]) + pts1 = np.float32([kp2[m.trainIdx].pt for m in matches]) + E, mask = cv2.findEssentialMat(pts0, pts1, K, cv2.RANSAC, + 0.999, args.ransac_thresh) + if E is None: + tracking_lost = True + continue + _, R, t, mpose = cv2.recoverPose(E, pts0, pts1, K) + cur_pose = cur_pose @ get_homo_from_pose_rt(R, t) + tracking_lost = False + + world_map.add_pose(cur_pose) # always push *some* pose + + # pose_only_ba(world_map, K, frame_keypoints, # FOR BA + # frame_idx=len(world_map.poses)-1) + + # ------------------------------------------------ map growth ------------------------------------------------ + if is_kf: + new_ids, fresh = triangulate_new_points( + K, pose_prev, cur_pose, + kp1, kp2, matches, + used_idx, args, world_map, img2 + ) + + print(last_kf_idx, "LAST --------------------CURRENT", len(kfs)) + # last_kf_idx = len(world_map.poses) - 1 # current KF’s index + + for pid, m in zip(new_ids, fresh): + world_map.points[pid].add_observation(last_kf_idx - 1, m.queryIdx) + world_map.points[pid].add_observation(last_kf_idx, m.trainIdx) + + if new_ids: + print(f"Added {len(new_ids)} new landmarks.") + + pose_prev = cur_pose.copy() + + # local_bundle_adjustment( + # world_map, K, frame_keypoints, + # center_kf_idx=last_kf_idx, + # window_size=5 ) + + + # if is_kf and len(world_map.points) > 100: # light heuristic + # run_bundle_adjustment(world_map, K, frame_keypoints, + # fix_first_pose=True, + # max_iters=20) + + + # --- 2-D path plot (cheap) ---------------------------------------------- + est_pos = cur_pose[:3, 3] + gt_pos = None + if gt_T is not None and i + 1 < len(gt_T): + p_gt = gt_T[i + 1, :3, 3] # raw GT + gt_pos = apply_alignment(p_gt, R_align, t_align) + plot2d.append(est_pos, gt_pos, mirror_x=True) + + + # # 
--- 2-D track maintenance (for GUI only) --- + # frame_no = i + 1 + # prev_map, tracks, next_track_id = update_and_prune_tracks( + # matches, prev_map, tracks, kp2, frame_no, next_track_id) + + + # --- 3-D visualisation --- + if viz3d is not None: + viz3d.update(world_map, new_ids) + + + key = cv2.waitKey(1) & 0xFF + if key == 27: + break + if key == ord('p') and viz3d is not None: + viz3d.paused = not viz3d.paused + + + cv2.destroyAllWindows() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/monocular/main4.py b/modules/SimpleSLAM/slam/monocular/main4.py new file mode 100644 index 0000000000..4611b3e3dd --- /dev/null +++ b/modules/SimpleSLAM/slam/monocular/main4.py @@ -0,0 +1,492 @@ +# main.py +""" +WITHOUT MULTI-VIEW TRIANGULATION - Use 2 different stratergy for motion estimation + +Entry-point: high-level processing loop +-------------------------------------- +$ python main.py --dataset kitti --base_dir ../Dataset + + +The core loop now performs: + 1) Feature detection + matching (OpenCV or LightGlue) + 2) Essential‑matrix estimation + pose recovery + 3) Landmarks triangulation (with Z‑filtering) + 4) Pose integration (camera trajectory in world frame) + 5) Optional 3‑D visualisation via Open3D + +The script shares most command‑line arguments with the previous version +but adds `--no_viz3d` to disable the 3‑D window. + +""" +import argparse +import cv2 +import lz4.frame +import numpy as np +from tqdm import tqdm +from typing import List + +from slam.core.dataloader import ( + load_sequence, + load_frame_pair, + load_calibration, + load_groundtruth) + +from slam.core.features_utils import ( + init_feature_pipeline, + feature_extractor, + feature_matcher, + filter_matches_ransac) + +from slam.core.keyframe_utils import ( + Keyframe, + update_and_prune_tracks, + select_keyframe, + make_thumb) + +from slam.core.visualization_utils import draw_tracks, Visualizer3D, TrajectoryPlotter +from slam.core.trajectory_utils import compute_gt_alignment, apply_alignment +from slam.core.landmark_utils import Map, triangulate_points +from slam.core.multi_view_utils import MultiViewTriangulator +from slam.core.pnp_utils import associate_landmarks, refine_pose_pnp +from slam.core.ba_utils_OG import run_bundle_adjustment +from slam.core.ba_utils import ( + two_view_ba, + pose_only_ba, + local_bundle_adjustment +) + +# --------------------------------------------------------------------------- # +# CLI +# --------------------------------------------------------------------------- # +def _build_parser() -> argparse.ArgumentParser: + p = argparse.ArgumentParser("Feature tracking with key-frames") + p.add_argument('--dataset', + choices=['kitti', 'malaga', 'tum-rgbd', 'custom'], + required=True) + p.add_argument('--base_dir', default='../Dataset') + # feature/detector settings + p.add_argument('--detector', choices=['orb', 'sift', 'akaze'], + default='orb') + p.add_argument('--matcher', choices=['bf'], default='bf') + p.add_argument('--use_lightglue', action='store_true') + p.add_argument('--min_conf', type=float, default=0.7, + help='Minimum LightGlue confidence for a match') + # runtime + p.add_argument('--fps', type=float, default=10) + # RANSAC + p.add_argument('--ransac_thresh', type=float, default=1.0) + # key-frame params + p.add_argument('--kf_max_disp', type=float, default=45) + p.add_argument('--kf_min_inliers', type=float, default=150) + p.add_argument('--kf_cooldown', type=int, default=5) + p.add_argument('--kf_thumb_hw', type=int, nargs=2, + 
default=[640, 360]) + + # 3‑D visualisation toggle + p.add_argument("--no_viz3d", action="store_true", help="Disable 3‑D visualization window") + # triangulation depth filtering + p.add_argument("--min_depth", type=float, default=0.60) + p.add_argument("--max_depth", type=float, default=50.0) + p.add_argument('--mvt_rep_err', type=float, default=30.0, + help='Max mean reprojection error (px) for multi-view triangulation') + + # PnP / map-maintenance + p.add_argument('--pnp_min_inliers', type=int, default=30) + p.add_argument('--proj_radius', type=float, default=3.0) + p.add_argument('--merge_radius', type=float, default=0.10) + + return p + + +# --------------------------------------------------------------------------- # +# Helper functions +# --------------------------------------------------------------------------- # + +def _pose_inv(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Return 4×4 inverse of a rigid‑body transform specified by R|t.""" + Rt = R.T + tinv = -Rt @ t + T = np.eye(4) + T[:3, :3] = Rt + T[:3, 3] = tinv.ravel() + return T + +def get_homo_from_pose_rt(R: np.ndarray, t: np.ndarray) -> np.ndarray: + """Homogeneous matrix **T_c1→c2** from the relative pose (R, t) + returned by `recoverPose`.""" + T = np.eye(4) + T[:3, :3] = R + T[:3, 3] = t.ravel() + return T + +def try_bootstrap(K, kp0, kp1, matches, args, world_map): + """Return (success, T_cam0_w, T_cam1_w) and add initial landmarks.""" + if len(matches) < 50: + return False, None, None + + # 1. pick a model: Essential (general) *or* Homography (planar) + pts0 = np.float32([kp0[m.queryIdx].pt for m in matches]) + pts1 = np.float32([kp1[m.trainIdx].pt for m in matches]) + + E, inl_E = cv2.findEssentialMat( + pts0, pts1, K, cv2.RANSAC, 0.999, args.ransac_thresh) + + H, inl_H = cv2.findHomography( + pts0, pts1, cv2.RANSAC, args.ransac_thresh) + + # score = #inliers (ORB-SLAM uses a more elaborate model-selection score) + use_E = (inl_E.sum() > inl_H.sum()) + + if use_E and E is None: + return False, None, None + if not use_E and H is None: + return False, None, None + + if use_E: + _, R, t, inl_pose = cv2.recoverPose(E, pts0, pts1, K) + mask = (inl_E.ravel() & inl_pose.ravel()).astype(bool) + else: + print("[BOOTSTRAP] Using Homography for initialisation") + R, t = cv2.decomposeHomographyMat(H, K)[1:3] # take the best hypothesis + mask = inl_H.ravel().astype(bool) + + # 2. triangulate those inliers – exactly once + p0 = pts0[mask] + p1 = pts1[mask] + pts3d = triangulate_points(K, R, t, p0, p1) + + z0 = pts3d[:, 2] + pts3d_cam1 = (R @ pts3d.T + t.reshape(3, 1)).T + z1 = pts3d_cam1[:, 2] + + ok = ( # both cameras see the point in front of them + (z0 > args.min_depth) & (z0 < args.max_depth) & # in front of cam‑0 + (z1 > args.min_depth) & (z1 < args.max_depth) # in front of cam‑1 + ) + pts3d = pts3d[ok] + + if len(pts3d) < 80: + print("[BOOTSTRAP] Not enough points to bootstrap the map.") + return False, None, None + + # 3. 
fill the map # TODO: _pose_inv(R, t) can be used here + T1_w = np.eye(4) + T1_w[:3, :3] = R.T + T1_w[:3, 3] = (-R.T @ t).ravel() + + world_map.add_pose(T1_w) + + cols = np.full((len(pts3d), 3), 0.7) # grey – colour is optional here + ids = world_map.add_points(pts3d, cols, keyframe_idx=1) + + # ----------------------------------------------- + # add (frame_idx , kp_idx) pairs for each new MP + # ----------------------------------------------- + inlier_kp_idx = np.where(mask)[0][ok] # kp indices that survived depth + for pid, kp_idx in zip(ids, inlier_kp_idx): + world_map.points[pid].add_observation(0, kp_idx) # img0 side + world_map.points[pid].add_observation(1, kp_idx) # img1 side + + + print(f"[BOOTSTRAP] Map initialised with {len(ids)} landmarks.") + return True, T1_w + + +def solve_pnp_step(K, pose_pred, world_map, kp, args): + """Return (success, refined_pose, used_kp_indices).""" + pts_w = world_map.get_point_array() + pts3d, pts2d, used = associate_landmarks( + K, pose_pred, pts_w, kp, args.proj_radius) + + if len(pts3d) < args.pnp_min_inliers: + return False, pose_pred, used + + R, t = refine_pose_pnp(K, pts3d, pts2d) + if R is None: + return False, pose_pred, used + + R_wc = R.T + t_wc = -R.T @ t + pose_w_c = np.eye(4, dtype=np.float32) + pose_w_c[:3,:3] = R_wc + pose_w_c[:3, 3] = t_wc + + print(f"[PNP] Pose refined with {len(pts3d)} inliers.") + return True, pose_w_c, used + + +def triangulate_new_points(K, pose_prev, pose_cur, + kp_prev, kp_cur, matches, + used_cur_idx, args, world_map, img_cur): + """Triangulate only *unmatched* keypoints and add them if baseline is good.""" + fresh = [m for m in matches if m.trainIdx not in used_cur_idx] + if not fresh: + print("[TRIANGULATION] No new points to triangulate.") + return [] + + p0 = np.float32([kp_prev[m.queryIdx].pt for m in fresh]) + p1 = np.float32([kp_cur[m.trainIdx].pt for m in fresh]) + + # -- baseline / parallax test (ORB-SLAM uses θ ≈ 1°) + # rel = np.linalg.inv(pose_prev) @ pose_cur + rel = np.linalg.inv(pose_cur) @ pose_prev # c₂ → c₁ + R_rel, t_rel = rel[:3, :3], rel[:3, 3] + cos_thresh = np.cos(np.deg2rad(1.0)) + + rays0 = cv2.undistortPoints(p0.reshape(-1, 1, 2), K, None).reshape(-1, 2) + rays1 = cv2.undistortPoints(p1.reshape(-1, 1, 2), K, None).reshape(-1, 2) + rays0 = np.hstack([rays0, np.ones((len(rays0), 1))]) + rays1 = np.hstack([rays1, np.ones((len(rays1), 1))]) + + ang_ok = np.einsum('ij,ij->i', rays0, (R_rel @ rays1.T).T) < cos_thresh + if not ang_ok.any(): + print("[TRIANGULATION] No points passed the parallax test.") + return [] + + p0, p1 = p0[ang_ok], p1[ang_ok] + fresh = [m for m, keep in zip(fresh, ang_ok) if keep] + + pts3d = triangulate_points(K, R_rel, t_rel, p0, p1) + z = pts3d[:, 2] + depth_ok = (z > args.min_depth) & (z < args.max_depth) + pts3d = pts3d[depth_ok] + fresh = [m for m, keep in zip(fresh, depth_ok) if keep] + + if not len(pts3d): + print("[TRIANGULATION] No points passed the depth test.") + return [] + + # colour sampling + h, w, _ = img_cur.shape + pix = [kp_cur[m.trainIdx].pt for m in fresh] + cols = [] + for (u, v) in pix: + x, y = int(round(u)), int(round(v)) + if 0 <= x < w and 0 <= y < h: + b, g, r = img_cur[y, x] + cols.append((r, g, b)) + else: + cols.append((255, 255, 255)) + cols = np.float32(cols) / 255.0 + + pts3d_w = pose_prev @ np.hstack([pts3d, np.ones((len(pts3d), 1))]).T + pts3d_w = pts3d_w.T[:, :3] + ids = world_map.add_points(pts3d_w, cols, keyframe_idx=len(world_map.poses)-1) + print(f"[TRIANGULATION] Added {len(ids)} new landmarks.") + return ids + + +# 
--------------------------------------------------------------------------- # +# Main processing loop +# --------------------------------------------------------------------------- # +def main(): + PAUSED = False + args = _build_parser().parse_args() + + # --- Data loading --- + seq = load_sequence(args) + calib = load_calibration(args) # dict with K_l, P_l, ... + groundtruth = load_groundtruth(args) # None or Nx3x4 array + K = calib["K_l"] # intrinsic matrix for left camera + P = calib["P_l"] # projection matrix for left camera + + # ------ build 4×4 GT poses + alignment matrix (once) ---------------- + gt_T = None + R_align = t_align = None + if groundtruth is not None: + gt_T = np.pad(groundtruth, ((0, 0), (0, 1), (0, 0)), constant_values=0.0) + gt_T[:, 3, 3] = 1.0 # homogeneous 1s + R_align, t_align = compute_gt_alignment(gt_T) + + # --- feature pipeline (OpenCV / LightGlue) --- + detector, matcher = init_feature_pipeline(args) + + mvt = MultiViewTriangulator( + K, + min_views=2, # ← “every 3 key-frames” + merge_radius=args.merge_radius, + max_rep_err=args.mvt_rep_err, + min_depth=args.min_depth, + max_depth=args.max_depth) + + # --- tracking state --- + prev_map, tracks = {}, {} + next_track_id = 0 + initialised = False + tracking_lost = False + + + world_map = Map() + cur_pose = np.eye(4) # camera‑to‑world (identity at t=0) + world_map.add_pose(cur_pose) + viz3d = None if args.no_viz3d else Visualizer3D(color_axis="y") + plot2d = TrajectoryPlotter() + + kfs: list[Keyframe] = [] + last_kf_idx = -999 + + # TODO FOR BUNDLE ADJUSTMENT + frame_keypoints: List[List[cv2.KeyPoint]] = [] #CHANGE + frame_keypoints.append([]) # placeholder to keep indices aligned + + # --- visualisation --- + achieved_fps = 0.0 + last_time = cv2.getTickCount() / cv2.getTickFrequency() + + # cv2.namedWindow('Feature Tracking', cv2.WINDOW_NORMAL) + # cv2.resizeWindow('Feature Tracking', 1200, 600) + + total = len(seq) - 1 + + + + new_ids: list[int] = [] # CHANGE + + for i in tqdm(range(total), desc='Tracking'): + # --- load image pair --- + img1, img2 = load_frame_pair(args, seq, i) + + # --- feature extraction / matching --- + kp1, des1 = feature_extractor(args, img1, detector) + kp2, des2 = feature_extractor(args, img2, detector) + matches = feature_matcher(args, kp1, kp2, des1, des2, matcher) + # --- filter matches with RANSAC --- + matches = filter_matches_ransac(kp1, kp2, matches, args.ransac_thresh) + + if len(matches) < 12: + print(f"[WARN] Not enough matches at frame {i}. 
Skipping.") + continue + + frame_no = i + 1 + curr_map, tracks, next_track_id = update_and_prune_tracks( + matches, prev_map, tracks, kp2, frame_no, next_track_id) + prev_map = curr_map # for the next iteration + + # ---------------- Key-frame decision -------------------------- # TODO make every frame a keyframe + frame_keypoints.append(kp2) + if i == 0: + kfs.append(Keyframe(0, seq[0] if isinstance(seq[0], str) else "", + kp1, des1, cur_pose, make_thumb(img1, tuple(args.kf_thumb_hw)))) + last_kf_idx = 0 + prev_len = len(kfs) + is_kf = False + continue + else: + prev_len = len(kfs) + kfs, last_kf_idx = select_keyframe( + args, seq, i, img2, kp2, des2, cur_pose, matcher, kfs, last_kf_idx) + is_kf = len(kfs) > prev_len + + print("len(kfs) = ", len(kfs), "last_kf_idx = ", last_kf_idx) + # ------------------------------------------------ bootstrap ------------------------------------------------ # TODO FIND A BETTER WAY TO manage index + if not initialised: + if len(kfs) < 2: + continue + bootstrap_matches = feature_matcher(args, kfs[0].kps, kfs[-1].kps, kfs[0].desc, kfs[-1].desc, matcher) + bootstrap_matches = filter_matches_ransac(kfs[0].kps, kfs[-1].kps, bootstrap_matches, args.ransac_thresh) + ok, temp_pose = try_bootstrap(K, kfs[0].kps, kfs[-1].kps, bootstrap_matches, args, world_map) + if ok: + frame_keypoints[0] = kfs[0].kps # BA (img1 is frame-0) + frame_keypoints[-1] = kfs[-1].kps # BA (img2 is frame-1) + # print("POSES: " ,world_map.poses) + # two_view_ba(world_map, K, frame_keypoints, max_iters=25) # BA + + initialised = True + cur_pose = world_map.poses[-1].copy() # we are at frame i+1 + continue + else: + print("******************BOOTSTRAP FAILED**************") + continue # keep trying with next frame + + # ------------------------------------------------ tracking ------------------------------------------------- + pose_pred = cur_pose.copy() # CV-predict could go here + ok_pnp, cur_pose, used_idx = solve_pnp_step( + K, pose_pred, world_map, kp2, args) # last keyframe + + if not ok_pnp: # fallback to 2-D-2-D if PnP failed + print(f"[WARN] PnP failed at frame {i}. Using 2D-2D tracking.") + # raise Exception(f"[WARN] PnP failed at frame {i}. 
Using 2D-2D tracking.") + if not is_kf: + last_kf = kfs[-1] + else: + last_kf = kfs[-2] if len(kfs) > 1 else kfs[0] + + tracking_matches = feature_matcher(args, last_kf.kps, kp2, last_kf.desc, des2, matcher) + tracking_matches = filter_matches_ransac(last_kf.kps, kp2, tracking_matches, args.ransac_thresh) + + pts0 = np.float32([last_kf.kps[m.queryIdx].pt for m in tracking_matches]) + pts1 = np.float32([kp2[m.trainIdx].pt for m in tracking_matches]) + E, mask = cv2.findEssentialMat(pts0, pts1, K, cv2.RANSAC, + 0.999, args.ransac_thresh) + if E is None: + tracking_lost = True + continue + _, R, t, mpose = cv2.recoverPose(E, pts0, pts1, K) + T_rel = get_homo_from_pose_rt(R, t) # c₁ → c₂ + cur_pose = last_kf.pose @ np.linalg.inv(T_rel) # c₂ → world + tracking_lost = False + + world_map.add_pose(cur_pose) # always push *some* pose + + # pose_only_ba(world_map, K, frame_keypoints, # FOR BA + # frame_idx=len(world_map.poses)-1) + + # ------------------------------------------------ map growth ------------------------------------------------ + if is_kf: + # 1) hand the new KF to the multi-view triangulator + mvt.add_keyframe( + frame_idx=frame_no, # global frame number of this KF + pose_w_c=cur_pose, + kps=kp2, + track_map=curr_map, + img_bgr=img2) + + # 2) try triangulating all tracks that now have ≥3 distinct KFs + new_mvt_ids = mvt.triangulate_ready_tracks(world_map) + + # 3) visualisation hook + new_ids = new_mvt_ids # keeps 3-D viewer in sync + + + # pose_prev = cur_pose.copy() + + # local_bundle_adjustment( + # world_map, K, frame_keypoints, + # center_kf_idx=last_kf_idx, + # window_size=5 ) + + p = cur_pose[:3, 3] + print(f"Cam position z = {p[2]:.2f} (should decrease on KITTI)") + + # --- 2-D path plot (cheap) ---------------------------------------------- + est_pos = cur_pose[:3, 3] + gt_pos = None + if gt_T is not None and i + 1 < len(gt_T): + p_gt = gt_T[i + 1, :3, 3] # raw GT + gt_pos = apply_alignment(p_gt, R_align, t_align) + plot2d.append(est_pos, gt_pos, mirror_x=True) + + + # # --- 2-D track maintenance (for GUI only) --- + # frame_no = i + 1 + # prev_map, tracks, next_track_id = update_and_prune_tracks( + # matches, prev_map, tracks, kp2, frame_no, next_track_id) + + + # --- 3-D visualisation --- + if viz3d is not None: + viz3d.update(world_map, new_ids) + + + key = cv2.waitKey(1) & 0xFF + if key == 27: + break + if key == ord('p') and viz3d is not None: + viz3d.paused = not viz3d.paused + + + cv2.destroyAllWindows() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/modules/SimpleSLAM/slam/stereo/ROUGHstereo_tracker.py b/modules/SimpleSLAM/slam/stereo/ROUGHstereo_tracker.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/stereo/__init__.py b/modules/SimpleSLAM/slam/stereo/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/slam/test_BA.ipynb b/modules/SimpleSLAM/slam/test_BA.ipynb new file mode 100644 index 0000000000..4d16fb7d8f --- /dev/null +++ b/modules/SimpleSLAM/slam/test_BA.ipynb @@ -0,0 +1,2386 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "id": "67337d44", + "metadata": {}, + "outputs": [], + "source": [ + "import pyceres\n", + "import pycolmap\n", + "import numpy as np\n", + "from hloc.utils import viz_3d\n", + "\n", + "from pycolmap import cost_functions" + ] + }, + { + "cell_type": "markdown", + "id": "cc59c6d8", + "metadata": {}, + "source": [ + "## Synthetic reconstruction" + ] + }, + { + "cell_type": "code", + 
"execution_count": 16, + "id": "a155e6d8", + "metadata": {}, + "outputs": [], + "source": [ + "def create_reconstruction(num_points=50, num_images=2, seed=3, noise=0):\n", + " state = np.random.RandomState(seed)\n", + " rec = pycolmap.Reconstruction()\n", + " p3d = state.uniform(-1, 1, (num_points, 3)) + np.array([0, 0, 3])\n", + " for p in p3d:\n", + " rec.add_point3D(p, pycolmap.Track(), np.zeros(3))\n", + " w, h = 640, 480\n", + " cam = pycolmap.Camera(\n", + " model=\"SIMPLE_PINHOLE\",\n", + " width=w,\n", + " height=h,\n", + " params=np.array([max(w, h) * 1.2, w / 2, h / 2]),\n", + " camera_id=0,\n", + " )\n", + " rec.add_camera(cam)\n", + " for i in range(num_images):\n", + " im = pycolmap.Image(\n", + " id=i,\n", + " name=str(i),\n", + " camera_id=cam.camera_id,\n", + " cam_from_world=pycolmap.Rigid3d(\n", + " pycolmap.Rotation3d(), state.uniform(-1, 1, 3)\n", + " ),\n", + " )\n", + " im.registered = True\n", + " p2d = cam.img_from_cam(\n", + " im.cam_from_world * [p.xyz for p in rec.points3D.values()]\n", + " )\n", + " p2d_obs = np.array(p2d) + state.randn(len(p2d), 2) * noise\n", + " im.points2D = pycolmap.ListPoint2D(\n", + " [pycolmap.Point2D(p, id_) for p, id_ in zip(p2d_obs, rec.points3D)]\n", + " )\n", + " rec.add_image(im)\n", + " return rec\n", + "\n", + "\n", + "rec_gt = create_reconstruction()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "0e69461f-3619-41a5-b202-c6dc8e2fc778", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "marker": { + "color": "rgb(255,0,0)", + "line": { + "width": 0 + }, + "size": 1 + }, + "mode": "markers", + "type": "scatter3d", + "x": { + "bdata": "uF17vsMI0r/ERezUj/TYP1hs7RKxxNq/9FWP2lFk1L9IL3o9A63ev3DYwbSa3rQ/IKYWlbaGpb8qcMmFoJnivxJyH/O/Seu/YKrKn5/ltr8gI2MiT2PrP+BXCt7+UbY/8jVgKYBu478o1ogE9BvRPzwjXUwLVN8/kAcZjgn43D9gTjQp9jjev4gemOz3otM/KiIxQ8gg5j+wANsoccvhv1CWfXA83LY/nMtiSb642D9M7o/Z0NDev/DDOR3JQsc/4ABz8yYX0z9gAogTLE6+v8zJrAdp9ue/AGO+kcgslj+gwYbELgK6PwZPef20mOG/RPs+RrCJ3b90Lk3V/BLnP8yVA6o7a9y/RGZ9d5wc478AvOxG9M6gv4g7UVdhld8/HNl/At8t0L8gpG96nqLCP5qH1L8bPuG/0NQWqAmq3D9qGZjgxbbrv/rcE9DoR+a/sBdwBzhHuz+c7EUFDj/XPw==", + "dtype": "f8" + }, + "y": { + "bdata": "7I9+disE3T9e5nys0dnlP+REkM3J294/WA5t8mTc678M2YcXuJLYP9x7/Af8mOU/VLCoDf7T3L8YIgcE2IfdPzwG91D4HOk/sA5sa4UM4r90hCYqVmjiP3LR+svdQOq/FmYhZha147/4s+Ph2yjRv+AMWC6PhdQ/ANOFGfo7pL8y6Y4+eU/rP6APq+1Nnb0/MNhoUsg7z7/YrwJp6q3Mv4Riilat8OE/gLVnrtl8vr84sU+O7bvFv2yW37EUd+6/pGdWWoda3L8uSNYWghbuvyRTBF6IvOK/VBp/+wom6T9obnB9lqTaP5jJ/Y1P7se/VA+/zIim2r940t+bqxTGP+DrL3vGO7e/AHlO12m+nD+A2jO6NaTRvzwN75y7ddg/ECPU93CF1T84gqd380vWvxQZuMdVgtg/IP59Cl+Cqb/cCshVJFvrv5hzdfLjl+m/3N5Ph9f647+gSCXXhwC1Pw==", + "dtype": "f8" + }, + "z": { + "bdata": "g7ET4UE7AkDy99Y3aqILQK7DKUxOhwJATC9/JW/TDUB2NGqNQVQOQByL+xd1uAJAuqr9g4FvCkAwpMu1/2oKQBLtE9YpPgpAnlGvst3VAkCCF8Yp8sIHQPM/WMBs0gdAlN9nwFecC0BkPBeNc9gIQD9Lkoeknw5AZimzMEF5DUBvfySzVW0NQBDWCyL4yAVA79LTirh5AUD8jQpsbfsOQDjig3Ld5gRAWcLovoeCAkBM25yYUYkEQAaC8vsQ8QhAxjr8qPDRCkAzBuhaME8HQPvlL0bP0gBAVpkJbDdXDkDIHj66i6cEQH4Of33HgAdAtKhO+a5SB0A+WZ+RKokEQItqhVtcSQNA5KBin0plAUCOPQg1r1ILQAzwoQriBgtAjooUJwVwBUDIAklqUB8HQFBTDWxaaApAd/dpUO+LCUDQJFp8AS8DQH8YgXGWEQJAwMxUg847D0BsXg2bnFALQA==", + "dtype": "f8" + } + }, + { + "hovertemplate": "Image(image_id=1, camera_id=0, name=\"1\", triangulated=50/50)", + "legendgroup": "1", + "line": { + "color": "rgb(255,0,0)", + "width": 1 + }, + "mode": "lines", + "name": "1", + "showlegend": false, + "type": "scatter3d", + "x": { + 
"bdata": "WCalMT+l6b9YJqUxPyXsv1gmpTE/Jee/WCalMT+l6b9YJqUxPyXnv1gmpTE/Jee/WCalMT+l6b9YJqUxPyXnv1gmpTE/Jey/WCalMT+l6b9YJqUxPyXsv1gmpTE/Jey/", + "dtype": "f8" + }, + "y": { + "bdata": "DGO109Sa1b8MY7XT1FrZvwxjtdPUWtm/DGO109Sa1b8MY7XT1FrZvwxjtdPU2tG/DGO109Sa1b8MY7XT1NrRvwxjtdPU2tG/DGO109Sa1b8MY7XT1NrRvwxjtdPUWtm/", + "dtype": "f8" + }, + "z": { + "bdata": "eNgrpzp84r/wsFdOdfjYv/CwV051+Ni/eNgrpzp84r/wsFdOdfjYv/CwV051+Ni/eNgrpzp84r/wsFdOdfjYv/CwV051+Ni/eNgrpzp84r/wsFdOdfjYv/CwV051+Ni/", + "dtype": "f8" + } + }, + { + "hovertemplate": "Image(image_id=0, camera_id=0, name=\"0\", triangulated=50/50)", + "legendgroup": "0", + "line": { + "color": "rgb(255,0,0)", + "width": 1 + }, + "mode": "lines", + "name": "0", + "showlegend": false, + "type": "scatter3d", + "x": { + "bdata": "FFmbdcbU1z8UWZt1xtTSPxRZm3XG1Nw/FFmbdcbU1z8UWZt1xtTcPxRZm3XG1Nw/FFmbdcbU1z8UWZt1xtTcPxRZm3XG1NI/FFmbdcbU1z8UWZt1xtTSPxRZm3XG1NI/", + "dtype": "f8" + }, + "y": { + "bdata": "WMsUdjmWxD+wlinsciy6P7CWKexyLLo/WMsUdjmWxD+wlinsciy6P1jLFHY5Fsw/WMsUdjmWxD9YyxR2ORbMP1jLFHY5Fsw/WMsUdjmWxD9YyxR2ORbMP7CWKexyLLo/", + "dtype": "f8" + }, + "z": { + "bdata": "rhe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/rhe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/rhe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/rhe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/", + "dtype": "f8" + } + } + ], + "layout": { + "height": 800, + "legend": { + "orientation": "h", + "x": 0.1, + "xanchor": "left", + "y": 0.99, + "yanchor": "top" + }, + "margin": { + "b": 0, + "l": 0, + "pad": 0, + "r": 0, + "t": 0 + }, + "scene": { + "aspectmode": "data", + "camera": { + "eye": { + "x": 0, + "y": -0.1, + "z": -2 + }, + "projection": { + "type": "orthographic" + }, + "up": { + "x": 0, + "y": -1, + "z": 0 + } + }, + "dragmode": "orbit", + "xaxis": { + "autorange": true, + "showbackground": false, + "showgrid": false, + "showline": false, + "showticklabels": true, + "visible": false + }, + "yaxis": { + "autorange": true, + "showbackground": false, + "showgrid": false, + "showline": false, + "showticklabels": true, + "visible": false + }, + "zaxis": { + "autorange": true, + "showbackground": false, + "showgrid": false, + "showline": false, + "showticklabels": true, + "visible": false + } + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#f2f5fa" + }, + "error_y": { + "color": "#f2f5fa" + }, + "marker": { + "line": { + "color": "rgb(17,17,17)", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "rgb(17,17,17)", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#A2B1C6", + "gridcolor": "#506784", + "linecolor": "#506784", + "minorgridcolor": "#506784", + "startlinecolor": "#A2B1C6" + }, + "baxis": { + "endlinecolor": "#A2B1C6", + "gridcolor": "#506784", + "linecolor": "#506784", + "minorgridcolor": "#506784", + "startlinecolor": "#A2B1C6" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 
0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "line": { + "color": "#283442" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "line": { + "color": "#283442" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#506784" + }, + "line": { + "color": "rgb(17,17,17)" + } + }, + "header": { + "fill": { + "color": "#2a3f5f" + }, + "line": { + "color": "rgb(17,17,17)" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#f2f5fa", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#f2f5fa" + }, + "geo": { + "bgcolor": "rgb(17,17,17)", + "lakecolor": "rgb(17,17,17)", + "landcolor": "rgb(17,17,17)", + "showlakes": true, + "showland": true, + "subunitcolor": "#506784" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "dark" + }, + "paper_bgcolor": "rgb(17,17,17)", + "plot_bgcolor": "rgb(17,17,17)", + "polar": { + "angularaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "bgcolor": "rgb(17,17,17)", + "radialaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + }, + "yaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + }, + "zaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + 
"showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + } + }, + "shapedefaults": { + "line": { + "color": "#f2f5fa" + } + }, + "sliderdefaults": { + "bgcolor": "#C8D4E3", + "bordercolor": "rgb(17,17,17)", + "borderwidth": 1, + "tickwidth": 0 + }, + "ternary": { + "aaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "baxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "bgcolor": "rgb(17,17,17)", + "caxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "updatemenudefaults": { + "bgcolor": "#506784", + "borderwidth": 0 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#283442", + "linecolor": "#506784", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#283442", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#283442", + "linecolor": "#506784", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#283442", + "zerolinewidth": 2 + } + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = viz_3d.init_figure()\n", + "viz_3d.plot_reconstruction(\n", + " fig, rec_gt, min_track_length=0, color=\"rgb(255,0,0)\", points_rgb=False\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "98a6db79", + "metadata": {}, + "source": [ + "## Optimize 3D points" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "e199af72", + "metadata": {}, + "outputs": [], + "source": [ + "def define_problem(rec): #ORIGINAL\n", + " prob = pyceres.Problem()\n", + " loss = pyceres.TrivialLoss()\n", + " for im in rec.images.values():\n", + " cam = rec.cameras[im.camera_id]\n", + " for p in im.points2D:\n", + " cost = pycolmap.cost_functions.ReprojErrorCost(\n", + " cam.model, im.cam_from_world, p.xy\n", + " )\n", + " prob.add_residual_block(\n", + " cost, loss, [rec.points3D[p.point3D_id].xyz, cam.params]\n", + " )\n", + " for cam in rec.cameras.values():\n", + " prob.set_parameter_block_constant(cam.params)\n", + " return prob\n", + "\n", + "def solve(prob):\n", + " print(\n", + " prob.num_parameter_blocks(),\n", + " prob.num_parameters(),\n", + " prob.num_residual_blocks(),\n", + " prob.num_residuals(),\n", + " )\n", + " options = pyceres.SolverOptions()\n", + " options.linear_solver_type = pyceres.LinearSolverType.DENSE_QR\n", + " options.minimizer_progress_to_stdout = True\n", + " options.num_threads = -1\n", + " summary = pyceres.SolverSummary()\n", + " pyceres.solve(options, prob, summary)\n", + " print(summary.BriefReport())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ac2305ec", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "51 153 100 200\n", + "iter cost cost_change |gradient| |step| tr_ratio tr_radius ls_iter iter_time total_time\n", + " 0 5.119392e-26 0.00e+00 3.16e-11 0.00e+00 0.00e+00 1.00e+04 0 2.97e-04 1.30e-03\n", + "Ceres Solver Report: Iterations: 1, Initial cost: 5.119392e-26, Final cost: 5.119392e-26, Termination: CONVERGENCE\n" + ] + } + ], + "source": [ + "rec = create_reconstruction()\n", + "problem = define_problem(rec)\n", + "solve(problem)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3234540", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "43df4cd1", + "metadata": {}, + "source": [ + "Add some noise" + ] + }, + { + "cell_type": "code", + "execution_count": 
20, + "id": "356a7ad2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.15040931 0.63148501 2.68457285]\n", + "51 153 100 200\n", + "iter cost cost_change |gradient| |step| tr_ratio tr_radius ls_iter iter_time total_time\n", + " 0 1.074601e+05 0.00e+00 3.34e+04 0.00e+00 0.00e+00 1.00e+04 0 2.31e-04 9.33e-04\n", + " 1 1.048585e+02 1.07e+05 1.33e+03 1.77e+00 9.99e-01 3.00e+04 1 2.33e-03 3.29e-03\n", + " 2 8.359400e-05 1.05e+02 1.50e+00 4.77e-02 1.00e+00 9.00e+04 1 2.20e-03 5.51e-03\n", + " 3 2.083842e-14 8.36e-05 1.75e-05 4.03e-05 1.00e+00 2.70e+05 1 1.94e-03 7.46e-03\n", + "Ceres Solver Report: Iterations: 4, Initial cost: 1.074601e+05, Final cost: 2.083842e-14, Termination: CONVERGENCE\n" + ] + } + ], + "source": [ + "rec = create_reconstruction()\n", + "for p in rec.points3D.values():\n", + " p.xyz += np.random.RandomState(0).uniform(-0.5, 0.5, 3)\n", + "print(rec.points3D[1].xyz)\n", + "problem = define_problem(rec)\n", + "solve(problem)" + ] + }, + { + "cell_type": "markdown", + "id": "8e24927e", + "metadata": {}, + "source": [ + "## Optimize poses" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2d9aca7e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[np.float64(1.1219738478901715), np.float64(0.29153152220166934)]\n", + "[np.float64(32.546967105288715), np.float64(16.036538808692125)]\n", + "55 167 100 200\n", + "iter cost cost_change |gradient| |step| tr_ratio tr_radius ls_iter iter_time total_time\n", + " 0 5.661147e+05 0.00e+00 1.00e+06 0.00e+00 0.00e+00 1.00e+04 0 2.16e-04 6.86e-04\n", + " 1 1.589775e+04 5.50e+05 1.22e+05 8.21e-01 9.73e-01 3.00e+04 1 4.77e-04 1.19e-03\n", + " 2 2.077984e+02 1.57e+04 2.08e+04 4.43e-01 9.87e-01 9.00e+04 1 2.81e-04 1.49e-03\n", + " 3 3.067690e-02 2.08e+02 2.50e+02 4.13e-02 1.00e+00 2.70e+05 1 2.62e-04 1.77e-03\n", + " 4 8.020149e-10 3.07e-02 2.25e-01 5.20e-04 1.00e+00 8.10e+05 1 2.70e-04 2.06e-03\n", + " 5 1.703731e-19 8.02e-10 2.99e-07 9.13e-08 1.00e+00 2.43e+06 1 2.79e-04 2.34e-03\n", + "Ceres Solver Report: Iterations: 6, Initial cost: 5.661147e+05, Final cost: 1.703731e-19, Termination: CONVERGENCE\n", + "[np.float64(2.2088752356698325e-12), np.float64(5.697402360258597e-15)]\n", + "[np.float64(3.43760086011565e-11), np.float64(8.604298389459717e-14)]\n" + ] + } + ], + "source": [ + "def define_problem2(rec):\n", + " prob = pyceres.Problem()\n", + " loss = pyceres.TrivialLoss()\n", + " for im in rec.images.values():\n", + " cam = rec.cameras[im.camera_id]\n", + " for p in im.points2D:\n", + " cost = pycolmap.cost_functions.ReprojErrorCost(cam.model, p.xy)\n", + " pose = im.cam_from_world\n", + " params = [\n", + " pose.rotation.quat,\n", + " pose.translation,\n", + " rec.points3D[p.point3D_id].xyz,\n", + " cam.params,\n", + " ]\n", + " prob.add_residual_block(cost, loss, params)\n", + " prob.set_manifold(\n", + " im.cam_from_world.rotation.quat, pyceres.EigenQuaternionManifold()\n", + " )\n", + " for cam in rec.cameras.values():\n", + " prob.set_parameter_block_constant(cam.params)\n", + " for p in rec.points3D.values():\n", + " prob.set_parameter_block_constant(p.xyz)\n", + " return prob\n", + "\n", + "\n", + "rec = create_reconstruction()\n", + "for im in rec.images.values():\n", + " im.cam_from_world.translation += np.random.randn(3) / 2\n", + " im.cam_from_world.rotation *= pycolmap.Rotation3d(np.random.randn(3) / 5)\n", + " im.cam_from_world.rotation.normalize()\n", + "rec_init = rec.__deepcopy__({})\n", + 
"init_from_gt = [\n", + " rec.images[i].cam_from_world * rec_gt.images[i].cam_from_world.inverse()\n", + " for i in rec.images\n", + "]\n", + "print([np.linalg.norm(t.translation) for t in init_from_gt])\n", + "print([np.rad2deg(t.rotation.angle()) for t in init_from_gt])\n", + "problem = define_problem2(rec)\n", + "solve(problem)\n", + "opt_from_gt = [\n", + " rec.images[i].cam_from_world * rec_gt.images[i].cam_from_world.inverse()\n", + " for i in rec.images\n", + "]\n", + "print([np.linalg.norm(t.translation) for t in opt_from_gt])\n", + "print([np.rad2deg(t.rotation.angle()) for t in opt_from_gt])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "31196ca9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.80142173 0.33757516 0.57766469] [0.80142173 0.33757516 0.57766469]\n", + "[-7.14886602e-14 2.90551251e-13 2.14872540e-14 1.00000000e+00] [0. 0. 0. 1.]\n", + "[-0.37236177 -0.16083449 0.75442408] [-0.37236177 -0.16083449 0.75442408]\n", + "[-7.23833806e-16 1.88096324e-16 -6.69714114e-17 1.00000000e+00] [0. 0. 0. 1.]\n" + ] + }, + { + "data": { + "text/plain": [ + "(array([0.10159581, 0.41629565, 2.58180948]),\n", + " array([0.10159581, 0.41629565, 2.58180948]))" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "assert np.allclose(rec.cameras[0].params, rec_gt.cameras[0].params)\n", + "for i in rec.images:\n", + " print(\n", + " rec.images[i].cam_from_world.translation,\n", + " rec_gt.images[i].cam_from_world.translation,\n", + " )\n", + " print(\n", + " rec.images[i].cam_from_world.rotation.quat,\n", + " rec_gt.images[i].cam_from_world.rotation.quat,\n", + " )\n", + "rec.points3D[1].xyz, rec_gt.points3D[1].xyz" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "aab4bcbd", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "legendgroup": "init", + "marker": { + "color": "rgb(255,255,255)", + "line": { + "width": 0 + }, + "size": 1 + }, + "mode": "markers", + "name": "init", + "type": "scatter3d", + "x": { + "bdata": "uF17vsMI0r/ERezUj/TYP1hs7RKxxNq/9FWP2lFk1L9IL3o9A63ev3DYwbSa3rQ/IKYWlbaGpb8qcMmFoJnivxJyH/O/Seu/YKrKn5/ltr8gI2MiT2PrP+BXCt7+UbY/8jVgKYBu478o1ogE9BvRPzwjXUwLVN8/kAcZjgn43D9gTjQp9jjev4gemOz3otM/KiIxQ8gg5j+wANsoccvhv1CWfXA83LY/nMtiSb642D9M7o/Z0NDev/DDOR3JQsc/4ABz8yYX0z9gAogTLE6+v8zJrAdp9ue/AGO+kcgslj+gwYbELgK6PwZPef20mOG/RPs+RrCJ3b90Lk3V/BLnP8yVA6o7a9y/RGZ9d5wc478AvOxG9M6gv4g7UVdhld8/HNl/At8t0L8gpG96nqLCP5qH1L8bPuG/0NQWqAmq3D9qGZjgxbbrv/rcE9DoR+a/sBdwBzhHuz+c7EUFDj/XPw==", + "dtype": "f8" + }, + "y": { + "bdata": "7I9+disE3T9e5nys0dnlP+REkM3J294/WA5t8mTc678M2YcXuJLYP9x7/Af8mOU/VLCoDf7T3L8YIgcE2IfdPzwG91D4HOk/sA5sa4UM4r90hCYqVmjiP3LR+svdQOq/FmYhZha147/4s+Ph2yjRv+AMWC6PhdQ/ANOFGfo7pL8y6Y4+eU/rP6APq+1Nnb0/MNhoUsg7z7/YrwJp6q3Mv4Riilat8OE/gLVnrtl8vr84sU+O7bvFv2yW37EUd+6/pGdWWoda3L8uSNYWghbuvyRTBF6IvOK/VBp/+wom6T9obnB9lqTaP5jJ/Y1P7se/VA+/zIim2r940t+bqxTGP+DrL3vGO7e/AHlO12m+nD+A2jO6NaTRvzwN75y7ddg/ECPU93CF1T84gqd380vWvxQZuMdVgtg/IP59Cl+Cqb/cCshVJFvrv5hzdfLjl+m/3N5Ph9f647+gSCXXhwC1Pw==", + "dtype": "f8" + }, + "z": { + "bdata": 
"g7ET4UE7AkDy99Y3aqILQK7DKUxOhwJATC9/JW/TDUB2NGqNQVQOQByL+xd1uAJAuqr9g4FvCkAwpMu1/2oKQBLtE9YpPgpAnlGvst3VAkCCF8Yp8sIHQPM/WMBs0gdAlN9nwFecC0BkPBeNc9gIQD9Lkoeknw5AZimzMEF5DUBvfySzVW0NQBDWCyL4yAVA79LTirh5AUD8jQpsbfsOQDjig3Ld5gRAWcLovoeCAkBM25yYUYkEQAaC8vsQ8QhAxjr8qPDRCkAzBuhaME8HQPvlL0bP0gBAVpkJbDdXDkDIHj66i6cEQH4Of33HgAdAtKhO+a5SB0A+WZ+RKokEQItqhVtcSQNA5KBin0plAUCOPQg1r1ILQAzwoQriBgtAjooUJwVwBUDIAklqUB8HQFBTDWxaaApAd/dpUO+LCUDQJFp8AS8DQH8YgXGWEQJAwMxUg847D0BsXg2bnFALQA==", + "dtype": "f8" + } + }, + { + "hovertemplate": "Image(image_id=1, camera_id=0, name=\"1\", triangulated=50/50)", + "legendgroup": "init", + "line": { + "color": "rgb(255,255,255)", + "width": 1 + }, + "mode": "lines", + "name": "1", + "showlegend": false, + "type": "scatter3d", + "x": { + "bdata": "OgemPoSMuD8QZWC5WdeZP6hAM/j4KMQ/OgemPoSMuD+oQDP4+CjEPyU4gESyS7k/OgemPoSMuD8lOIBEsku5P8xfHHvSIKG/OgemPoSMuD/MXxx70iChvxBlYLlZ15k/", + "dtype": "f8" + }, + "y": { + "bdata": "A7JUcJuR77/JasXGhh7xv03NBkU7qu+/A7JUcJuR779NzQZFO6rvv6/mR9Duc+y/A7JUcJuR77+v5kfQ7nPsv/TuyxjBBu+/A7JUcJuR77/07ssYwQbvv8lqxcaGHvG/", + "dtype": "f8" + }, + "z": { + "bdata": "pOLn3iBu2b9PRgwQJbPLvwTC1a9C+si/pOLn3iBu2b8EwtWvQvrIv7c7buQp2Mq/pOLn3iBu2b+3O27kKdjKvwLApEQMkc2/pOLn3iBu2b8CwKREDJHNv09GDBAls8u/", + "dtype": "f8" + } + }, + { + "hovertemplate": "Image(image_id=0, camera_id=0, name=\"0\", triangulated=50/50)", + "legendgroup": "init", + "line": { + "color": "rgb(255,255,255)", + "width": 1 + }, + "mode": "lines", + "name": "0", + "showlegend": false, + "type": "scatter3d", + "x": { + "bdata": "lkNqRf690D8CHFnYvvPMP54ey02NF9g/lkNqRf690D+eHstNjRfYP9gpNCORT9Y/lkNqRf690D/YKTQjkU/WP3YyK4PGY8k/lkNqRf690D92MiuDxmPJPwIcWdi+88w/", + "dtype": "f8" + }, + "y": { + "bdata": "2YP+kiI4yz83wJ0tA8XCP7RLuBStbcc/2YP+kiI4yz+0S7gUrW3HP1Ngofqx/9I/2YP+kiI4yz9TYKH6sf/SP5QaFAddq9A/2YP+kiI4yz+UGhQHXavQPzfAnS0DxcI/", + "dtype": "f8" + }, + "z": { + "bdata": "wVas3xpO8L/xfnm4ukjqv7wVKeyNAuu/wVas3xpO8L+8FSnsjQLrvw77zQCREOu/wVas3xpO8L8O+80AkRDrv0NkHs29Vuq/wVas3xpO8L9DZB7NvVbqv/F+ebi6SOq/", + "dtype": "f8" + } + }, + { + "legendgroup": "GT", + "marker": { + "color": "rgb(255,0,0)", + "line": { + "width": 0 + }, + "size": 1 + }, + "mode": "markers", + "name": "GT", + "type": "scatter3d", + "x": { + "bdata": "uF17vsMI0r/ERezUj/TYP1hs7RKxxNq/9FWP2lFk1L9IL3o9A63ev3DYwbSa3rQ/IKYWlbaGpb8qcMmFoJnivxJyH/O/Seu/YKrKn5/ltr8gI2MiT2PrP+BXCt7+UbY/8jVgKYBu478o1ogE9BvRPzwjXUwLVN8/kAcZjgn43D9gTjQp9jjev4gemOz3otM/KiIxQ8gg5j+wANsoccvhv1CWfXA83LY/nMtiSb642D9M7o/Z0NDev/DDOR3JQsc/4ABz8yYX0z9gAogTLE6+v8zJrAdp9ue/AGO+kcgslj+gwYbELgK6PwZPef20mOG/RPs+RrCJ3b90Lk3V/BLnP8yVA6o7a9y/RGZ9d5wc478AvOxG9M6gv4g7UVdhld8/HNl/At8t0L8gpG96nqLCP5qH1L8bPuG/0NQWqAmq3D9qGZjgxbbrv/rcE9DoR+a/sBdwBzhHuz+c7EUFDj/XPw==", + "dtype": "f8" + }, + "y": { + "bdata": "7I9+disE3T9e5nys0dnlP+REkM3J294/WA5t8mTc678M2YcXuJLYP9x7/Af8mOU/VLCoDf7T3L8YIgcE2IfdPzwG91D4HOk/sA5sa4UM4r90hCYqVmjiP3LR+svdQOq/FmYhZha147/4s+Ph2yjRv+AMWC6PhdQ/ANOFGfo7pL8y6Y4+eU/rP6APq+1Nnb0/MNhoUsg7z7/YrwJp6q3Mv4Riilat8OE/gLVnrtl8vr84sU+O7bvFv2yW37EUd+6/pGdWWoda3L8uSNYWghbuvyRTBF6IvOK/VBp/+wom6T9obnB9lqTaP5jJ/Y1P7se/VA+/zIim2r940t+bqxTGP+DrL3vGO7e/AHlO12m+nD+A2jO6NaTRvzwN75y7ddg/ECPU93CF1T84gqd380vWvxQZuMdVgtg/IP59Cl+Cqb/cCshVJFvrv5hzdfLjl+m/3N5Ph9f647+gSCXXhwC1Pw==", + "dtype": "f8" + }, + "z": { + "bdata": 
"g7ET4UE7AkDy99Y3aqILQK7DKUxOhwJATC9/JW/TDUB2NGqNQVQOQByL+xd1uAJAuqr9g4FvCkAwpMu1/2oKQBLtE9YpPgpAnlGvst3VAkCCF8Yp8sIHQPM/WMBs0gdAlN9nwFecC0BkPBeNc9gIQD9Lkoeknw5AZimzMEF5DUBvfySzVW0NQBDWCyL4yAVA79LTirh5AUD8jQpsbfsOQDjig3Ld5gRAWcLovoeCAkBM25yYUYkEQAaC8vsQ8QhAxjr8qPDRCkAzBuhaME8HQPvlL0bP0gBAVpkJbDdXDkDIHj66i6cEQH4Of33HgAdAtKhO+a5SB0A+WZ+RKokEQItqhVtcSQNA5KBin0plAUCOPQg1r1ILQAzwoQriBgtAjooUJwVwBUDIAklqUB8HQFBTDWxaaApAd/dpUO+LCUDQJFp8AS8DQH8YgXGWEQJAwMxUg847D0BsXg2bnFALQA==", + "dtype": "f8" + } + }, + { + "hovertemplate": "Image(image_id=1, camera_id=0, name=\"1\", triangulated=50/50)", + "legendgroup": "GT", + "line": { + "color": "rgb(255,0,0)", + "width": 1 + }, + "mode": "lines", + "name": "1", + "showlegend": false, + "type": "scatter3d", + "x": { + "bdata": "WCalMT+l6b9YJqUxPyXsv1gmpTE/Jee/WCalMT+l6b9YJqUxPyXnv1gmpTE/Jee/WCalMT+l6b9YJqUxPyXnv1gmpTE/Jey/WCalMT+l6b9YJqUxPyXsv1gmpTE/Jey/", + "dtype": "f8" + }, + "y": { + "bdata": "DGO109Sa1b8MY7XT1FrZvwxjtdPUWtm/DGO109Sa1b8MY7XT1FrZvwxjtdPU2tG/DGO109Sa1b8MY7XT1NrRvwxjtdPU2tG/DGO109Sa1b8MY7XT1NrRvwxjtdPUWtm/", + "dtype": "f8" + }, + "z": { + "bdata": "eNgrpzp84r/wsFdOdfjYv/CwV051+Ni/eNgrpzp84r/wsFdOdfjYv/CwV051+Ni/eNgrpzp84r/wsFdOdfjYv/CwV051+Ni/eNgrpzp84r/wsFdOdfjYv/CwV051+Ni/", + "dtype": "f8" + } + }, + { + "hovertemplate": "Image(image_id=0, camera_id=0, name=\"0\", triangulated=50/50)", + "legendgroup": "GT", + "line": { + "color": "rgb(255,0,0)", + "width": 1 + }, + "mode": "lines", + "name": "0", + "showlegend": false, + "type": "scatter3d", + "x": { + "bdata": "FFmbdcbU1z8UWZt1xtTSPxRZm3XG1Nw/FFmbdcbU1z8UWZt1xtTcPxRZm3XG1Nw/FFmbdcbU1z8UWZt1xtTcPxRZm3XG1NI/FFmbdcbU1z8UWZt1xtTSPxRZm3XG1NI/", + "dtype": "f8" + }, + "y": { + "bdata": "WMsUdjmWxD+wlinsciy6P7CWKexyLLo/WMsUdjmWxD+wlinsciy6P1jLFHY5Fsw/WMsUdjmWxD9YyxR2ORbMP1jLFHY5Fsw/WMsUdjmWxD9YyxR2ORbMP7CWKexyLLo/", + "dtype": "f8" + }, + "z": { + "bdata": "rhe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/rhe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/rhe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/rhe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/", + "dtype": "f8" + } + }, + { + "legendgroup": "optimized", + "marker": { + "color": "rgb(0,255,0)", + "line": { + "width": 0 + }, + "size": 1 + }, + "mode": "markers", + "name": "optimized", + "type": "scatter3d", + "x": { + "bdata": "uF17vsMI0r/ERezUj/TYP1hs7RKxxNq/9FWP2lFk1L9IL3o9A63ev3DYwbSa3rQ/IKYWlbaGpb8qcMmFoJnivxJyH/O/Seu/YKrKn5/ltr8gI2MiT2PrP+BXCt7+UbY/8jVgKYBu478o1ogE9BvRPzwjXUwLVN8/kAcZjgn43D9gTjQp9jjev4gemOz3otM/KiIxQ8gg5j+wANsoccvhv1CWfXA83LY/nMtiSb642D9M7o/Z0NDev/DDOR3JQsc/4ABz8yYX0z9gAogTLE6+v8zJrAdp9ue/AGO+kcgslj+gwYbELgK6PwZPef20mOG/RPs+RrCJ3b90Lk3V/BLnP8yVA6o7a9y/RGZ9d5wc478AvOxG9M6gv4g7UVdhld8/HNl/At8t0L8gpG96nqLCP5qH1L8bPuG/0NQWqAmq3D9qGZjgxbbrv/rcE9DoR+a/sBdwBzhHuz+c7EUFDj/XPw==", + "dtype": "f8" + }, + "y": { + "bdata": "7I9+disE3T9e5nys0dnlP+REkM3J294/WA5t8mTc678M2YcXuJLYP9x7/Af8mOU/VLCoDf7T3L8YIgcE2IfdPzwG91D4HOk/sA5sa4UM4r90hCYqVmjiP3LR+svdQOq/FmYhZha147/4s+Ph2yjRv+AMWC6PhdQ/ANOFGfo7pL8y6Y4+eU/rP6APq+1Nnb0/MNhoUsg7z7/YrwJp6q3Mv4Riilat8OE/gLVnrtl8vr84sU+O7bvFv2yW37EUd+6/pGdWWoda3L8uSNYWghbuvyRTBF6IvOK/VBp/+wom6T9obnB9lqTaP5jJ/Y1P7se/VA+/zIim2r940t+bqxTGP+DrL3vGO7e/AHlO12m+nD+A2jO6NaTRvzwN75y7ddg/ECPU93CF1T84gqd380vWvxQZuMdVgtg/IP59Cl+Cqb/cCshVJFvrv5hzdfLjl+m/3N5Ph9f647+gSCXXhwC1Pw==", + "dtype": "f8" + }, + "z": { + "bdata": 
"g7ET4UE7AkDy99Y3aqILQK7DKUxOhwJATC9/JW/TDUB2NGqNQVQOQByL+xd1uAJAuqr9g4FvCkAwpMu1/2oKQBLtE9YpPgpAnlGvst3VAkCCF8Yp8sIHQPM/WMBs0gdAlN9nwFecC0BkPBeNc9gIQD9Lkoeknw5AZimzMEF5DUBvfySzVW0NQBDWCyL4yAVA79LTirh5AUD8jQpsbfsOQDjig3Ld5gRAWcLovoeCAkBM25yYUYkEQAaC8vsQ8QhAxjr8qPDRCkAzBuhaME8HQPvlL0bP0gBAVpkJbDdXDkDIHj66i6cEQH4Of33HgAdAtKhO+a5SB0A+WZ+RKokEQItqhVtcSQNA5KBin0plAUCOPQg1r1ILQAzwoQriBgtAjooUJwVwBUDIAklqUB8HQFBTDWxaaApAd/dpUO+LCUDQJFp8AS8DQH8YgXGWEQJAwMxUg847D0BsXg2bnFALQA==", + "dtype": "f8" + } + }, + { + "hovertemplate": "Image(image_id=1, camera_id=0, name=\"1\", triangulated=50/50)", + "legendgroup": "optimized", + "line": { + "color": "rgb(0,255,0)", + "width": 1 + }, + "mode": "lines", + "name": "1", + "showlegend": false, + "type": "scatter3d", + "x": { + "bdata": "ftukMT+l6b9q36QxPyXsv2rfpDE/Jee/ftukMT+l6b9q36QxPyXnvz3fpDE/Jee/ftukMT+l6b8936QxPyXnvz3fpDE/Jey/ftukMT+l6b8936QxPyXsv2rfpDE/Jey/", + "dtype": "f8" + }, + "y": { + "bdata": "Tju109Sa1b/0PLXT1FrZv209tdPUWtm/Tju109Sa1b9tPbXT1FrZv209tdPU2tG/Tju109Sa1b9tPbXT1NrRv/Q8tdPU2tG/Tju109Sa1b/0PLXT1NrRv/Q8tdPUWtm/", + "dtype": "f8" + }, + "z": { + "bdata": "/N4rpzp84r/AwVdOdfjYv127V051+Ni//N4rpzp84r9du1dOdfjYvzC6V051+Ni//N4rpzp84r8wuldOdfjYv5PAV051+Ni//N4rpzp84r+TwFdOdfjYv8DBV051+Ni/", + "dtype": "f8" + } + }, + { + "hovertemplate": "Image(image_id=0, camera_id=0, name=\"0\", triangulated=50/50)", + "legendgroup": "optimized", + "line": { + "color": "rgb(0,255,0)", + "width": 1 + }, + "mode": "lines", + "name": "0", + "showlegend": false, + "type": "scatter3d", + "x": { + "bdata": "L1mbdcbU1z8uWZt1xtTSPy5Zm3XG1Nw/L1mbdcbU1z8uWZt1xtTcPy5Zm3XG1Nw/L1mbdcbU1z8uWZt1xtTcPy5Zm3XG1NI/L1mbdcbU1z8uWZt1xtTSPy5Zm3XG1NI/", + "dtype": "f8" + }, + "y": { + "bdata": "HswUdjmWxD8omCnsciy6PyqYKexyLLo/HswUdjmWxD8qmCnsciy6PxXMFHY5Fsw/HswUdjmWxD8VzBR2ORbMPxTMFHY5Fsw/HswUdjmWxD8UzBR2ORbMPyiYKexyLLo/", + "dtype": "f8" + }, + "z": { + "bdata": "rxe79j0k6L+wF7v2PSTiv7AXu/Y9JOK/rxe79j0k6L+wF7v2PSTiv64Xu/Y9JOK/rxe79j0k6L+uF7v2PSTiv64Xu/Y9JOK/rxe79j0k6L+uF7v2PSTiv7AXu/Y9JOK/", + "dtype": "f8" + } + } + ], + "layout": { + "height": 800, + "legend": { + "orientation": "h", + "x": 0.1, + "xanchor": "left", + "y": 0.99, + "yanchor": "top" + }, + "margin": { + "b": 0, + "l": 0, + "pad": 0, + "r": 0, + "t": 0 + }, + "scene": { + "aspectmode": "data", + "camera": { + "eye": { + "x": 0, + "y": -0.1, + "z": -2 + }, + "projection": { + "type": "orthographic" + }, + "up": { + "x": 0, + "y": -1, + "z": 0 + } + }, + "dragmode": "orbit", + "xaxis": { + "autorange": true, + "showbackground": false, + "showgrid": false, + "showline": false, + "showticklabels": true, + "visible": false + }, + "yaxis": { + "autorange": true, + "showbackground": false, + "showgrid": false, + "showline": false, + "showticklabels": true, + "visible": false + }, + "zaxis": { + "autorange": true, + "showbackground": false, + "showgrid": false, + "showline": false, + "showticklabels": true, + "visible": false + } + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#f2f5fa" + }, + "error_y": { + "color": "#f2f5fa" + }, + "marker": { + "line": { + "color": "rgb(17,17,17)", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "rgb(17,17,17)", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#A2B1C6", + "gridcolor": "#506784", + 
"linecolor": "#506784", + "minorgridcolor": "#506784", + "startlinecolor": "#A2B1C6" + }, + "baxis": { + "endlinecolor": "#A2B1C6", + "gridcolor": "#506784", + "linecolor": "#506784", + "minorgridcolor": "#506784", + "startlinecolor": "#A2B1C6" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "line": { + "color": "#283442" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "line": { + "color": "#283442" + } + }, + "type": "scattergl" + } + ], + "scattermap": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermap" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#506784" + }, + "line": { + "color": "rgb(17,17,17)" + } + }, + "header": { + "fill": { + "color": "#2a3f5f" + }, + "line": { + "color": "rgb(17,17,17)" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#f2f5fa", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#f2f5fa" + }, + "geo": { + "bgcolor": "rgb(17,17,17)", + "lakecolor": "rgb(17,17,17)", + "landcolor": "rgb(17,17,17)", + "showlakes": true, + "showland": true, + "subunitcolor": "#506784" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "dark" + }, + "paper_bgcolor": "rgb(17,17,17)", + "plot_bgcolor": 
"rgb(17,17,17)", + "polar": { + "angularaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "bgcolor": "rgb(17,17,17)", + "radialaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + }, + "yaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + }, + "zaxis": { + "backgroundcolor": "rgb(17,17,17)", + "gridcolor": "#506784", + "gridwidth": 2, + "linecolor": "#506784", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#C8D4E3" + } + }, + "shapedefaults": { + "line": { + "color": "#f2f5fa" + } + }, + "sliderdefaults": { + "bgcolor": "#C8D4E3", + "bordercolor": "rgb(17,17,17)", + "borderwidth": 1, + "tickwidth": 0 + }, + "ternary": { + "aaxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "baxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + }, + "bgcolor": "rgb(17,17,17)", + "caxis": { + "gridcolor": "#506784", + "linecolor": "#506784", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "updatemenudefaults": { + "bgcolor": "#506784", + "borderwidth": 0 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#283442", + "linecolor": "#506784", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#283442", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#283442", + "linecolor": "#506784", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#283442", + "zerolinewidth": 2 + } + } + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = viz_3d.init_figure()\n", + "viz_3d.plot_reconstruction(\n", + " fig,\n", + " rec_init,\n", + " min_track_length=0,\n", + " color=\"rgb(255,255,255)\",\n", + " points_rgb=False,\n", + " name=\"init\",\n", + ")\n", + "viz_3d.plot_reconstruction(\n", + " fig, rec_gt, min_track_length=0, color=\"rgb(255,0,0)\", points_rgb=False, name=\"GT\"\n", + ")\n", + "viz_3d.plot_reconstruction(\n", + " fig,\n", + " rec,\n", + " min_track_length=0,\n", + " color=\"rgb(0,255,0)\",\n", + " points_rgb=False,\n", + " name=\"optimized\",\n", + ")\n", + "fig.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env2slam", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/SimpleSLAM/slam/test_BA.py b/modules/SimpleSLAM/slam/test_BA.py new file mode 100644 index 0000000000..6c258af2af --- /dev/null +++ b/modules/SimpleSLAM/slam/test_BA.py @@ -0,0 +1,112 @@ +import pyceres +import pycolmap +import numpy as np +from hloc.utils import viz_3d + +from pycolmap import cost_functions + +def create_reconstruction(num_points=50, num_images=2, seed=3, noise=0): + state = np.random.RandomState(seed) + rec = pycolmap.Reconstruction() + p3d = state.uniform(-1, 1, (num_points, 3)) + np.array([0, 0, 3]) + for p in p3d: + rec.add_point3D(p, pycolmap.Track(), np.zeros(3)) + w, h = 640, 480 + cam = 
pycolmap.Camera( + model="SIMPLE_PINHOLE", + width=w, + height=h, + params=np.array([max(w, h) * 1.2, w / 2, h / 2]), + camera_id=0, + ) + rec.add_camera(cam) + for i in range(num_images): + im = pycolmap.Image( + id=i, + name=str(i), + camera_id=cam.camera_id, + cam_from_world=pycolmap.Rigid3d( + pycolmap.Rotation3d(), state.uniform(-1, 1, 3) + ), + ) + im.registered = True + p2d = cam.img_from_cam( + im.cam_from_world * [p.xyz for p in rec.points3D.values()] + ) + p2d_obs = np.array(p2d) + state.randn(len(p2d), 2) * noise + im.points2D = pycolmap.ListPoint2D( + [pycolmap.Point2D(p, id_) for p, id_ in zip(p2d_obs, rec.points3D)] + ) + rec.add_image(im) + return rec + + +rec_gt = create_reconstruction() + + +fig = viz_3d.init_figure() +viz_3d.plot_reconstruction( + fig, rec_gt, min_track_length=0, color="rgb(255,0,0)", points_rgb=False +) +fig.show() + + +# def define_problem(rec): #ORIGINAL +# prob = pyceres.Problem() +# loss = pyceres.TrivialLoss() +# for im in rec.images.values(): +# cam = rec.cameras[im.camera_id] +# for p in im.points2D: +# print(type(pycolmap.cost_functions.ReprojErrorCost( +# cam.model, im.cam_from_world, p.xy +# ))) +# cost = pycolmap.cost_functions.ReprojErrorCost( +# cam.model, im.cam_from_world, p.xy +# ) +# prob.add_residual_block( +# cost, loss, [rec.points3D[p.point3D_id].xyz, cam.params] +# ) +# for cam in rec.cameras.values(): +# prob.set_parameter_block_constant(cam.params) +# return prob + +def define_problem(rec): #ORIGINAL + prob = pyceres.Problem() + loss = pyceres.TrivialLoss() + for im in rec.images.values(): + cam = rec.cameras[im.camera_id] + for p in im.points2D: + # print((p.xy.shape), type(p.xy), p.point3D_id) + pt2 = p.xy.reshape((2,1)) + cost = pycolmap.cost_functions.ReprojErrorCost( + cam.model, im.cam_from_world, pt2 + ) + # cost = pycolmap.cost_functions.ReprojErrorCost( + # cam.model, pt2, im.cam_from_world + # ) + prob.add_residual_block( + cost, loss, [rec.points3D[p.point3D_id].xyz, cam.params] + ) + for cam in rec.cameras.values(): + prob.set_parameter_block_constant(cam.params) + return prob + +def solve(prob): + print( + prob.num_parameter_blocks(), + prob.num_parameters(), + prob.num_residual_blocks(), + prob.num_residuals(), + ) + options = pyceres.SolverOptions() + options.linear_solver_type = pyceres.LinearSolverType.DENSE_QR + options.minimizer_progress_to_stdout = True + options.num_threads = -1 + summary = pyceres.SolverSummary() + pyceres.solve(options, prob, summary) + print(summary.BriefReport()) + + +rec = create_reconstruction() +problem = define_problem(rec) +solve(problem) \ No newline at end of file diff --git a/modules/SimpleSLAM/tests/__init__.py b/modules/SimpleSLAM/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/tests/test_ba_utils.py b/modules/SimpleSLAM/tests/test_ba_utils.py new file mode 100644 index 0000000000..a7c97b347c --- /dev/null +++ b/modules/SimpleSLAM/tests/test_ba_utils.py @@ -0,0 +1,258 @@ +""" +Synthetic‐data unit‑tests for *ba_utils.py* +=========================================== + +These tests generate a small 3‑D scene, propagate a pin‑hole camera +through a known motion, add optional noise to the **initial** geometry +and verify that each bundle‑adjustment helper implemented in +*ba_utils.py* + + • two_view_ba + • pose_only_ba + • local_bundle_adjustment + +reduces the mean reprojection RMSE. 
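Before the details, an at-a-glance sketch of how the three helpers are invoked; this mirrors the test cases at the bottom of this file and is a usage sketch only (`generate_scene`, `K` and `reproj_rmse` are the helpers defined later in this module, and the keyword values are simply the ones the tests use):

```python
import slam.core.ba_utils as bau

wmap, kps = generate_scene(n_frames=2, add_noise=False)
bau.two_view_ba(wmap, K, kps, max_iters=30)                # refine the two-view geometry

wmap, kps = generate_scene(n_frames=3, add_noise=False)
bau.pose_only_ba(wmap, K, kps, frame_idx=2, max_iters=15)  # refine a single camera pose

wmap, kps = generate_scene(n_frames=10, add_noise=True)
bau.local_bundle_adjustment(wmap, K, kps, center_kf_idx=9, window_size=8, max_iters=25)
print(reproj_rmse(wmap, kps))                              # should drop after local BA
```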
+
+**Pose convention – T_wc (camera→world)**
+----------------------------------------
+Your SLAM pipeline stores a pose as the rigid-body transform **from the
+camera frame to the world frame**. To project a world point X_w into a
+camera at T_wc we therefore use
+
+```
+X_c = R_wcᵀ · (X_w − t_wc)
+```
+
+Both the synthetic generator and the RMSE metric below follow that
+convention so that the tests are consistent with run-time code.
+
+Requires
+--------
+* OpenCV ≥ 4 (for `cv2.KeyPoint`, `cv2.Rodrigues`)
+* `pyceres` + `pycolmap` (same as *ba_utils*)
+
+Run with *pytest* or plain *unittest*:
+
+```bash
+python -m pytest tests/test_ba_utils.py   # preferred
+# – or –
+python tests/test_ba_utils.py             # falls back to unittest.main()
+```
+"""
+
+from __future__ import annotations
+import math
+import unittest
+from typing import List, Tuple, Dict
+
+import cv2
+import numpy as np
+
+# import the module under test
+import slam.core.ba_utils as bau
+
+
+# ------------------------------------------------------------
+# Minimal SLAM-like data containers understood by ba_utils
+# ------------------------------------------------------------
+class MapPoint:
+    """Light-weight stand-in for the pipeline's *MapPoint*."""
+
+    def __init__(self, position: np.ndarray):
+        self.position: np.ndarray = position.astype(np.float64)
+        # list[(frame_idx, kp_idx)]
+        self.observations: List[Tuple[int, int]] = []
+
+
+class WorldMap:
+    """Holds camera poses (T_wc, camera→world) and 3-D points."""
+
+    def __init__(self):
+        self.poses: List[np.ndarray] = []      # each 4×4 SE(3)
+        self.points: Dict[int, MapPoint] = {}  # pid → MapPoint
+
+
+# ------------------------------------------------------------
+# Synthetic scene generator
+# ------------------------------------------------------------
+WIDTH, HEIGHT = 1280, 960
+FX = FY = 800.0
+CX, CY = WIDTH / 2.0, HEIGHT / 2.0
+K = np.array([[FX, 0, CX], [0, FY, CY], [0, 0, 1]], np.float64)
+
+
+def _yaw_to_R(yaw_deg: float) -> np.ndarray:
+    """Rotation around the *y* axis (right-handed, degrees → 3×3)."""
+    theta = math.radians(yaw_deg)
+    c, s = math.cos(theta), math.sin(theta)
+    return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], np.float64)
+
+
+def generate_scene(
+    n_frames: int,
+    n_points: int = 50,
+    dx_per_frame: float = 0.10,
+    yaw_per_frame_deg: float = 2.0,
+    pix_noise: float = 1.0,
+    pose_trans_noise: float = 0.05,
+    pose_rot_noise_deg: float = 2.0,
+    point_noise: float = 0.05,
+    *,
+    add_noise: bool = True,
+):
+    """Return *(world_map, keypoints)* with optional noisy initial estimates.
+
+    Set *add_noise=False* (or individual *_noise parameters to 0) to
+    create a perfect initialisation that should converge in a single
+    BA iteration. Ground-truth is used only to generate measurements –
+    the initial geometry fed to BA is perturbed only when noise is
+    requested.
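A minimal usage sketch of the two regimes described above (illustrative only; `reproj_rmse` is the metric defined further down in this file):

```python
# Perfect initialisation: measurements and initial geometry coincide.
wmap_clean, kps_clean = generate_scene(n_frames=2, add_noise=False)
assert reproj_rmse(wmap_clean, kps_clean) < 1e-9

# Noisy initialisation (the default): poses and points are perturbed,
# so the RMSE is non-zero and bundle adjustment has something to reduce.
wmap_noisy, kps_noisy = generate_scene(n_frames=2)
assert reproj_rmse(wmap_noisy, kps_noisy) > 0.0
```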
+ """ + + # When add_noise=False force all noise parameters to zero + if not add_noise: + pix_noise = pose_trans_noise = pose_rot_noise_deg = point_noise = 0.0 + + rng = np.random.default_rng(42) + + # --- ground‑truth 3‑D points ------------------------------------- + pts_gt = np.column_stack( + ( + rng.uniform(-1.0, 1.0, n_points), # X + rng.uniform(-0.7, 0.7, n_points), # Y + rng.uniform(4.0, 8.0, n_points), # Z + ) + ) + + # --- ground‑truth camera poses (camera-from-world) --------------- + poses_gt: List[np.ndarray] = [] + for i in range(n_frames): + R = _yaw_to_R(i * yaw_per_frame_deg) + t = np.array([i * dx_per_frame, 0.0, 0.0], np.float64) + T = np.eye(4, dtype=np.float64) + T[:3, :3] = R + T[:3, 3] = t + poses_gt.append(T) + + # --- create (possibly noisy) *initial* map ----------------------- + wmap = WorldMap() + keypoints: List[List[cv2.KeyPoint]] = [[] for _ in range(n_frames)] + + # a) camera poses -------------------------------------------------- + for T_gt in poses_gt: + # translation noise + t_noise = rng.normal(0.0, pose_trans_noise, 3) + # small random rotation about a random axis + axis = rng.normal(0.0, 1.0, 3) + axis /= np.linalg.norm(axis) + angle = math.radians(pose_rot_noise_deg) * rng.normal() + R_noise, _ = cv2.Rodrigues(axis * angle) + + T_noisy = np.eye(4, dtype=np.float64) + T_noisy[:3, :3] = R_noise @ T_gt[:3, :3] + T_noisy[:3, 3] = T_gt[:3, 3] + t_noise + wmap.poses.append(T_noisy) + + # b) points + observations --------------------------------------- + for pid, X_w in enumerate(pts_gt): + X_init = X_w + rng.normal(0.0, point_noise, 3) + mp = MapPoint(X_init) + wmap.points[pid] = mp + + for f_idx, T_wc in enumerate(poses_gt): + # Project through **ground‑truth** pose to create measurement + R_wc, t_wc = T_wc[:3, :3], T_wc[:3, 3] + X_c = R_wc.T @ (X_w - t_wc) # world → camera frame + + Z = X_c[2] + if Z <= 0: # behind camera + continue + + # Homogeneous pixel coordinates via intrinsics + uv_h = K @ X_c + u = uv_h[0] / Z + v = uv_h[1] / Z + + if not (0.0 <= u < WIDTH and 0.0 <= v < HEIGHT): + continue + + # add pixel noise (measurement noise, not to the *initial* estimate) + u_meas = u + rng.normal(0.0, pix_noise) + v_meas = v + rng.normal(0.0, pix_noise) + + kp = cv2.KeyPoint(float(u_meas), float(v_meas), 1) + kp_idx = len(keypoints[f_idx]) + keypoints[f_idx].append(kp) + mp.observations.append((f_idx, kp_idx)) + + return wmap, keypoints + + +# ------------------------------------------------------------ +# Utility: reprojection RMSE +# ------------------------------------------------------------ + +def reproj_rmse(wmap: WorldMap, keypoints, frames: List[int] | None = None) -> float: + sq_err = 0.0 + count = 0 + frames = set(range(len(keypoints))) if frames is None else set(frames) + + for mp in wmap.points.values(): + for f_idx, kp_idx in mp.observations: + if f_idx not in frames: + continue + kp = keypoints[f_idx][kp_idx] + u_m, v_m = kp.pt + + T = wmap.poses[f_idx] + X = mp.position + R_wc, t_wc = T[:3, :3], T[:3, 3] + X_c = R_wc.T @ (X - t_wc) + Z = X_c[2] + if Z <= 0: + continue + + uv_h = K @ X_c + u_p = uv_h[0] / Z + v_p = uv_h[1] / Z + + sq_err += (u_p - u_m) ** 2 + (v_p - v_m) ** 2 + count += 2 + + return math.sqrt(sq_err / count) if count else 0.0 + + +# ------------------------------------------------------------ +# Unit‑tests +# ------------------------------------------------------------ +class TestBundleAdjustment(unittest.TestCase): + def test_two_view_ba(self): + wmap, kps = generate_scene(n_frames=2, add_noise=False) + e0 = 
reproj_rmse(wmap, kps)
+        self.assertAlmostEqual(e0, 0.0, places=6, msg="Initial reprojection error should be zero when add_noise=False")
+        bau.two_view_ba(wmap, K, kps, max_iters=30)
+        e1 = reproj_rmse(wmap, kps)
+        self.assertLessEqual(e1, e0 + 1e-9, msg=f"two_view_ba failed: {e0:.6f} → {e1:.6f}")
+
+    def test_pose_only_ba(self):
+        wmap, kps = generate_scene(n_frames=3, add_noise=False)
+        e0 = reproj_rmse(wmap, kps, frames=[2])
+        self.assertAlmostEqual(e0, 0.0, places=6)
+        bau.pose_only_ba(wmap, K, kps, frame_idx=2, max_iters=15)
+        e1 = reproj_rmse(wmap, kps, frames=[2])
+        self.assertLessEqual(e1, e0 + 1e-9, msg=f"pose_only_ba failed: {e0:.6f} → {e1:.6f}")
+
+    def test_local_bundle_adjustment(self):
+        wmap, kps = generate_scene(n_frames=10, add_noise=True)  # noisy init so BA has error to reduce
+        e0 = reproj_rmse(wmap, kps)
+        bau.local_bundle_adjustment(wmap, K, kps, center_kf_idx=9, window_size=8, max_iters=25)
+        e1 = reproj_rmse(wmap, kps)
+        self.assertLess(e1, e0, msg=f"local_bundle_adjustment failed: {e0:.2f} → {e1:.2f}")
+
+
+if __name__ == "__main__":
+    # Fall back to the unittest runner when executed directly.
+    unittest.main(verbosity=2)
+
+# TODO: factor the noise injection into a reusable helper that perturbs points and poses (e.g. via scipy rotations/quaternions).
+# TODO: keep a zero-noise regression case that checks BA leaves a perfect initialisation at ~0 reprojection error.
diff --git a/modules/SimpleSLAM/tests/test_lightglue_vs_manual.py b/modules/SimpleSLAM/tests/test_lightglue_vs_manual.py
new file mode 100644
index 0000000000..2e27b1a3d1
--- /dev/null
+++ b/modules/SimpleSLAM/tests/test_lightglue_vs_manual.py
@@ -0,0 +1,74 @@
+import pytest
+import cv2
+import numpy as np
+import torch
+from types import SimpleNamespace
+
+# adjust this import to match your project layout
+from slam.core.features_utils import (
+    init_feature_pipeline,
+    _lightglue_detect_and_match,
+    feature_extractor,
+    feature_matcher,
+)
+
+@pytest.fixture(scope="module")
+def synthetic_pair():
+    """
+    Create two simple 200×200 BGR images, each with four white dots.
+    The second is a small translation of the first.
+ """ + img1 = np.zeros((200,200,3), dtype=np.uint8) + img2 = np.zeros((200,200,3), dtype=np.uint8) + pts = [(50,50), (150,50), (50,150), (150,150)] + for x,y in pts: + cv2.circle(img1, (x,y), 5, (255,255,255), -1) + cv2.circle(img2, (x+5,y+3), 5, (255,255,255), -1) + return img1, img2 + +def test_lightglue_pipeline_matches_manual(synthetic_pair): + img1, img2 = synthetic_pair + + # build a fake args object + args = SimpleNamespace(use_lightglue=True, + detector=None, # not used for LG + matcher=None, + min_conf=0.0) + + # 1) initialize LightGlue extractor & matcher + extractor, matcher = init_feature_pipeline(args) + + # 2) run the “direct” LG detect+match + kp1_dir, kp2_dir, desc1_dir, desc2_dir, matches_dir = ( + _lightglue_detect_and_match(img1, img2, extractor, matcher) + ) + + # 3) run our new split API + kp1_man, desc1_man = feature_extractor(args, img1, extractor) + kp2_man, desc2_man = feature_extractor(args, img2, extractor) + matches_man = feature_matcher(args, + kp1_man, kp2_man, + desc1_man, desc2_man, + matcher) + + # --- Assertions --- + + # a) same number of keypoints & identical positions + assert len(kp1_dir) == len(kp1_man) + for kd, km in zip(kp1_dir, kp1_man): + assert kd.pt == pytest.approx(km.pt) + + assert len(kp2_dir) == len(kp2_man) + for kd, km in zip(kp2_dir, kp2_man): + assert kd.pt == pytest.approx(km.pt) + + # b) descriptors are identical tensors + # (direct returns torch.Tensor, manual too) + assert torch.allclose(desc1_dir, desc1_man) + assert torch.allclose(desc2_dir, desc2_man) + + # c) same matches (queryIdx/trainIdx) + assert len(matches_dir) == len(matches_man) + for mdir, mman in zip(matches_dir, matches_man): + assert mdir.queryIdx == mman.queryIdx + assert mdir.trainIdx == mman.trainIdx diff --git a/modules/SimpleSLAM/tests/test_multi_view_triangulation-minimal.py b/modules/SimpleSLAM/tests/test_multi_view_triangulation-minimal.py new file mode 100644 index 0000000000..ed39c10168 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_multi_view_triangulation-minimal.py @@ -0,0 +1,73 @@ +import numpy as np +from slam.core.multi_view_utils import multi_view_triangulation + +def build_camera(pose_w_c, K): + """Return 3×4 projection matrix from camera→world pose.""" + return K @ np.linalg.inv(pose_w_c)[:3, :4] + +def random_pose(tx=0.0, ty=0.0, tz=0.0): + """Simple axis-aligned translation pose_w_c.""" + T = np.eye(4) + T[:3, 3] = [tx, ty, tz] + return T + +def test_triangulation_noise_free(): + # ---------- synthetic scene -------------------------------------- + fx = fy = 500.0 + cx = cy = 320.0 + K = np.array([[fx, 0, cx], + [0, fy, cy], + [0, 0, 1]]) + + # camera 0 at origin, camera 1 translated 1 m on +X, camera 2 on +Y + poses = [random_pose(0, 0, 0), + random_pose(1, 0, 0), + random_pose(0, 1, 0)] + + # ground-truth world point (in front of all cameras) + X_w_gt = np.array([2.0, 1.5, 8.0]) + + # synthetic pixel observations + pts2d = [] + for T in poses: + pc = (np.linalg.inv(T) @ np.append(X_w_gt, 1))[:3] + uv = (K @ pc)[:2] / pc[2] + pts2d.append(uv) + + # ---------- call the function ------------------------------------ + X_w_est = multi_view_triangulation( + K, poses, np.float32(pts2d), + min_depth=0.5, max_depth=50.0, max_rep_err=0.5) + + assert X_w_est is not None, "Triangulation unexpectedly failed" + # sub-millimetre accuracy in noise-free synthetic setting + assert np.allclose(X_w_est, X_w_gt, atol=1e-3) + +def test_triangulation_with_pixel_noise(): + np.random.seed(42) + fx = fy = 600.0 + cx = cy = 320.0 + K = np.array([[fx, 0, cx], + [0, 
fy, cy], + [0, 0, 1]]) + + poses = [random_pose(0, 0, 0), + random_pose(1, 0.2, 0), + random_pose(-0.3, 1, 0), + random_pose(0.5, -0.1, 0.3)] + + X_w_gt = np.array([-1.5, 0.8, 6.5]) + pts2d = [] + for T in poses: + pc = (np.linalg.inv(T) @ np.append(X_w_gt, 1))[:3] + uv = (K @ pc)[:2] / pc[2] + uv += np.random.normal(scale=0.4, size=2) # add 0.4-px Gaussian noise + pts2d.append(uv) + + X_w_est = multi_view_triangulation( + K, poses, np.float32(pts2d), + min_depth=0.5, max_depth=50.0, max_rep_err=2.0) + + assert X_w_est is not None, "Triangulation failed with moderate noise" + # centimetre-level accuracy is fine here + assert np.linalg.norm(X_w_est - X_w_gt) < 0.05 diff --git a/modules/SimpleSLAM/tests/test_multi_view_utils.py b/modules/SimpleSLAM/tests/test_multi_view_utils.py new file mode 100644 index 0000000000..e239450e59 --- /dev/null +++ b/modules/SimpleSLAM/tests/test_multi_view_utils.py @@ -0,0 +1,189 @@ +"""pytest test‑suite for `multi_view_utils.py` +================================================ + +Goals +----- +1. **Function integration** – + * `multi_view_triangulation()` + * `MultiViewTriangulator` + +2. **Real‑world wiring** – whenever the project’s own + `landmark_utils.Map` implementation is importable we use it, giving an + *integration* rather than pure‑unit test. When that class is missing (for + example in a stripped‑down CI job) we transparently fall back to a *very* + small stub that exposes only the methods/attributes the triangulator needs. + +3. **Numerical accuracy** – the synthetic scene is designed so that with 0.4 px + image noise and five views the RMS localisation error should be + ≲ 5 cm. If the implementation regresses we’ll catch it. + +Run with:: + + pytest tests/test_multi_view_utils.py +""" + +from __future__ import annotations + +import importlib +import sys +import types +from pathlib import Path + +import numpy as np +import pytest + +# --------------------------------------------------------------------------- # +# Locate & load the module under test – `multi_view_utils` +# --------------------------------------------------------------------------- # +# We try these locations in order: +# 1. Installed package → `slam.core.multi_view_utils` +# 2. Source tree root ("editable" dev) → `multi_view_utils` +# 3. Direct path fallback so the test also works when launched from a +# separate build/CI directory. + +mvu: types.ModuleType + +for _modname in ("slam.core.multi_view_utils", "multi_view_utils"): + try: + mvu = importlib.import_module(_modname) # type: ignore + break + except ModuleNotFoundError: # pragma: no cover – probe next option + pass + +# Public call‑ables +multi_view_triangulation = mvu.multi_view_triangulation # type: ignore[attr‑defined] +MultiViewTriangulator = mvu.MultiViewTriangulator # type: ignore[attr‑defined] + +# --------------------------------------------------------------------------- # +# Map implementation (real vs stub) +# --------------------------------------------------------------------------- # +# 1. Prefer the full implementation shipped with the repo. +# 2. Otherwise synthesize the minimal surface‑area stub so that the Triangulator +# can still be unit‑tested. 
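The note above promises a fallback stub when `landmark_utils.Map` cannot be imported, but the import that follows is unconditional. Purely as a sketch of what that fallback could look like (the `add_point` name and signature are an assumption about what `MultiViewTriangulator` expects, not the real API; the hard import below is left unchanged), one might write:

```python
import numpy as np

try:
    from slam.core.landmark_utils import Map as SLAMMap  # real implementation, preferred
except ModuleNotFoundError:  # pragma: no cover - stripped-down CI environments
    class _StubMapPoint:
        """Only the attribute the assertions below read."""
        def __init__(self, position):
            self.position = np.asarray(position, dtype=np.float64)

    class SLAMMap:
        """Minimal stand-in; `add_point` is a guessed interface."""
        def __init__(self):
            self.points = {}

        def add_point(self, position):
            pid = len(self.points)
            self.points[pid] = _StubMapPoint(position)
            return pid
```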
+from slam.core.landmark_utils import Map as SLAMMap # type: ignore[attr‑defined] + + +# --------------------------------------------------------------------------- # +# Synthetic scene generation helpers +# --------------------------------------------------------------------------- # + +def _make_camera_pose(tx: float) -> np.ndarray: + """Camera looks down +Z, translated along +X by *tx* (c→w).""" + T = np.eye(4) + T[0, 3] = tx + return T + + +def _generate_scene( + n_views: int = 5, + n_pts: int = 40, + noise_px: float = 0.4, + seed: int | None = None, +): + """Build a toy scene and return (K, poses_w_c, pts_w, 2‑D projections).""" + + rng = np.random.default_rng(seed) + + # -- basic pin‑hole intrinsics (640×480) -- + K = np.array([[800.0, 0.0, 320.0], [0.0, 800.0, 240.0], [0.0, 0.0, 1.0]]) + + # -- camera trajectory (translate along X) -- + poses = [_make_camera_pose(t) for t in np.linspace(0.0, 1.0, n_views)] + + # -- random 3‑D points in front of cameras (z ∈ [4,6]) -- + pts_w = np.vstack( + ( + rng.uniform(-1.0, 1.0, n_pts), # x + rng.uniform(-1.0, 1.0, n_pts), # y + rng.uniform(4.0, 6.0, n_pts), # z – ensure positive depth + ) + ).T + + # -- project each point into every view and add Gaussian pixel noise -- + pts2d_all: list[np.ndarray] = [] + for T_w_c in poses: + P_c_w = np.linalg.inv(T_w_c) # w→c + uv_view = [] + for X_w in pts_w: + Xc_h = P_c_w @ np.append(X_w, 1.0) + uv = (K @ Xc_h[:3])[:2] / Xc_h[2] + uv += rng.normal(0.0, noise_px, 2) + uv_view.append(uv) + pts2d_all.append(np.asarray(uv_view, dtype=np.float32)) + + return K, poses, pts_w, pts2d_all + +# --------------------------------------------------------------------------- # +# Light cv2.KeyPoint substitute (Triangulator only needs `.pt`) +# --------------------------------------------------------------------------- # + +class _KeyPoint: # pylint: disable=too‑few‑public‑methods + def __init__(self, x: float, y: float): + self.pt = (float(x), float(y)) + +# --------------------------------------------------------------------------- # +# Tests +# --------------------------------------------------------------------------- # + + +def test_multi_view_triangulation_accuracy(): + """Direct N‑view triangulation should achieve < 5 cm RMS error.""" + K, poses, pts_w, pts2d = _generate_scene() + + errs: list[float] = [] + for j in range(len(pts_w)): + uv_track = [view[j] for view in pts2d] + X_hat = multi_view_triangulation( + K, + poses, + np.float32(uv_track), + min_depth=0.1, + max_depth=10.0, + max_rep_err=2.0, + ) + assert X_hat is not None, "Triangulation unexpectedly returned None" + errs.append(np.linalg.norm(X_hat - pts_w[j])) + + rms = float(np.sqrt(np.mean(np.square(errs)))) + assert rms < 5e-2, f"RMS error too high: {rms:.4f} m" + + +@pytest.mark.parametrize("min_views", [2, 3]) +def test_multiview_triangulator_pipeline(min_views: int): + """Full pipeline: incremental key‑frames → map landmarks.""" + K, poses, pts_w, pts2d = _generate_scene() + + tri = MultiViewTriangulator( + K, + min_views=min_views, + merge_radius=0.1, + max_rep_err=2.0, + min_depth=0.1, + max_depth=10.0, + ) + + world_map = SLAMMap() + + # ---- Feed key‑frames ---- + for frame_idx, (pose_w_c, uv_view) in enumerate(zip(poses, pts2d)): + kps: list[_KeyPoint] = [] + track_map: dict[int, int] = {} + for pid, (u, v) in enumerate(uv_view): + kps.append(_KeyPoint(u, v)) + track_map[pid] = pid # 1‑to‑1 track id + dummy_img = np.zeros((480, 640, 3), dtype=np.uint8) + tri.add_keyframe(frame_idx, pose_w_c, kps, track_map, dummy_img) + + new_ids = 
tri.triangulate_ready_tracks(world_map) + assert len(new_ids) == len(pts_w), "Not all points were triangulated" + + # ---- Numerical accuracy ---- + errs: list[float] = [] + for pid in new_ids: + p_obj = world_map.points[pid] + # Real MapPoint uses `.position`; stub stores the same attribute name. + X_hat = p_obj.position if hasattr(p_obj, "position") else p_obj.xyz # type: ignore[attr‑defined] + errs.append(np.linalg.norm(X_hat - pts_w[pid])) + + rms = float(np.sqrt(np.mean(np.square(errs)))) + assert rms < 5e-2, f"RMS error too high: {rms:.4f} m" diff --git a/modules/SimpleSLAM/tools/trajectory_eval.py b/modules/SimpleSLAM/tools/trajectory_eval.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/SimpleSLAM/tools/visualization.py b/modules/SimpleSLAM/tools/visualization.py new file mode 100644 index 0000000000..e69de29bb2
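The two triangulation test files above pin down the contract of `multi_view_triangulation`: it takes the intrinsics `K`, a list of camera→world 4×4 poses, the per-view pixel observations, and depth / reprojection-error gates, and returns either a 3-D point or `None`. For a concrete picture of what such a routine computes, here is an illustrative DLT (linear N-view triangulation) sketch that satisfies the same contract; it is a reference sketch only, not the implementation in `slam/core/multi_view_utils.py`:

```python
import numpy as np


def dlt_triangulate(K, poses_w_c, pts2d, min_depth=0.1, max_depth=50.0, max_rep_err=2.0):
    """Linear (DLT) N-view triangulation with the same gating as the tests above."""
    A = []
    for T_w_c, uv in zip(poses_w_c, pts2d):
        P = K @ np.linalg.inv(T_w_c)[:3, :4]   # 3x4 projection: world -> pixels
        u, v = float(uv[0]), float(uv[1])
        A.append(u * P[2] - P[0])              # each observation contributes two rows
        A.append(v * P[2] - P[1])
    _, _, Vt = np.linalg.svd(np.asarray(A))
    X_h = Vt[-1]                               # null-space vector = homogeneous point
    if abs(X_h[3]) < 1e-12:
        return None
    X_w = X_h[:3] / X_h[3]

    # Cheirality/depth and reprojection gates, mirroring the test thresholds.
    for T_w_c, uv in zip(poses_w_c, pts2d):
        X_c = (np.linalg.inv(T_w_c) @ np.append(X_w, 1.0))[:3]
        if not (min_depth < X_c[2] < max_depth):
            return None
        proj = (K @ X_c)[:2] / X_c[2]
        if np.linalg.norm(proj - np.asarray(uv)[:2]) > max_rep_err:
            return None
    return X_w
```

A production implementation would typically also guard against degenerate baselines and follow up with a nonlinear refinement; the sketch keeps only the parts the tests exercise.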