diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b13e045055..f19617ff1b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -475,7 +475,7 @@ set(ALICEVISION_HAVE_OPENCV_CONTRIB 0) if(ALICEVISION_BUILD_SFM) if(NOT ALICEVISION_USE_OPENCV STREQUAL "OFF") - find_package(OpenCV COMPONENTS core imgproc video imgcodecs videoio features2d photo) + find_package(OpenCV COMPONENTS core imgproc video imgcodecs videoio features2d optflow photo) if(OpenCV_FOUND) # We do not set the minimal version directly in find_package diff --git a/src/aliceVision/CMakeLists.txt b/src/aliceVision/CMakeLists.txt index 01eeb3770f..1d5df2a1ee 100644 --- a/src/aliceVision/CMakeLists.txt +++ b/src/aliceVision/CMakeLists.txt @@ -18,7 +18,6 @@ if(ALICEVISION_BUILD_SFM) add_subdirectory(graph) add_subdirectory(gpu) add_subdirectory(imageMatching) - add_subdirectory(keyframe) add_subdirectory(linearProgramming) add_subdirectory(localization) add_subdirectory(matching) @@ -35,6 +34,7 @@ if(ALICEVISION_BUILD_SFM) add_subdirectory(calibration) if(ALICEVISION_HAVE_OPENCV) add_subdirectory(imageMasking) + add_subdirectory(keyframe) endif() endif() diff --git a/src/aliceVision/dataio/FeedProvider.cpp b/src/aliceVision/dataio/FeedProvider.cpp index d5763234a9..46ac44e1ad 100644 --- a/src/aliceVision/dataio/FeedProvider.cpp +++ b/src/aliceVision/dataio/FeedProvider.cpp @@ -22,7 +22,7 @@ namespace aliceVision{ namespace dataio{ -FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calibPath) +FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calibPath) : _isVideo(false), _isLiveFeed(false) { namespace bf = boost::filesystem; @@ -30,7 +30,7 @@ FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calib { throw std::invalid_argument("Empty filepath."); } - if(bf::is_regular_file(bf::path(feedPath))) + if(bf::is_regular_file(bf::path(feedPath))) { // Image or video file const std::string extension = 
bf::path(feedPath).extension().string(); @@ -38,16 +38,23 @@ FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calib { _feeder.reset(new ImageFeed(feedPath, calibPath)); } - else + else { + if(VideoFeed::isSupported(extension)) + { #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_OPENCV) - // let's try it with a video - _feeder.reset(new VideoFeed(feedPath, calibPath)); - _isVideo = true; + // let's try it with a video + _feeder.reset(new VideoFeed(feedPath, calibPath)); + _isVideo = true; #else - throw std::invalid_argument("Unsupported mode! If you intended to use a video" + throw std::invalid_argument("Unsupported mode! If you intended to use a video" " please add OpenCV support"); #endif + } + else + { + throw std::invalid_argument("Unsupported file format: " + feedPath); + } } } // parent_path() returns "/foo/bar/" when input path equals to "/foo/bar/" @@ -96,12 +103,12 @@ bool FeedProvider::readImage(image::Image &imageGray, { return(_feeder->readImage(imageGray, camIntrinsics, mediaPath, hasIntrinsics)); } - + std::size_t FeedProvider::nbFrames() const { if(_isLiveFeed) return std::numeric_limits::infinity(); - + return _feeder->nbFrames(); } @@ -122,5 +129,5 @@ bool FeedProvider::isInit() const FeedProvider::~FeedProvider( ) { } -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/FeedProvider.hpp b/src/aliceVision/dataio/FeedProvider.hpp index 1c6a1d1aa1..1d2cb13108 100644 --- a/src/aliceVision/dataio/FeedProvider.hpp +++ b/src/aliceVision/dataio/FeedProvider.hpp @@ -17,9 +17,9 @@ namespace dataio{ class FeedProvider { public: - + FeedProvider(const std::string &feedPath, const std::string &calibPath = ""); - + /** * @brief Provide a new RGB image from the feed. * @@ -51,7 +51,7 @@ class FeedProvider camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + /** * @brief Provide a new grayscale image from the feed. 
* @@ -101,7 +101,7 @@ class FeedProvider * @return True if the feed is a video. */ bool isVideo() const {return _isVideo; } - + /** * @brief Return true if the feed is a live stream (e.g. a webcam). * @@ -110,7 +110,7 @@ class FeedProvider bool isLiveFeed() const {return _isLiveFeed; } virtual ~FeedProvider(); - + private: std::unique_ptr _feeder; bool _isVideo; @@ -118,6 +118,6 @@ class FeedProvider }; -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/IFeed.cpp b/src/aliceVision/dataio/IFeed.cpp index ed266e128b..117aef9245 100644 --- a/src/aliceVision/dataio/IFeed.cpp +++ b/src/aliceVision/dataio/IFeed.cpp @@ -35,19 +35,19 @@ void readCalibrationFromFile(const std::string &filename, camera::PinholeRadialK int height = 0; const size_t numParam = 6; std::vector params(numParam, 0); - + fs >> width; fs >> height; for(size_t i = 0; i < numParam; ++i) { fs >> params[i]; } - camIntrinsics = camera::PinholeRadialK3(width, height, + camIntrinsics = camera::PinholeRadialK3(width, height, params[0], params[1], params[2], params[3], params[4], params[5]); - + fs.close(); } -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/IFeed.hpp b/src/aliceVision/dataio/IFeed.hpp index 00e07c5662..68cee71740 100644 --- a/src/aliceVision/dataio/IFeed.hpp +++ b/src/aliceVision/dataio/IFeed.hpp @@ -23,7 +23,7 @@ class IFeed * @return True if the feed is correctly initialized. */ virtual bool isInit() const = 0; - + /** * @brief Provide a new RGB image from the feed * @param[out] imageRGB The new RGB image from the feed. @@ -51,7 +51,7 @@ class IFeed camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics) = 0; - + /** * @brief Provide a new grayscale image from the feed * @param[out] imageGray The new image from the feed. @@ -62,16 +62,16 @@ class IFeed * @return True if there is a new image, false otherwise. 
*/ virtual bool readImage(image::Image &imageGray, - camera::PinholeRadialK3 &camIntrinsics, + camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, - bool &hasIntrinsics) = 0; + bool &hasIntrinsics) = 0; virtual std::size_t nbFrames() const = 0; - + virtual bool goToFrame(const unsigned int frame) = 0; - + virtual bool goToNextFrame() = 0; - + virtual ~IFeed( ) {} }; @@ -84,6 +84,6 @@ class IFeed */ void readCalibrationFromFile(const std::string &filename, camera::PinholeRadialK3 &camIntrinsics); -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/ImageFeed.cpp b/src/aliceVision/dataio/ImageFeed.cpp index 06d6cb00e3..c3dab7c24e 100644 --- a/src/aliceVision/dataio/ImageFeed.cpp +++ b/src/aliceVision/dataio/ImageFeed.cpp @@ -28,13 +28,11 @@ namespace dataio{ class ImageFeed::FeederImpl { public: - - static bool isSupported(const std::string &ext); - + FeederImpl() : _isInit(false) {} - + FeederImpl(const std::string& imagePath, const std::string& calibPath); - + template bool readImage(image::Image &image, camera::PinholeRadialK3 &camIntrinsics, @@ -78,17 +76,17 @@ class ImageFeed::FeederImpl } return true; } - + std::size_t nbFrames() const; - + bool goToFrame(const unsigned int frame); - + bool goToNextFrame(); - - bool isInit() const {return _isInit;} - + + bool isInit() const {return _isInit;} + private: - + template bool feedWithJson(image::Image &image, camera::PinholeRadialK3 &camIntrinsics, @@ -124,7 +122,7 @@ class ImageFeed::FeederImpl } else { - const camera::PinholeRadialK3 * intrinsics = dynamic_cast(cam) ; + const camera::PinholeRadialK3 * intrinsics = dynamic_cast(cam); // simply copy values camIntrinsics = *intrinsics; @@ -134,33 +132,21 @@ class ImageFeed::FeederImpl ++_viewIterator; return true; } - -private: - static const std::vector supportedExtensions; - + private: bool _isInit; bool _withCalibration; // It contains the images to be fed std::vector _images; camera::PinholeRadialK3 
_camIntrinsics; - + bool _sfmMode = false; sfmData::SfMData _sfmdata; sfmData::Views::const_iterator _viewIterator; unsigned int _currentImageIndex = 0; }; -const std::vector ImageFeed::FeederImpl::supportedExtensions = {".jpg", ".jpeg", ".png", ".ppm", ".tif", ".tiff", ".exr"}; - -bool ImageFeed::FeederImpl::isSupported(const std::string &ext) -{ - const auto start = FeederImpl::supportedExtensions.begin(); - const auto end = FeederImpl::supportedExtensions.end(); - return(std::find(start, end, boost::to_lower_copy(ext)) != end); -} - -ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::string& calibPath) +ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::string& calibPath) : _isInit(false) , _withCalibration(false) { @@ -179,7 +165,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin _sfmMode = true; } // if it is an image file - else if(FeederImpl::isSupported(ext)) + else if(image::isSupported(ext) && !image::isVideoExtension(ext)) { _images.push_back(imagePath); _withCalibration = !calibPath.empty(); @@ -189,7 +175,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin // if it is an image file else if(ext == ".txt") { - // we expect a simple txt file with a list of path to images relative to the + // we expect a simple txt file with a list of path to images relative to the // location of the txt file itself std::fstream fs(imagePath, std::ios::in); std::string line; @@ -237,14 +223,14 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin // since some OS will provide the files in a random order, first store them // in a priority queue and then fill the _image queue with the alphabetical // order from the priority queue - std::priority_queue, + std::priority_queue, std::greater > tmpSorter; for(; iterator != bf::directory_iterator(); ++iterator) { // get the extension of the current file to check whether it is an image 
const std::string ext = iterator->path().extension().string(); - if(FeederImpl::isSupported(ext)) + if(image::isSupported(ext) && !image::isVideoExtension(ext)) { const std::string filepath = iterator->path().string(); const std::string filename = iterator->path().filename().string(); @@ -252,6 +238,10 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin if(filePattern.empty() || std::regex_match(filename, re)) tmpSorter.push(filepath); } + else + { + ALICEVISION_LOG_WARNING("Unsupported file extension " << ext << " for " << iterator->path().string() << "."); + } } // put all the retrieve files inside the queue while(!tmpSorter.empty()) @@ -259,7 +249,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin _images.push_back(tmpSorter.top()); tmpSorter.pop(); } - + _withCalibration = !calibPath.empty(); _sfmMode = false; _isInit = true; @@ -268,7 +258,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin { throw std::invalid_argument("File or mode not yet implemented"); } - + // last thing: if _withCalibration is true it means that it is not a json and // a path to a calibration file has been passed // then load the calibration @@ -283,10 +273,10 @@ std::size_t ImageFeed::FeederImpl::nbFrames() const { if(!_isInit) return 0; - + if(_sfmMode) return _sfmdata.getViews().size(); - + return _images.size(); } @@ -295,10 +285,10 @@ bool ImageFeed::FeederImpl::goToFrame(const unsigned int frame) if(!_isInit) { _currentImageIndex = frame; - ALICEVISION_LOG_WARNING("Image feed is not initialized "); + ALICEVISION_LOG_WARNING("Image feed is not initialized"); return false; } - + // Reconstruction mode if(_sfmMode) { @@ -352,10 +342,10 @@ bool ImageFeed::FeederImpl::goToNextFrame() ImageFeed::ImageFeed() : _imageFeed(new FeederImpl()) { } -ImageFeed::ImageFeed(const std::string& imagePath, const std::string& calibPath) +ImageFeed::ImageFeed(const std::string& imagePath, const 
std::string& calibPath) : _imageFeed( new FeederImpl(imagePath, calibPath) ) { } -bool ImageFeed::readImage(image::Image &imageRGB, +bool ImageFeed::readImage(image::Image &imageRGB, camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics) @@ -371,7 +361,7 @@ bool ImageFeed::readImage(image::Image &imageGray, return(_imageFeed->readImage(imageGray, camIntrinsics, mediaPath, hasIntrinsics)); } -bool ImageFeed::readImage(image::Image &imageGray, +bool ImageFeed::readImage(image::Image &imageGray, camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics) @@ -408,11 +398,11 @@ bool ImageFeed::isSupported(const std::string &extension) } else { - return FeederImpl::isSupported(ext); + return (image::isSupported(ext) && !image::isVideoExtension(ext)); } } ImageFeed::~ImageFeed() { } -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/ImageFeed.hpp b/src/aliceVision/dataio/ImageFeed.hpp index 5d6d9ec70c..166ac3d869 100644 --- a/src/aliceVision/dataio/ImageFeed.hpp +++ b/src/aliceVision/dataio/ImageFeed.hpp @@ -21,7 +21,7 @@ class ImageFeed : public IFeed * @brief Empty constructor */ ImageFeed(); - + /** * @brief Set up an image based feed from a choice of different sources: * 1) a directory containing images @@ -44,7 +44,7 @@ class ImageFeed : public IFeed * @see readCalibrationFromFile() */ ImageFeed(const std::string& imagePath, const std::string& calibPath); - + /** * @brief Provide a new RGB image from the feed * @@ -89,13 +89,13 @@ class ImageFeed : public IFeed camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + std::size_t nbFrames() const; - + bool goToFrame(const unsigned int frame); bool goToNextFrame(); - + /** * @brief Return true if the feed is correctly initialized. 
* @@ -104,7 +104,7 @@ class ImageFeed : public IFeed bool isInit() const; virtual ~ImageFeed( ); - + /** * @brief For a given extension, return true if that file can be used as input * for the feed. ImageFeed supports .json, .txt, and the most common image files. @@ -113,13 +113,13 @@ class ImageFeed : public IFeed * @return True if the file is supported. */ static bool isSupported(const std::string &extension); - + private: class FeederImpl; std::unique_ptr _imageFeed; }; -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/VideoFeed.cpp b/src/aliceVision/dataio/VideoFeed.cpp index 48fbe7d48d..3362b3dce4 100644 --- a/src/aliceVision/dataio/VideoFeed.cpp +++ b/src/aliceVision/dataio/VideoFeed.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -24,13 +25,13 @@ class VideoFeed::FeederImpl { public: FeederImpl() : _isInit(false) { } - + FeederImpl(const std::string &videoPath, const std::string &calibPath); - + FeederImpl(int videoDevice, const std::string &calibPath); - + bool isInit() const {return _isInit;} - + bool readImage(image::Image &imageRGB, camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, @@ -40,18 +41,18 @@ class VideoFeed::FeederImpl camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + bool readImage(image::Image &imageGray, camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + bool goToFrame(const unsigned int frame); - + bool goToNextFrame(); - + std::size_t nbFrames() const; - + private: bool _isInit; bool _isLive; @@ -79,7 +80,7 @@ VideoFeed::FeederImpl::FeederImpl(const std::string &videoPath, const std::strin _withIntrinsics = !calibPath.empty(); if(_withIntrinsics) readCalibrationFromFile(calibPath, _camIntrinsics); - + _isInit = true; } @@ -95,12 +96,12 @@ VideoFeed::FeederImpl::FeederImpl(int videoDevice, const std::string &calibPath) } goToNextFrame(); - + // load the calibration path 
_withIntrinsics = !calibPath.empty(); if(_withIntrinsics) readCalibrationFromFile(calibPath, _camIntrinsics); - + _isInit = true; } @@ -116,15 +117,15 @@ bool VideoFeed::FeederImpl::readImage(image::Image &imageRGB, { return false; } - + if(frame.channels() == 3) { cv::Mat color; resize(frame, color, cv::Size(frame.cols, frame.rows)); - + cv::cvtColor(frame, color, cv::COLOR_BGR2RGB); imageRGB.resize(color.cols, color.rows); - + unsigned char* pixelPtr = (unsigned char*)color.data; for(int i = 0; i < color.rows; i++) { @@ -140,7 +141,7 @@ bool VideoFeed::FeederImpl::readImage(image::Image &imageRGB, ALICEVISION_LOG_WARNING("Error can't read RGB frame " << _videoPath); throw std::invalid_argument("Error can't read RGB frame " + _videoPath); } - + hasIntrinsics = _withIntrinsics; if(_withIntrinsics) camIntrinsics = _camIntrinsics; @@ -176,7 +177,7 @@ bool VideoFeed::FeederImpl::readImage(image::Image &imageGray, { return false; } - + if(frame.channels() == 3) { // convert to gray @@ -203,7 +204,10 @@ bool VideoFeed::FeederImpl::readImage(image::Image &imageGray, std::size_t VideoFeed::FeederImpl::nbFrames() const { if (!_videoCapture.isOpened()) + { + ALICEVISION_LOG_WARNING("The video file could not be opened."); return 0; + } return _videoCapture.get(cv::CAP_PROP_FRAME_COUNT); } @@ -214,22 +218,12 @@ bool VideoFeed::FeederImpl::goToFrame(const unsigned int frame) ALICEVISION_LOG_WARNING("We cannot open the video file."); return false; } - - if(_isLive) + + if (_isLive) return goToNextFrame(); - - if(frame > 0) - { - _videoCapture.set(cv::CAP_PROP_POS_FRAMES, frame); - _videoCapture.grab(); - return true; - } - else - { - _videoCapture.set(cv::CAP_PROP_POS_FRAMES, 0); - _videoCapture.grab(); - return false; - } + + _videoCapture.set(cv::CAP_PROP_POS_FRAMES, frame); + return _videoCapture.grab(); } bool VideoFeed::FeederImpl::goToNextFrame() @@ -243,11 +237,11 @@ bool VideoFeed::FeederImpl::goToNextFrame() VideoFeed::VideoFeed() : _feeder(new FeederImpl()) { } 
-VideoFeed::VideoFeed(const std::string &videoPath, const std::string &calibPath) +VideoFeed::VideoFeed(const std::string &videoPath, const std::string &calibPath) : _feeder(new FeederImpl(videoPath, calibPath)) { } -VideoFeed::VideoFeed(int videoDevice, const std::string &calibPath) +VideoFeed::VideoFeed(int videoDevice, const std::string &calibPath) : _feeder(new FeederImpl(videoDevice, calibPath)) { } @@ -292,7 +286,12 @@ bool VideoFeed::goToNextFrame() bool VideoFeed::isInit() const {return(_feeder->isInit()); } +bool VideoFeed::isSupported(const std::string &extension) +{ + return image::isVideoExtension(extension); +} + VideoFeed::~VideoFeed() { } -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/VideoFeed.hpp b/src/aliceVision/dataio/VideoFeed.hpp index 93cc035287..a715000908 100644 --- a/src/aliceVision/dataio/VideoFeed.hpp +++ b/src/aliceVision/dataio/VideoFeed.hpp @@ -56,7 +56,7 @@ class VideoFeed : public IFeed * @see readCalibrationFromFile() */ VideoFeed(int videoDevice, const std::string &calibPath); - + /** * @brief Provide a new RGB image from the feed * @@ -101,13 +101,13 @@ class VideoFeed : public IFeed camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + std::size_t nbFrames() const; - + bool goToFrame(const unsigned int frame); - + bool goToNextFrame(); - + /** * @brief Return true if the feed is correctly initialized. * @@ -115,8 +115,15 @@ class VideoFeed : public IFeed */ bool isInit() const; - virtual ~VideoFeed( ); - + virtual ~VideoFeed(); + +/** + * @brief For a given extension, return true if that file can be used as input video for the feed. + * @param extension The file extension to check in ".ext" format (case insensitive). + * @return True if the file is supported. 
+ */ +static bool isSupported(const std::string &extension); + private: class FeederImpl; std::unique_ptr _feeder; diff --git a/src/aliceVision/image/io.cpp b/src/aliceVision/image/io.cpp index 7cbded20db..fe272a348c 100644 --- a/src/aliceVision/image/io.cpp +++ b/src/aliceVision/image/io.cpp @@ -126,28 +126,38 @@ std::istream& operator>>(std::istream& in, EImageFileType& imageFileType) std::vector getSupportedExtensions() { - static const std::string extensionList = oiio::get_string_attribute("extension_list"); std::vector supportedExtensions; - std::vector supportedFormat; - boost::split(supportedFormat, extensionList, boost::is_any_of(";"), boost::token_compress_on); - for(const std::string& format: supportedFormat) - { - std::vector extensions; - const std::string str = format.substr(format.find(":")+1); - boost::split(extensions, str, boost::is_any_of(","), boost::token_compress_on); - for(std::string& extension: extensions) - supportedExtensions.push_back(extension.insert(0, ".")); + // Map containing the parsed "extension_list" with each supported format and its associated extensions + static std::map> extensionList = oiio::get_extension_map(); + + for (auto& format : extensionList) { + for (auto& extension : format.second) { + supportedExtensions.push_back(extension.insert(0, ".")); + } } return supportedExtensions; } -bool isSupported(const std::string& ext) +bool isSupported(const std::string& extension) { static const std::vector supportedExtensions = getSupportedExtensions(); const auto start = supportedExtensions.begin(); const auto end = supportedExtensions.end(); - return (std::find(start, end, boost::to_lower_copy(ext)) != end); + return (std::find(start, end, boost::to_lower_copy(extension)) != end); +} + +bool isVideoExtension(const std::string& extension) +{ + // List provided by OpenImageIO: + // https://openimageio.readthedocs.io/en/latest/builtinplugins.html#movie-formats-using-ffmpeg + static const std::array supportedExtensions = { + ".avi", 
".qt", ".mov", ".mp4", ".m4a", ".m4v", + ".3gp", ".3g2", ".mj2", ".m4v", ".mpg" + }; + const auto start = supportedExtensions.begin(); + const auto end = supportedExtensions.end(); + return (std::find(start, end, boost::to_lower_copy(extension)) != end); } std::string EStorageDataType_informations() diff --git a/src/aliceVision/image/io.hpp b/src/aliceVision/image/io.hpp index 78d75ac3c2..b35b96de4e 100644 --- a/src/aliceVision/image/io.hpp +++ b/src/aliceVision/image/io.hpp @@ -100,18 +100,28 @@ std::ostream& operator<<(std::ostream& os, EImageFileType imageFileType); std::istream& operator>>(std::istream& in, EImageFileType& imageFileType); /** - * @brief Return a list of extensions supported by openImageIO ie exists in extension_list from imageio.h - * @return A vector containing all supported extensions + * @brief Return a list of extensions supported by OpenImageIO (ie. extensions existing in extension_list from imageio.h). + * The list of supported extensions also includes video formats. + * @return a vector containing all the extensions supported by OpenImageIO. */ std::vector getSupportedExtensions(); /** - * @brief Check if input image extension is supported by openImageIO ie exists in extension_list from imageio.h - * @param[in] ext - image extension - * @return true if valid extension + * @brief Check if an input image extension is supported by OpenImageIO (ie. it exists in extension_list from imageio.h). + * This function might return true if the input is a video rather than an image, as long as the video format is + * supported by OpenImageIO. + * @param[in] extension the input image extension. + * @return true if the extension is valid and supported by OpenImageIO, false otherwise. 
*/ -bool isSupported(const std::string& ext); +bool isSupported(const std::string& extension); +/** + * @brief Check if the extension is a video format supported by OpenImageIO, based on the list provided by OpenImageIO + * (https://openimageio.readthedocs.io/en/latest/builtinplugins.html#movie-formats-using-ffmpeg). + * @param[in] extension the input file's extension. + * @return true if the extension is a valid video extension supported by OpenImageIO, false otherwise. + */ +bool isVideoExtension(const std::string& extension); /** * @brief Data type use to write the image diff --git a/src/aliceVision/keyframe/CMakeLists.txt b/src/aliceVision/keyframe/CMakeLists.txt index 4963899122..8d148a8b1f 100644 --- a/src/aliceVision/keyframe/CMakeLists.txt +++ b/src/aliceVision/keyframe/CMakeLists.txt @@ -1,6 +1,5 @@ # Headers set(keyframe_files_headers - SharpnessSelectionPreset.hpp KeyframeSelector.hpp ) @@ -13,11 +12,13 @@ alicevision_add_library(aliceVision_keyframe SOURCES ${keyframe_files_headers} ${keyframe_files_sources} PUBLIC_LINKS aliceVision_dataio - aliceVision_feature - aliceVision_voctree OpenImageIO::OpenImageIO PRIVATE_LINKS aliceVision_sensorDB aliceVision_system Boost::filesystem ) + +if(ALICEVISION_HAVE_OPENCV) + target_link_libraries(aliceVision_keyframe PUBLIC ${OpenCV_LIBS}) +endif() \ No newline at end of file diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 2243ab5c3e..bbe06d11bc 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -5,9 +5,7 @@ // You can obtain one at https://mozilla.org/MPL/2.0/. #include "KeyframeSelector.hpp" -#include #include -#include #include #include @@ -17,460 +15,783 @@ #include #include #include +#include + + namespace fs = boost::filesystem; namespace aliceVision { namespace keyframe { - /** * @brief Get a random int in order to generate uid. 
- * @warning The random don't use a repeatable seed to avoid conflicts between different launches on different data sets. + * @warning The random doesn't use a repeatable seed to avoid conflicts between different launches on different data sets * @return int between 0 and std::numeric_limits::max() */ int getRandomInt() { - std::random_device rd; // will be used to obtain a seed for the random number engine - std::mt19937 randomTwEngine(rd()); // standard mersenne_twister_engine seeded with rd() - std::uniform_int_distribution<> randomDist(0, std::numeric_limits::max()); - return randomDist(randomTwEngine); + std::random_device rd; // will be used to obtain a seed for the random number engine + std::mt19937 randomTwEngine(rd()); // standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> randomDist(0, std::numeric_limits::max()); + return randomDist(randomTwEngine); +} + +/** + * @brief Find the median value in an unsorted vector of double values. + * @param[in] vec The unsorted vector of double values + * @return double the median value + */ +double findMedian(const std::vector& vec) +{ + std::vector vecCopy = vec; + if (vecCopy.size() > 0 && vecCopy.size() % 2 == 0) { + const auto medianIt1 = vecCopy.begin() + vecCopy.size() / 2 - 1; + std::nth_element(vecCopy.begin(), medianIt1, vecCopy.end()); + const auto med1 = *medianIt1; + + const auto medianIt2 = vecCopy.begin() + vecCopy.size() / 2; + std::nth_element(vecCopy.begin(), medianIt2, vecCopy.end()); + const auto med2 = *medianIt2; + + return (med1 + med2) / 2.0; + } else if (vecCopy.size() > 0) { + const auto medianIt = vecCopy.begin() + vecCopy.size() / 2; + std::nth_element(vecCopy.begin(), medianIt, vecCopy.end()); + return *medianIt; + } + + return 0.0; } KeyframeSelector::KeyframeSelector(const std::vector& mediaPaths, const std::string& sensorDbPath, - const std::string& voctreeFilePath, const std::string& outputFolder) - : _mediaPaths(mediaPaths) - , 
_sensorDbPath(sensorDbPath) - , _voctreeFilePath(voctreeFilePath) - , _outputFolder(outputFolder) + : _mediaPaths(mediaPaths) + , _sensorDbPath(sensorDbPath) + , _outputFolder(outputFolder) { - if((_maxOutFrame != 0) && - !_hasSharpnessSelection && - !_hasSparseDistanceSelection) - { - ALICEVISION_LOG_ERROR("KeyframeSelector needs at least one selection method if output frame limited !"); - throw std::invalid_argument("KeyframeSelector needs at least one selection method if output frame limited !"); - } - - // load vocabulary tree - _voctree.reset(new aliceVision::voctree::VocabularyTree(voctreeFilePath)); - - { - ALICEVISION_LOG_INFO("vocabulary tree loaded with :" << std::endl - << "\t- " << _voctree->levels() << " levels" << std::endl - << "\t- " << _voctree->splits() << " branching factor" << std::endl); - } - - // check number of input media filePaths - if(mediaPaths.empty()) - { - ALICEVISION_LOG_ERROR("Cannot create KeyframeSelector without a media file path !"); - throw std::invalid_argument("Cannot create KeyframeSelector without a media file path !"); - } - - // resize mediasInfo container - _mediasInfo.resize(mediaPaths.size()); - - // create SIFT image describer - _imageDescriber.reset(new feature::ImageDescriber_SIFT()); + // Check that a least one media file path has been provided + if (mediaPaths.empty()) { + ALICEVISION_THROW(std::invalid_argument, "Cannot create KeyframeSelector without at least one media file path!"); + } + + scoresMap["Sharpness"] = &_sharpnessScores; + scoresMap["OpticalFlow"] = &_flowScores; } -void KeyframeSelector::process() +void KeyframeSelector::processRegular() { - // create feeds and count minimum number of frames - std::size_t nbFrames = std::numeric_limits::max(); - for(std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) - { - const auto& path = _mediaPaths.at(mediaIndex); - - // create a feed provider per mediaPaths - _feeds.emplace_back(new dataio::FeedProvider(path)); - - const auto& feed = 
*_feeds.back(); - - // check if feed is initialized - if(!feed.isInit()) - { - ALICEVISION_LOG_ERROR("Cannot initialize the FeedProvider with " << path); - throw std::invalid_argument("Cannot while initialize the FeedProvider with " + path); - } + _selectedKeyframes.clear(); - // update minimum number of frames - nbFrames = std::min(nbFrames, feed.nbFrames() - static_cast( _cameraInfos.at(mediaIndex).frameOffset)); - } - - // check if minimum number of frame is zero - if(nbFrames == 0) - { - ALICEVISION_LOG_ERROR("One or multiple medias can't be found or empty !"); - throw std::invalid_argument("One or multiple medias can't be found or empty !"); - } - - // resize selection data vector - _framesData.resize(nbFrames); - - // feed provider variables - image::Image< image::RGBColor> image; // original image - camera::PinholeRadialK3 queryIntrinsics; // image associated camera intrinsics - bool hasIntrinsics = false; // true if queryIntrinsics is valid - std::string currentImgName; // current image name - - // process variables - const unsigned int frameStep = _maxFrameStep - _minFrameStep; - const unsigned int tileSharpSubset = (_nbTileSide * _nbTileSide) / _sharpSubset; - - // create output folders - if(_feeds.size() > 1) - { - const std::string rigFolder = _outputFolder + "/rig/"; - if(!fs::exists(rigFolder)) - fs::create_directory(rigFolder); - - for(std::size_t mediaIndex = 0 ; mediaIndex < _feeds.size(); ++mediaIndex) - { - const std::string subPoseFolder = rigFolder + std::to_string(mediaIndex); - if(!fs::exists(subPoseFolder)) - fs::create_directory(subPoseFolder); + std::size_t nbFrames = std::numeric_limits::max(); + std::vector> feeds; + + for (std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) { + const auto& path = _mediaPaths.at(mediaIndex); + + // Create a feed provider per mediaPaths + feeds.push_back(std::make_unique(path)); + const auto& feed = *feeds.back(); + + // Check if feed is initialized + if (!feed.isInit()) { + 
ALICEVISION_THROW(std::invalid_argument, "Cannot initialize the FeedProvider with " << path); + } + + // Update minimum number of frames + nbFrames = std::min(nbFrames, static_cast(feed.nbFrames())); } - } - - // feed and metadata initialization - for(std::size_t mediaIndex = 0 ; mediaIndex < _feeds.size(); ++mediaIndex) - { - // first frame with offset - _feeds.at(mediaIndex)->goToFrame(_cameraInfos.at(mediaIndex).frameOffset); - - if(!_feeds.at(mediaIndex)->readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) - { - ALICEVISION_LOG_ERROR("Cannot read media first frame " << _mediaPaths[mediaIndex]); - throw std::invalid_argument("Cannot read media first frame " + _mediaPaths[mediaIndex]); + + // Check if minimum number of frame is zero + if (nbFrames == 0) { + ALICEVISION_THROW(std::invalid_argument, "One or multiple medias can't be found or empty!"); } - // define output image metadata - if(!_cameraInfos.at(mediaIndex).focalIsMM) - { - convertFocalLengthInMM(_cameraInfos.at(mediaIndex), image.Width()); + unsigned int step = _minFrameStep; + if (_maxFrameStep > 0) { + // By default, if _maxFrameStep is set, set the step to be right between _minFrameStep and _maxFrameStep + step = step + static_cast((_maxFrameStep - _minFrameStep) / 2); } - // define media informations - auto& mediaInfo = _mediasInfo.at(mediaIndex); - mediaInfo.tileHeight = (image.Height() / 2) / _nbTileSide; - mediaInfo.tileWidth = (image.Width() / 2) / _nbTileSide; - mediaInfo.spec = oiio::ImageSpec(image.Width(), image.Height(), 3, oiio::TypeDesc::UINT8); // always jpeg - mediaInfo.spec.attribute("jpeg:subsampling", "4:4:4"); // always subsampling 4:4:4 - mediaInfo.spec.attribute("oiio:ColorSpace", "sRGB"); // always sRGB - mediaInfo.spec.attribute("Make", _cameraInfos[mediaIndex].brand); - mediaInfo.spec.attribute("Model", _cameraInfos[mediaIndex].model); - mediaInfo.spec.attribute("Exif:BodySerialNumber", std::to_string(getRandomInt())); // TODO: use Exif:OriginalRawFileName 
instead - mediaInfo.spec.attribute("Exif:FocalLength", _cameraInfos[mediaIndex].focalLength); - } - - // iteration process - _keyframeIndexes.clear(); - std::size_t currentFrameStep = _minFrameStep + 1; // start directly (dont skip minFrameStep first frames) - - for(std::size_t frameIndex = 0; frameIndex < _framesData.size(); ++frameIndex) - { - ALICEVISION_LOG_INFO("frame : " << frameIndex); - bool frameSelected = true; - auto& frameData = _framesData.at(frameIndex); - frameData.mediasData.resize(_feeds.size()); - - for(std::size_t mediaIndex = 0; mediaIndex < _feeds.size(); ++mediaIndex) - { - ALICEVISION_LOG_DEBUG("media : " << _mediaPaths.at(mediaIndex)); - auto& feed = *_feeds.at(mediaIndex); - - if(frameSelected) // false if a camera of a rig is not selected - { - if(!feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) - { - ALICEVISION_LOG_ERROR("Cannot read frame '" << currentImgName << "' !"); - throw std::invalid_argument("Cannot read frame '" + currentImgName + "' !"); + /** + * To respect the _minFrameStep, _maxFrameStep and _maxOutFrames constraints as much as possible: + * - if _maxOutFrames is set and the current step is too small to sample over the entire sequence, + * the step should be increased; + * - if _maxOutFrames is set and the adjusted step became too big and does not respect _maxFrameStep anymore, + * the step should be set to _maxFrameStep - in that case, _maxOutFrames might be reached before the end of + * the sequence + */ + if (_maxOutFrames > 0 && nbFrames / _maxOutFrames > step) { + step = (nbFrames / _maxOutFrames) + 1; // + 1 to prevent ending up with more than _maxOutFrame selected frames + if (_maxFrameStep > 0 && step > _maxFrameStep) { + step = _maxFrameStep; } + } - // compute sharpness and sparse distance - if(!computeFrameData(image, frameIndex, mediaIndex, tileSharpSubset)) - { - frameSelected = false; - } - } + for (unsigned int id = 0; id < nbFrames; id += step) { + ALICEVISION_LOG_INFO("Selecting frame 
with ID " << id); + _selectedKeyframes.push_back(id); + if (_maxOutFrames > 0 && _selectedKeyframes.size() >= _maxOutFrames) + break; + } + + ALICEVISION_LOG_INFO("Finished selecting all the keyframes! " << _selectedKeyframes.size() << "/" << + nbFrames << " frames have been selected."); +} + +void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_t rescaledWidthSharpness, + const std::size_t rescaledWidthFlow, const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize, const bool skipSharpnessComputation) +{ + _selectedKeyframes.clear(); + _selectedFrames.clear(); - feed.goToNextFrame(); + // Step 0: compute all the scores + computeScores(rescaledWidthSharpness, rescaledWidthFlow, sharpnessWindowSize, flowCellSize, skipSharpnessComputation); + + // Step 1: determine subsequences based on the motion accumulation + std::vector subsequenceLimits; + subsequenceLimits.push_back(0); // Always use the first frame as the starting point + + std::size_t sequenceSize = _sharpnessScores.size(); + + // All frames are unselected so far + _selectedFrames.resize(sequenceSize); + std::fill(_selectedFrames.begin(), _selectedFrames.end(), '0'); + + float step = pxDisplacement * std::min(_frameWidth, _frameHeight) / 100.0; + double motionAcc = 0.0; + + /* Starts at 1 because the first frame's motion score will be -1. + * Ends at sequenceSize - 1 to ensure the last frame cannot be pushed twice. */ + for (std::size_t i = 1; i < sequenceSize - 1; ++i) { + motionAcc += _flowScores.at(i) > -1.f ? 
_flowScores.at(i) : 0.f; + if (motionAcc >= step) { + subsequenceLimits.push_back(i); + motionAcc = 0.0; // Reset the motion accumulator + } } + subsequenceLimits.push_back(sequenceSize - 1); + + // Step 2: check whether the min/max output frames constraints are respected + if (!(subsequenceLimits.size() - 1 >= _minOutFrames && subsequenceLimits.size() - 1 <= _maxOutFrames)) { + ALICEVISION_LOG_INFO("Preliminary selection does not provide the right number of frames (" + << subsequenceLimits.size() - 1 << " keyframes, should be between " << _minOutFrames + << " and " << _maxOutFrames << ")."); + + std::vector newLimits = subsequenceLimits; // Prevents first 'newLimits.size() - 1' from overflowing + const double displacementDiff = 0.5; // The displacement must be 0.5px smaller/bigger than the previous one + + if (subsequenceLimits.size() - 1 < _minOutFrames) { + // Not enough frames, reduce the motion step + ALICEVISION_LOG_INFO("Not enough keyframes, the motion step will be reduced of " << displacementDiff + << "%."); + bool sampleRegularly = false; + while (newLimits.size() - 1 < _minOutFrames) { + newLimits.clear(); + newLimits.push_back(0); + step = std::max(0.0, step - displacementDiff); + + if (step == 0.0) { // The criterion does not make sense anymore, exit to sample regularly instead + sampleRegularly = true; + break; + } + motionAcc = 0.0; + + for (std::size_t i = 1; i < sequenceSize - 1; ++i) { + motionAcc += _flowScores.at(i) > -1.f ? _flowScores.at(i) : 0.f; + if (motionAcc >= step) { + newLimits.push_back(i); + motionAcc = 0.0; + } + } + newLimits.push_back(sequenceSize - 1); + } + + if (sampleRegularly) { + // Sample regularly the whole sequence to get minOutFrames subsequences + ALICEVISION_LOG_INFO("The motion step has been reduced to 0 and cannot be used anymore. 
Keyframes will " + "be sampled regularly instead."); + newLimits.clear(); + newLimits.push_back(0); + std::size_t stepSize = (sequenceSize / _minOutFrames) + 1; + + for (std::size_t i = 1; i < sequenceSize - 1; i += stepSize) + newLimits.push_back(i); + newLimits.push_back(sequenceSize - 1); + } + } else { + // Too many frames, increase the motion step + ALICEVISION_LOG_INFO("Too many keyframes, the motion step will be increased of " << displacementDiff + << "%."); + while (newLimits.size() - 1 > _maxOutFrames) { + newLimits.clear(); + newLimits.push_back(0); + step = step + displacementDiff; + motionAcc = 0.0; + + for (std::size_t i = 1; i < sequenceSize - 1; ++i) { + motionAcc += _flowScores.at(i) > -1.f ? _flowScores.at(i) : 0.f; + if (motionAcc >= step) { + newLimits.push_back(i); + motionAcc = 0.0; + } + } + newLimits.push_back(sequenceSize - 1); + } + } - { - if(frameSelected) - { - ALICEVISION_LOG_INFO(" > selected" << std::endl); - frameData.selected = true; - if(_hasSharpnessSelection) - frameData.computeAvgSharpness(); - } - else - { - ALICEVISION_LOG_INFO(" > skipped" << std::endl); - frameData.mediasData.clear(); // remove unselected mediasData - } + subsequenceLimits.clear(); + subsequenceLimits = newLimits; } - // selection process - if(currentFrameStep >= _maxFrameStep) - { - currentFrameStep = _minFrameStep; - bool hasKeyframe = false; - std::size_t keyframeIndex = 0; - float maxSharpness = 0; - float minDistScore = std::numeric_limits::max(); - - // find the best selected frame - if(_hasSharpnessSelection) - { - // find the sharpest selected frame - for(std::size_t index = frameIndex - (frameStep - 1); index <= frameIndex; ++index) - { - if(_framesData[index].selected && (_framesData[index].avgSharpness > maxSharpness)) - { - hasKeyframe = true; - keyframeIndex = index; - maxSharpness = _framesData[index].avgSharpness; - } + // Step 3: for each subsequence, find the keyframe + for (std::size_t i = 1; i < subsequenceLimits.size(); ++i) { + double 
bestSharpness = 0.0; + std::size_t bestIndex = 0; + std::size_t subsequenceSize = subsequenceLimits.at(i) - subsequenceLimits.at(i - 1); + ALICEVISION_LOG_DEBUG("Subsequence [" << subsequenceLimits.at(i - 1) << ", " << subsequenceLimits.at(i) << "]"); + + // Weights for the whole subsequence [1.0; 2.0] (1.0 is on the subsequence's limits, 2.0 on its center) + std::deque weights; + const double weightStep = 1.f / (static_cast(subsequenceSize - 1) / 2.f); + weights.push_back(2.0); // The frame in the middle of the subsequence has the biggest weight + if (subsequenceSize % 2 == 0) + weights.push_back(2.0); // For subsequences of even size, two frames are equally in the middle + + float currentWeight = 2.0; + while (weights.size() != subsequenceSize) { + currentWeight -= weightStep; + weights.push_front(currentWeight); + weights.push_back(currentWeight); } - } - else if(_hasSparseDistanceSelection) - { - // find the smallest sparseDistance selected frame - for(std::size_t index = frameIndex - (frameStep - 1); index <= frameIndex; ++index) - { - if(_framesData[index].selected && (_framesData[index].maxDistScore < minDistScore)) - { - hasKeyframe = true; - keyframeIndex = index; - minDistScore = _framesData[index].maxDistScore; - } + + std::size_t weightPosition = 0; + for (std::size_t j = subsequenceLimits.at(i - 1); j < subsequenceLimits.at(i); ++j) { + auto sharpness = _sharpnessScores.at(j) * weights.at(weightPosition); + ++weightPosition; + if (sharpness > bestSharpness) { + bestIndex = j; + bestSharpness = sharpness; + } } - } - else - { - // use the first frame of the step - hasKeyframe = true; - keyframeIndex = frameIndex - (frameStep - 1); - } - - // save keyframe - if(hasKeyframe) - { - ALICEVISION_LOG_INFO("keyframe choice : " << keyframeIndex << std::endl); - - // write keyframe - for(std::size_t mediaIndex = 0; mediaIndex < _feeds.size(); ++mediaIndex) - { - auto& feed = *_feeds.at(mediaIndex); - - feed.goToFrame(keyframeIndex + 
_cameraInfos.at(mediaIndex).frameOffset); - - if(_maxOutFrame == 0) // no limit of keyframes (direct evaluation) - { - feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics); - writeKeyframe(image, keyframeIndex, mediaIndex); - } + ALICEVISION_LOG_INFO("Selecting frame with ID " << bestIndex); + _selectedKeyframes.push_back(bestIndex); + _selectedFrames.at(bestIndex) = '1'; // The frame has been selected, flip it to 1 + } + + ALICEVISION_LOG_INFO("Finished selecting all the keyframes! " << _selectedKeyframes.size() << "/" << + sequenceSize << " frames have been selected."); +} + +bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, + const std::size_t sharpnessWindowSize, const std::size_t flowCellSize, + const bool skipSharpnessComputation) +{ + // Reset the computed scores + _sharpnessScores.clear(); + _flowScores.clear(); + + // Reset the frame size + _frameWidth = 0; + _frameHeight = 0; + + // Create feeds and count minimum number of frames + std::size_t nbFrames = std::numeric_limits::max(); + std::vector> feeds; + + for (std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) { + const auto& path = _mediaPaths.at(mediaIndex); + + // Create a feed provider per mediaPaths + feeds.push_back(std::make_unique(path)); + const auto& feed = *feeds.back(); + + // Check if feed is initialized + if (!feed.isInit()) { + ALICEVISION_THROW(std::invalid_argument, "Cannot initialize the FeedProvider with " << path); + } + + // Update minimum number of frames + nbFrames = std::min(nbFrames, (size_t)feed.nbFrames()); + } + + // Check if minimum number of frame is zero + if (nbFrames == 0) { + ALICEVISION_THROW(std::invalid_argument, "One or multiple medias can't be found or is empty!"); + } + + // Feed provider variables + image::Image image; // original image + camera::PinholeRadialK3 queryIntrinsics; // image associated camera intrinsics + bool hasIntrinsics = false; // true if 
queryIntrinsics is valid + std::string currentImgName; // current image name + + // Feed and metadata initialization + for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) { + // First frame with offset + feeds.at(mediaIndex)->goToFrame(0); + + if (!feeds.at(mediaIndex)->readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) { + ALICEVISION_THROW(std::invalid_argument, "Cannot read media first frame " << _mediaPaths[mediaIndex]); } - _framesData[keyframeIndex].keyframe = true; - _keyframeIndexes.push_back(keyframeIndex); - - frameIndex = keyframeIndex + _minFrameStep - 1; - } - else - { - ALICEVISION_LOG_INFO("keyframe choice : none" << std::endl); - } } - ++currentFrameStep; - } - - if(_maxOutFrame == 0) // no limit of keyframes (evaluation and write already done) - { - return; - } - - // if limited number of keyframe, select smallest sparse distance - { - std::vector< std::tuple > keyframes; - - for(std::size_t i = 0; i < _framesData.size(); ++i) - { - if(_framesData[i].keyframe) - { - keyframes.emplace_back(_framesData[i].maxDistScore, 1 / _framesData[i].avgSharpness, i); - } + + std::size_t currentFrame = 0; + cv::Mat currentMatSharpness; // OpenCV matrix for the sharpness computation + cv::Mat previousMatFlow, currentMatFlow; // OpenCV matrices for the optical flow computation + auto ptrFlow = cv::optflow::createOptFlow_DeepFlow(); + + while (currentFrame < nbFrames) { + double minimalSharpness = skipSharpnessComputation ? 
1.0f : std::numeric_limits::max(); + double minimalFlow = std::numeric_limits::max(); + + for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) { + auto& feed = *feeds.at(mediaIndex); + + if (currentFrame > 0) { // Get currentFrame - 1 for the optical flow computation + previousMatFlow = readImage(feed, rescaledWidthFlow); + feed.goToNextFrame(); + } + + /* Handle input feeds that may have invalid or missing frames: + * - catch the "invalid argument" exception thrown by "readImage" if a frame is invalid or missing + * - try reading the next frame instead + * - if the next frame is correctly read, then push dummy scores for the invalid frame and go on with + * the process + * - otherwise (feed not correctly moved to the next frame), throw a runtime error exception as something + * is wrong with the video + */ + if (!skipSharpnessComputation) { + try { + // Read image for sharpness and rescale it if requested + currentMatSharpness = readImage(feed, rescaledWidthSharpness); + } catch (const std::invalid_argument& ex) { + // currentFrame + 1 = currently evaluated frame with indexing starting at 1, for display reasons + // currentFrame + 2 = next frame to evaluate with indexing starting at 1, for display reasons + ALICEVISION_LOG_WARNING("Invalid or missing frame " << currentFrame + 1 + << ", attempting to read frame " << currentFrame + 2 << "."); + bool success = feed.goToFrame(++currentFrame); + if (success) { + // Will throw an exception if next frame is also invalid + currentMatSharpness = readImage(feed, rescaledWidthSharpness); + // If no exception has been thrown, push dummy scores for the frame that was skipped + _sharpnessScores.push_back(-1.f); + _flowScores.push_back(-1.f); + } else + ALICEVISION_THROW_ERROR("Could not go to frame " << currentFrame + 1 + << " either. 
The feed might be corrupted."); + } + } + + if (rescaledWidthSharpness == rescaledWidthFlow && !skipSharpnessComputation) { + currentMatFlow = currentMatSharpness; + } else { + currentMatFlow = readImage(feed, rescaledWidthFlow); + } + + if (_frameWidth == 0 && _frameHeight == 0) { // Will be used later on to determine the motion accumulation step + _frameWidth = currentMatFlow.size().width; + _frameHeight = currentMatFlow.size().height; + } + + // Compute sharpness + if (!skipSharpnessComputation) { + const double sharpness = computeSharpness(currentMatSharpness, sharpnessWindowSize); + minimalSharpness = std::min(minimalSharpness, sharpness); + } + + // Compute optical flow + if (currentFrame > 0) { + const double flow = estimateFlow(ptrFlow, currentMatFlow, previousMatFlow, flowCellSize); + minimalFlow = std::min(minimalFlow, flow); + } + + ALICEVISION_LOG_INFO("Finished processing frame " << currentFrame + 1 << "/" << nbFrames); + } + + // Save scores for the current frame + _sharpnessScores.push_back(minimalSharpness); + _flowScores.push_back(currentFrame > 0 ? 
minimalFlow : -1.f); + ++currentFrame; } - std::sort(keyframes.begin(), keyframes.end()); - - const std::size_t nbOutFrames = std::min(static_cast(_maxOutFrame), keyframes.size()); - - for(std::size_t i = 0; i < nbOutFrames; ++i) - { - const std::size_t frameIndex = std::get<2>(keyframes.at(i)); - for(std::size_t mediaIndex = 0; mediaIndex < _feeds.size(); ++mediaIndex) - { - auto& feed = *_feeds.at(mediaIndex); - feed.goToFrame(frameIndex + _cameraInfos.at(mediaIndex).frameOffset); - feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics); - writeKeyframe(image, frameIndex, mediaIndex); - } + + return true; +} + +bool KeyframeSelector::writeSelection(const std::vector& brands, + const std::vector& models, + const std::vector& mmFocals, + const bool renameKeyframes, + const std::string& outputExtension, + const image::EStorageDataType storageDataType) const +{ + image::Image image; + camera::PinholeRadialK3 queryIntrinsics; + bool hasIntrinsics = false; + std::string currentImgName; + + for (std::size_t id = 0; id < _mediaPaths.size(); ++id) { + const auto& path = _mediaPaths.at(id); + + // Create a feed provider per mediaPaths + dataio::FeedProvider feed(path); + + // Check if feed is initialized + if (!feed.isInit()) { + ALICEVISION_LOG_ERROR("Cannot initialize the FeedProvider with " << path); + return false; + } + + std::string processedOutputFolder = _outputFolder; + if (_mediaPaths.size() > 1) { + const std::string rigFolder = _outputFolder + "/rig/"; + if (!fs::exists(rigFolder)) { + fs::create_directory(rigFolder); + } + + processedOutputFolder = rigFolder + std::to_string(id); + if (!fs::exists(processedOutputFolder)) { + fs::create_directory(processedOutputFolder); + } + } + + unsigned int outputKeyframeCnt = 0; // Used if the "renameKeyframes" option is enabled + for (const auto pos : _selectedKeyframes) { + if (!feed.goToFrame(pos)) { + ALICEVISION_LOG_ERROR("Invalid frame position " << pos << ". 
Ignoring this frame."); + continue; + } + + if (!feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) { + ALICEVISION_LOG_ERROR("Error reading image"); + return false; + } + + oiio::ImageSpec inputSpec; + inputSpec.extra_attribs = image::readImageMetadata(currentImgName); + int orientation = inputSpec.get_int_attribute("Orientation", 1); + float pixelAspectRatio = inputSpec.get_float_attribute("PixelAspectRatio", 1.0f); + std::string colorspace = inputSpec.get_string_attribute("oiio:Colorspace", ""); + + oiio::ParamValueList metadata; + metadata.push_back(oiio::ParamValue("Make", brands[id])); + metadata.push_back(oiio::ParamValue("Model", models[id])); + metadata.push_back(oiio::ParamValue("Exif:BodySerialNumber", std::to_string(getRandomInt()))); + metadata.push_back(oiio::ParamValue("Exif:FocalLength", mmFocals[id])); + metadata.push_back(oiio::ParamValue("Exif:ImageUniqueID", std::to_string(getRandomInt()))); + metadata.push_back(oiio::ParamValue("Orientation", orientation)); // Will not propagate for PNG outputs + if (outputExtension != "jpg") // TODO: propagate pixelAspectRatio properly for JPG + metadata.push_back(oiio::ParamValue("PixelAspectRatio", pixelAspectRatio)); + + fs::path folder = _outputFolder; + std::ostringstream filenameSS; + if (renameKeyframes) + filenameSS << std::setw(5) << std::setfill('0') << outputKeyframeCnt++ << "." << outputExtension; + else + filenameSS << std::setw(5) << std::setfill('0') << pos << "." 
<< outputExtension; + const auto filepath = (processedOutputFolder / fs::path(filenameSS.str())).string(); + + image::ImageWriteOptions options; + // If the feed is a video, frames are read as OpenCV RGB matrices before being converted to image::ImageRGB + if (feed.isVideo()) { + options.fromColorSpace(image::EImageColorSpace::SRGB); + options.toColorSpace(image::EImageColorSpace::AUTO); + } else { // Otherwise, the frames have been read without any conversion, they should be written as such + if (colorspace == "sRGB") + options.fromColorSpace(image::EImageColorSpace::SRGB); + + if (outputExtension == "exr") + options.toColorSpace(image::EImageColorSpace::NO_CONVERSION); + else + options.toColorSpace(image::EImageColorSpace::AUTO); + } + + if (storageDataType != image::EStorageDataType::Undefined && outputExtension == "exr"){ + options.storageDataType(storageDataType); + } + + image::writeImage(filepath, image, options, metadata); + ALICEVISION_LOG_DEBUG("Wrote selected keyframe " << pos); + } } - } + + return true; } -float KeyframeSelector::computeSharpness(const image::Image& imageGray, - const unsigned int tileHeight, - const unsigned int tileWidth, - const unsigned int tileSharpSubset) const +bool KeyframeSelector::exportScoresToFile(const std::string& filename, const bool exportSelectedFrames) const { - image::Image scharrXDer; - image::Image scharrYDer; + std::size_t sequenceSize = scoresMap.begin()->second->size(); + if (sequenceSize == 0) { + ALICEVISION_LOG_ERROR("Nothing to export, scores do not seem to have been computed!"); + return false; + } + + std::ofstream os; + os.open((fs::path(_outputFolder) / filename).string(), std::ios::app); - image::ImageScharrXDerivative(imageGray, scharrXDer); // normalized - image::ImageScharrYDerivative(imageGray, scharrYDer); // normalized + if (!os.is_open()) { + ALICEVISION_LOG_ERROR("Unable to open the scores file: " << filename << "."); + return false; + } - scharrXDer = scharrXDer.cwiseAbs(); // absolute value - 
scharrYDer = scharrYDer.cwiseAbs(); // absolute value + ALICEVISION_LOG_DEBUG("Exporting scores as CSV file: " << filename << " (export selected frames: " + << exportSelectedFrames << ")"); - // image tiles - std::vector averageTileIntensity; - const float tileSizeInv = 1 / static_cast(tileHeight * tileWidth); + os.seekp(0, std::ios::end); // Put the cursor at the end of the file + if (os.tellp() == std::streampos(0)) { // 'tellp' returns the cursor's position + // If the file does not exist yet, add a header + std::string header = "FrameNb;"; + for (const auto& mapIterator : scoresMap) + header += mapIterator.first + ";"; - for(std::size_t y = 0; y < (_nbTileSide * tileHeight); y += tileHeight) - { - for(std::size_t x = 0; x < (_nbTileSide * tileWidth); x += tileWidth) - { - const auto sum = scharrXDer.block(y, x, tileHeight, tileWidth).sum() + scharrYDer.block(y, x, tileHeight, tileWidth).sum(); - averageTileIntensity.push_back(sum * tileSizeInv); + if (exportSelectedFrames) + header += "Selected;"; + + os << header << "\n"; } - } - // sort tiles average pixel intensity - std::sort(averageTileIntensity.begin(), averageTileIntensity.end()); + for (std::size_t index = 0; index < sequenceSize; ++index) { + os << index << ";"; // First column: frame index - // return the sum of the subset average pixel intensity - return std::accumulate(averageTileIntensity.end() - tileSharpSubset, averageTileIntensity.end(), 0.0f) / tileSharpSubset; -} + for (const auto& mapIterator : scoresMap) + os << mapIterator.second->at(index) << ";"; + if (exportSelectedFrames) + os << _selectedFrames.at(index); + os << "\n"; + } + os.close(); + return true; +} -bool KeyframeSelector::computeFrameData(const image::Image& image, - std::size_t frameIndex, - std::size_t mediaIndex, - unsigned int tileSharpSubset) +bool KeyframeSelector::exportFlowVisualisation(const std::size_t rescaledWidth) { - if(!_hasSharpnessSelection && !_hasSparseDistanceSelection) - return true; // nothing to do - - 
image::Image imageGray; // grayscale image - image::Image imageGrayHalfSample; // half resolution grayscale image - - const auto& currMediaInfo = _mediasInfo.at(mediaIndex); - auto& currframeData = _framesData.at(frameIndex); - auto& currMediaData = currframeData.mediasData.at(mediaIndex); - - // get grayscale image and resize - image::ConvertPixelType(image, &imageGray); - image::ImageHalfSample(imageGray, imageGrayHalfSample); - - // compute sharpness - if(_hasSharpnessSelection) - { - currMediaData.sharpness = computeSharpness(imageGrayHalfSample, - currMediaInfo.tileHeight, - currMediaInfo.tileWidth, - tileSharpSubset); - ALICEVISION_LOG_DEBUG( " - sharpness : " << currMediaData.sharpness); - } - - if((currMediaData.sharpness > _sharpnessThreshold) || !_hasSharpnessSelection) - { - bool noKeyframe = (_keyframeIndexes.empty()); - - // compute current frame sparse histogram - std::unique_ptr regions; - _imageDescriber->describe(imageGrayHalfSample, regions); - currMediaData.histogram = voctree::SparseHistogram(_voctree->quantizeToSparse(dynamic_cast(regions.get())->Descriptors())); - - // compute sparseDistance - if(!noKeyframe && _hasSparseDistanceSelection) - { - unsigned int nbKeyframetoCompare = (_keyframeIndexes.size() < _nbKeyFrameDist)? 
_keyframeIndexes.size() : _nbKeyFrameDist; - - for(std::size_t i = _keyframeIndexes.size() - nbKeyframetoCompare; i < _keyframeIndexes.size(); ++i) - { - for(auto& media : _framesData.at(_keyframeIndexes.at(i)).mediasData) - { - currMediaData.distScore = std::max(currMediaData.distScore, std::abs(voctree::sparseDistance(media.histogram, currMediaData.histogram, "strongCommonPoints"))); + // Create feeds and count minimum number of frames + std::size_t nbFrames = std::numeric_limits::max(); + std::vector> feeds; + std::vector outputFolders; + + for (std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) { + const auto& path = _mediaPaths.at(mediaIndex); + + // Create a feed provider per mediaPaths + feeds.emplace_back(new dataio::FeedProvider(path)); + auto& feed = *feeds.back(); + + // Check if feed is initialized + if (!feed.isInit()) { + ALICEVISION_LOG_ERROR("Cannot initialize the FeedProvider with " << path); + return false; } - } - currframeData.maxDistScore = std::max(currframeData.maxDistScore, currMediaData.distScore); - ALICEVISION_LOG_DEBUG(" - distScore : " << currMediaData.distScore); + + feed.goToFrame(0); + + // Update minimum number of frames + nbFrames = std::min(nbFrames, (size_t)feed.nbFrames()); + + // If there is a rig, create the corresponding folders + std::string processedOutputFolder = _outputFolder; + if (_mediaPaths.size() > 1) { + const std::string rigFolder = _outputFolder + "/rig/"; + if (!fs::exists(rigFolder)) { + fs::create_directory(rigFolder); + } + + processedOutputFolder = rigFolder + std::to_string(mediaIndex); + if (!fs::exists(processedOutputFolder)) { + fs::create_directory(processedOutputFolder); + } + } + + // Save the output paths + outputFolders.push_back(processedOutputFolder); } - if(noKeyframe || (currMediaData.distScore < _distScoreMax)) - { - return true; + if (nbFrames == 0) { + ALICEVISION_LOG_ERROR("No frame to visualise optical flow from!"); + return false; + } + + size_t currentFrame = 0; + 
cv::Mat previousMat, currentMat; // OpenCV matrices for the optical flow computation + auto ptrFlow = cv::optflow::createOptFlow_DeepFlow(); + + /* To be able to handle the rigs and to avoid storing the optical flow results for all frames in case + * we might want to export them, we need to recompute the optical flow for all the frames, even if it has already + * been computed in computeScores(). */ + while (currentFrame < nbFrames) { + for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) { + auto& feed = *feeds.at(mediaIndex); + + if (currentFrame > 0) { // Get currentFrame - 1 for the optical flow computation + previousMat = readImage(feed, rescaledWidth); + feed.goToNextFrame(); + } + + // Handle invalid or missing frames + try { + currentMat = readImage(feed, rescaledWidth); // Read image and rescale it if requested + } catch (const std::invalid_argument& ex) { + ALICEVISION_LOG_WARNING("Invalid or missing frame " << currentFrame + 1 + << ", attempting to read frame " << currentFrame + 2 << "."); + bool success = feed.goToFrame(++currentFrame); + if (success) + currentMat = readImage(feed, rescaledWidth); + else + ALICEVISION_THROW_ERROR("Could not go to frame " << currentFrame + 1 + << " either. 
The feed might be corrupted."); + } + + if (currentFrame > 0) { + cv::Mat flow; + ptrFlow->calc(currentMat, previousMat, flow); + + cv::Mat flowParts[2]; + cv::split(flow, flowParts); + cv::Mat magnitude, angle, magnNorm; + cv::cartToPolar(flowParts[0], flowParts[1], magnitude, angle, true); + cv::normalize(magnitude, magnNorm, 0.0f, 1.0f, cv::NORM_MINMAX); + angle *= ((1.f / 360.f) * (180.f / 255.f)); + + cv::Mat _hsv[3], hsv, hsv8, bgr; + _hsv[0] = angle; + _hsv[1] = cv::Mat::ones(angle.size(), CV_32F); + _hsv[2] = magnNorm; + cv::merge(_hsv, 3, hsv); + hsv.convertTo(hsv8, CV_8U, 255.0); + cv::cvtColor(hsv8, bgr, cv::COLOR_HSV2BGR); + + std::ostringstream filenameSS; + filenameSS << std::setw(5) << std::setfill('0') << currentFrame << ".png"; + cv::imwrite(outputFolders.at(mediaIndex) + "/OF_" + filenameSS.str(), bgr); + ALICEVISION_LOG_DEBUG("Wrote OF_" << filenameSS.str() << "!"); + } + } + ++currentFrame; } - } - return false; + + return true; } -void KeyframeSelector::writeKeyframe(const image::Image& image, - std::size_t frameIndex, - std::size_t mediaIndex) +cv::Mat KeyframeSelector::readImage(dataio::FeedProvider &feed, std::size_t width) { - auto& mediaInfo = _mediasInfo.at(mediaIndex); - fs::path folder{_outputFolder}; + image::Image image; + camera::PinholeRadialK3 queryIntrinsics; + bool hasIntrinsics = false; + std::string currentImgName; - if(_feeds.size() > 1) - folder /= fs::path("rig") / fs::path(std::to_string(mediaIndex)); + if (!feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) { + ALICEVISION_THROW(std::invalid_argument, "Cannot read frame '" << currentImgName << "'!"); + } - std::ostringstream filenameSS; - filenameSS << std::setw(_padding) << std::setfill('0') << frameIndex << ".jpg"; + // Convert content to OpenCV + cv::Mat cvFrame(cv::Size(image.cols(), image.rows()), CV_8UC3, image.data(), image.cols() * 3); - const auto filepath = (folder / fs::path(filenameSS.str())).string(); + // Convert to grayscale + cv::Mat 
cvGrayscale; + cv::cvtColor(cvFrame, cvGrayscale, cv::COLOR_BGR2GRAY); - mediaInfo.spec.attribute("Exif:ImageUniqueID", std::to_string(getRandomInt())); + // Resize to smaller size if requested + if (width == 0) + return cvGrayscale; - std::unique_ptr out(oiio::ImageOutput::create(filepath)); - - if(out.get() == nullptr) - { - throw std::invalid_argument("Cannot create image file : " + filepath); - } - - if(!out->open(filepath, mediaInfo.spec)) - { - throw std::invalid_argument("Cannot open image file : " + filepath); - } + cv::Mat cvRescaled; + if (cvGrayscale.cols > width && width > 0) { + cv::resize(cvGrayscale, cvRescaled, + cv::Size(width,double(cvGrayscale.rows) * double(width) / double(cvGrayscale.cols))); + } - out->write_image(oiio::TypeDesc::UINT8, image.data()); // always jpeg - out->close(); + return cvRescaled; } -void KeyframeSelector::convertFocalLengthInMM(CameraInfo& cameraInfo, int imageWidth) +double KeyframeSelector::computeSharpness(const cv::Mat& grayscaleImage, const std::size_t windowSize) { - assert(imageWidth > 0); - - sensorDB::Datasheet find; - std::vector vecDatabase; - sensorDB::parseDatabase(_sensorDbPath, vecDatabase); - - if(sensorDB::getInfo(cameraInfo.brand, cameraInfo.model, vecDatabase, find)) - { - cameraInfo.focalLength = (cameraInfo.focalLength * find._sensorWidth) / imageWidth; - cameraInfo.focalIsMM = true; - ALICEVISION_LOG_INFO("Focal length converted in mm : " << cameraInfo.focalLength); - } - else - { - ALICEVISION_LOG_WARNING("Cannot convert focal length in mm : " << cameraInfo.brand << " / " << cameraInfo.model); - } + if (windowSize > grayscaleImage.size().width || windowSize > grayscaleImage.size().height) { + ALICEVISION_THROW(std::invalid_argument, + "Cannot use a sliding window bigger than the image (sliding window size: " + << windowSize << ", image size: " << grayscaleImage.size().width << "x" + << grayscaleImage.size().height << ")"); + } + + cv::Mat sum, squaredSum, laplacian; + cv::Laplacian(grayscaleImage, 
laplacian, CV_64F); + cv::integral(laplacian, sum, squaredSum); + + double totalCount = windowSize * windowSize; + double maxstd = 0.0; + + // TODO: do not slide the window pixel by pixel to speed up computations + // Starts at 1 because the integral image is padded with 0s on the top and left borders + for (int y = 1; y < sum.rows - windowSize; ++y) { + for (int x = 1; x < sum.cols - windowSize; ++x) { + double tl = sum.at(y, x); + double tr = sum.at(y, x + windowSize); + double bl = sum.at(y + windowSize, x); + double br = sum.at(y + windowSize, x + windowSize); + const double s1 = br + tl - tr - bl; + + tl = squaredSum.at(y, x); + tr = squaredSum.at(y, x + windowSize); + bl = squaredSum.at(y + windowSize, x); + br = squaredSum.at(y + windowSize, x + windowSize); + const double s2 = br + tl - tr - bl; + + const double std2 = std::sqrt((s2 - (s1 * s1) / totalCount) / totalCount); + maxstd = std::max(maxstd, std2); + } + } + + return maxstd; +} + +double KeyframeSelector::estimateFlow(const cv::Ptr& ptrFlow, const cv::Mat& grayscaleImage, + const cv::Mat& previousGrayscaleImage, const std::size_t cellSize) +{ + if (cellSize > grayscaleImage.size().width) { // If the cell size is bigger than the height, it will be adjusted + ALICEVISION_THROW(std::invalid_argument, + "Cannot use a cell size bigger than the image's width (cell size: " << cellSize + << ", image's width: " << grayscaleImage.size().width << ")"); + } + + if (grayscaleImage.size() != previousGrayscaleImage.size()) { + ALICEVISION_THROW(std::invalid_argument, + "The images used for the optical flow computation have different sizes (" + << grayscaleImage.size().width << "x" << grayscaleImage.size().height << " and " + << previousGrayscaleImage.size().width << "x" << previousGrayscaleImage.size().height + << ")"); + } + + cv::Mat flow; + ptrFlow->calc(grayscaleImage, previousGrayscaleImage, flow); + + cv::Mat sumflow; + cv::integral(flow, sumflow, CV_64F); + + double norm; + std::vector motionByCell; + + // 
Starts at 1 because the integral matrix is padded with 0s on the top and left borders + for (std::size_t y = 1; y < sumflow.size().height; y += cellSize) { + std::size_t maxCellSizeHeight = cellSize; + if (std::min(sumflow.size().height, int(y + cellSize)) == sumflow.size().height) + maxCellSizeHeight = sumflow.size().height - y; + + for (std::size_t x = 1; x < sumflow.size().width; x += cellSize) { + std::size_t maxCellSizeWidth = cellSize; + if (std::min(sumflow.size().width, int(x + cellSize)) == sumflow.size().width) + maxCellSizeWidth = sumflow.size().width - x; + cv::Point2d tl = sumflow.at(y, x); + cv::Point2d tr = sumflow.at(y, x + maxCellSizeWidth - 1); + cv::Point2d bl = sumflow.at(y + maxCellSizeHeight - 1, x); + cv::Point2d br = sumflow.at(y + maxCellSizeHeight - 1, x + maxCellSizeWidth - 1); + cv::Point2d s = br + tl - tr - bl; + norm = std::hypot(s.x, s.y) / (maxCellSizeHeight * maxCellSizeWidth); + motionByCell.push_back(norm); + } + } + + return findMedian(motionByCell); } } // namespace keyframe diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index c5bfff9e18..9ce9060ba4 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -6,18 +6,19 @@ #pragma once -#include -#include #include -#include +#include #include +#include +#include #include +#include +#include #include #include #include - namespace aliceVision { namespace image { @@ -32,334 +33,247 @@ namespace oiio = OIIO; class KeyframeSelector { -private: - // SIFT descriptor definition - const static std::size_t _dimension = 128; - using DescriptorFloat = aliceVision::feature::Descriptor; - public: + /** + * @brief KeyframeSelector constructor + * @param[in] mediaPath video file path or image sequence directory + * @param[in] sensorDbPath camera sensor width database path + * @param[in] outputFolder output keyframes directory + */ + KeyframeSelector(const std::vector& mediaPaths, + 
const std::string& sensorDbPath, + const std::string& outputFolder); - /** - * @brief Camera informations - */ - struct CameraInfo { - /// Camera brand - std::string brand = "Custom"; - /// Camera model - std::string model = "radial3"; - /// Focal length in mm or px - float focalLength = 1.2f; - /// Camera frame offset - unsigned int frameOffset = 0; - /// If focalIsMM is false, focalLength is in px - bool focalIsMM = true; - }; - - /** - * @brief KeyframeSelector constructor - * @param[in] mediaPath video file path or image sequence directory - * @param[in] sensorDbPath camera sensor width database path - * @param[in] voctreeFilePath vocabulary tree path - * @param[in] outputFolder output keyframes directory - */ - KeyframeSelector(const std::vector& mediaPaths, - const std::string& sensorDbPath, - const std::string& voctreeFilePath, - const std::string& outputFolder); - - /** - * @brief KeyframeSelector copy constructor - NO COPY - * @param[in] copy keyframeSelector - */ - KeyframeSelector(const KeyframeSelector& copy) = delete; - - /** - * @brief Process media paths and extract keyframes - */ - void process(); - - /** - * @brief Set if selector use keyframe sparse distance selection - * @param[in] useSparseDistanceSelection True or False - */ - void useSparseDistanceSelection(bool useSparseDistanceSelection) - { - _hasSparseDistanceSelection = useSparseDistanceSelection; - } - - /** - * @brief Set if selector use keyframe sharpness selection - * @param[in] useSharpnessSelection True or False - */ - void useSharpnessSelection(bool useSharpnessSelection) - { - _hasSharpnessSelection = useSharpnessSelection; - } - - /** - * @brief Set cameras informations for output keyframes - * @param[in] cameras informations - */ - void setCameraInfos(const std::vector& cameraInfos) - { - _cameraInfos = cameraInfos; - } - - /** - * @brief Set sparse distance max score - * @param[in] distScoreMax max strong common points - */ - void setSparseDistanceMaxScore(float distScoreMax) - 
{ - _distScoreMax = distScoreMax; - } - - /** - * @brief Set Sharpness selection preset - * @param[in] sharpnessPreset enum - */ - void setSharpnessSelectionPreset(ESharpnessSelectionPreset sharpnessPreset) - { - switch(sharpnessPreset) + /** + * @brief KeyframeSelector copy constructor - NO COPY + * @param[in] copy keyframeSelector + */ + KeyframeSelector(const KeyframeSelector& copy) = delete; + + /** + * @brief Process media paths and build a list of selected keyframes using a regular sampling over time + */ + void processRegular(); + + /** + * @brief Process media paths and build a list of selected keyframes using a smart method based on sharpness + * and optical flow estimation. The whole process can be described as follows: + * - Step 0: compute the sharpness and optical flow scores for all the frames in all the sequences + * - Step 1: split the whole sequence into subsequences depending on the accumulated movement ("motion step") + * - Step 2: check whether the number of subsequences corresponds to what we want + * - if we do not have enough frames, we reduce the motion step until we get the required + * number of frames + * - if we have too many frames, we increase the motion step until we get the required number of + * frames + * - Step 3: for each subsequence, find the frame that best fit both a sharpness criteria (as sharp as + * possible) and a temporal criteria (as in the middle of the subsequence as possible); the goal + * of these criteria is to avoid the following cases: + * - the selected frame is well located temporally but is blurry + * - the selected frame is very sharp but is located at the very beginning or very end of the + * subsequence, meaning that it is likely adjacent to another very sharp frame in another + * subsequence; in that case, we might select two very sharp frames that are consecutive with no + * significant differences in their motion + * - Step 4: push the selected frames' IDs + * @param[in] pxDisplacement in percent, the 
minimum of displaced pixels in the image since the last selected frame + * @param[in] rescaledWidthSharpness to resize the input frames to before using them to compute the + * sharpness scores (if equal to 0, no rescale will be performed) + * @param[in] rescaledWidthFlow the width to resize the input frames to before using them to compute the + * motion scores (if equal to 0, no rescale will be performed) + * @param[in] sharpnessWindowSize the size of the sliding window used to compute sharpness scores, in pixels + * @param[in] flowCellSize the size of the cells within a frame that are used to compute the optical flow scores, + * in pixels + * @param[in] skipSharpnessComputation if true, the sharpness score computations will not be performed and a fixed + * sharpness score will be given to all the input frames + */ + void processSmart(const float pxDisplacement, const std::size_t rescaledWidthSharpness, + const std::size_t rescaledWidthFlow, const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize, const bool skipSharpnessComputation = false); + + /** + * @brief Compute the sharpness and optical flow scores for the input media paths + * @param[in] rescaledWidthSharpness the width to resize the input frames to before using them to compute the + * sharpness scores (if equal to 0, no rescale will be performed) + * @param[in] rescaledWidthFlow the width to resize the input frames to before using them to compute the + * motion scores (if equal to 0, no rescale will be performed) + * @param[in] sharpnessWindowSize the size of the sliding window used to compute sharpness scores, in pixels + * @param[in] flowCellSize the size of the cells within a frame that are used to compute the optical flow scores, + * in pixels + * @param[in] skipSharpnessComputation if true, the sharpness score computations will not be performed and a fixed + * sharpness score will be given to all the input frames + * @return true if the scores have been successfully computed for all 
frames, false otherwise + */ + bool computeScores(const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, + const std::size_t sharpnessWindowSize, const std::size_t flowCellSize, + const bool skipSharpnessComputation); + + /** + * @brief Write the selected keyframes in the output folder + * @param[in] brands brand name for each camera + * @param[in] models model name for each camera + * @param[in] mmFocals focal in millimeters for each camera + * @param[in] renameKeyframes name output keyframes as consecutive frames instead of using their index as a name + * @param[in] outputExtension file extension of the written keyframes + * @param[in] storageDataType EXR storage data type for the output keyframes (ignored when the extension is not EXR) + * @return true if all the selected keyframes were successfully written, false otherwise + */ + bool writeSelection(const std::vector& brands, const std::vector& models, + const std::vector& mmFocals, const bool renameKeyframes, const std::string& outputExtension, + const image::EStorageDataType storageDataType = image::EStorageDataType::Undefined) const; + + /** + * @brief Export the computed sharpness and optical flow scores to a CSV file + * @param[in] filename the name of the CSV file (e.g. 
"scores.csv"), which will be written in the output folder + * @param[in] exportSelectedFrames add a column with 1s and 0s depending on whether the frame has been selected + * @return true if the CSV was correctly written to disk, false otherwise + */ + bool exportScoresToFile(const std::string& filename, const bool exportSelectedFrames = false) const; + + /** + * @brief Export optical flow HSV visualisation for each frame as a PNG image + * @param[in] rescaledWidth the width to resize the input frames to before computing the optical flow (if equal + * to 0, no rescale will be performed) + * @return true if the frames have been correctly exported, false otherwise + */ + bool exportFlowVisualisation(const std::size_t rescaledWidth); + + /** + * @brief Set the minimum frame step parameter for the processing algorithm + * @param[in] frameStep minimum number of frames between two keyframes + */ + void setMinFrameStep(unsigned int frameStep) { - // arbitrary thresholds - case ESharpnessSelectionPreset::ULTRA: _sharpnessThreshold = 20.0f; break; - case ESharpnessSelectionPreset::HIGH: _sharpnessThreshold = 17.0f; break; - case ESharpnessSelectionPreset::NORMAL: _sharpnessThreshold = 15.0f; break; - case ESharpnessSelectionPreset::MEDIUM: _sharpnessThreshold = 10.0f; break; - case ESharpnessSelectionPreset::LOW: _sharpnessThreshold = 8.0f; break; - case ESharpnessSelectionPreset::VERY_LOW: _sharpnessThreshold = 6.0f; break; - case ESharpnessSelectionPreset::NONE: _sharpnessThreshold = .0f; break; - default: throw std::out_of_range("Invalid sharpnessPreset enum"); + _minFrameStep = frameStep; } - } - - /** - * @brief Set sharp subset size for process algorithm - * @param[in] subset sharp part of the image (1 = all, 2 = size/2, ...) 
- */ - void setSharpSubset(unsigned int subset) - { - _sharpSubset = subset; - } - - /** - * @brief Set min frame step for process algorithm - * @param[in] frameStep minimum number of frames between two keyframes - */ - void setMinFrameStep(unsigned int frameStep) - { - _minFrameStep = frameStep; - } - - /** - * @brief Set max frame step for process algorithm - * @param[in] frameStep maximum number of frames after which a keyframe can be taken - */ - void setMaxFrameStep(unsigned int frameStep) - { - _maxFrameStep = frameStep; - } - - /** - * @brief Set max output frame number for process algorithm - * @param[in] nbFrame maximum number of output frames (if 0, no limit) - */ - void setMaxOutFrame(unsigned int nbFrame) - { - _maxOutFrame = nbFrame; - } - - /** - * @brief Get sharp subset size for process algorithm - * @return sharp part of the image (1 = all, 2 = size/2, ...) - */ - unsigned int getSharpSubset() const - { - return _sharpSubset; - } - - /** - * @brief Get min frame step for process algorithm - * @return minimum number of frames between two keyframes - */ - unsigned int getMinFrameStep() const - { - return _minFrameStep; - } - - /** - * @brief Get max output frame number for process algorithm - * @return maximum number of frames for trying to select a keyframe - */ - unsigned int getMaxFrameStep() const - { - return _maxFrameStep; - } - - /** - * @brief Get max output frame number for process algorithm - * @return maximum number of output frames (if 0, no limit) - */ - unsigned int getMaxOutFrame() const - { - return _maxOutFrame; - } - -private: - // Paths - - /// Media paths - std::vector _mediaPaths; - /// Camera sensor width database - std::string _sensorDbPath; - /// Voctree file path - std::string _voctreeFilePath; - /// Output folder for keyframes - std::string _outputFolder; - - // Algorithm variables - - /// Sharp part of the image (1 = all, 2 = size/2, ...) 
- unsigned int _sharpSubset = 4; - /// Minimum number of frame between two keyframes - unsigned int _minFrameStep = 12; - /// Maximum number of frame for evaluation - unsigned int _maxFrameStep = 36; - /// Maximum number of output frame (0 = no limit) - unsigned int _maxOutFrame = 0; - /// Number of tiles per side - unsigned int _nbTileSide = 20; - /// Number of previous keyframe distances in order to evaluate distance score - unsigned int _nbKeyFrameDist = 10; - /// Use padding on digits for exported frames - unsigned int _padding = 7; - /// Sharpness threshold (image with higher sharpness will be selected) - float _sharpnessThreshold = 15.0f; - /// Distance max score (image with smallest distance from the last keyframe will be selected) - float _distScoreMax = 100.0f; - /// Use sharpness selection - bool _hasSharpnessSelection = true; - /// Use sparseDistance selection - bool _hasSparseDistanceSelection = true; - - /// Camera metadatas - std::vector _cameraInfos; - - // Tools - - /// Image describer in order to extract describer - std::unique_ptr _imageDescriber; - /// Voctree in order to compute sparseHistogram - std::unique_ptr< aliceVision::voctree::VocabularyTree > _voctree; - /// Feed provider for media paths images extraction - std::vector< std::unique_ptr > _feeds; - - // Process structures - - /** - * @brief Process media global informations - */ - struct MediaInfo - { - /// height of the tile - unsigned int tileHeight = 0; - /// width of the tile - unsigned int tileWidth = 0; - /// openImageIO image spec - oiio::ImageSpec spec; - }; - - /** - * @brief Process media informations at a specific frame - */ - struct MediaData - { - /// sharpness score - float sharpness = 0; - /// maximum distance score with keyframe media histograms - float distScore = 0; - /// sparseHistogram - voctree::SparseHistogram histogram; - }; - - /** - * @brief Process frame (or set of frames) informations - */ - struct FrameData - { - /// average sharpness score of all media - 
float avgSharpness = 0; - /// maximum voctree distance score of all media - float maxDistScore = 0; - /// frame (or set of frames) selected for evaluation - bool selected = false; - /// frame is a keyframe - bool keyframe = false; - /// medias process data - std::vector mediasData; + /** + * @brief Set the maximum frame step parameter for the processing algorithm + * @param[in] frameStep maximum number of frames between two keyframes + */ + void setMaxFrameStep(unsigned int frameStep) + { + _maxFrameStep = frameStep; + } /** - * @brief Compute average sharpness score + * @brief Set the minimum output frame number parameter for the processing algorithm + * @param[in] nbFrames minimum number of output frames */ - void computeAvgSharpness() + void setMinOutFrames(unsigned int nbFrames) { - for(const auto& media : mediasData) - avgSharpness += media.sharpness; - avgSharpness /= mediasData.size(); + _minOutFrames = nbFrames; } - }; - - /// MediaInfo structure per input medias - std::vector _mediasInfo; - /// FrameData structure per frame - std::vector _framesData; - /// Keyframe indexes container - std::vector _keyframeIndexes; - - /** - * @brief Compute sharpness score of a given image - * @param[in] imageGray given image in grayscale - * @param[in] tileHeight height of tile - * @param[in] tileWidth width of tile - * @param[in] tileSharpSubset number of sharp tiles - * @return sharpness score - */ - float computeSharpness(const image::Image& imageGray, - const unsigned int tileHeight, - const unsigned int tileWidth, - const unsigned int tileSharpSubset) const; - - /** - * @brief Compute sharpness and distance score for a given image - * @param[in] image an image of the media - * @param[in] frameIndex the image index in the media sequence - * @param[in] mediaIndex the media index - * @param[in] tileSharpSubset number of sharp tiles - * @return true if the frame is selected - */ - bool computeFrameData(const image::Image& image, - std::size_t frameIndex, - std::size_t 
mediaIndex, - unsigned int tileSharpSubset); - - /** - * @brief Write a keyframe and metadata - * @param[in] image an image of the media - * @param[in] frameIndex the image index in the media sequence - * @param[in] mediaIndex the media index - */ - void writeKeyframe(const image::Image& image, - std::size_t frameIndex, - std::size_t mediaIndex); - - /** - * @brief Convert focal length from px to mm using sensor width database - * @param[in] camera informations - * @param[in] imageWidth media image width in px - */ - void convertFocalLengthInMM(CameraInfo& cameraInfo, int imageWidth); + + /** + * @brief Set the maximum output frame number parameter for the processing algorithm + * @param[in] nbFrames maximum number of output frames (if 0, no limit for the regular algorithm) + */ + void setMaxOutFrames(unsigned int nbFrames) + { + _maxOutFrames = nbFrames; + } + + /** + * @brief Get the minimum frame step parameter for the processing algorithm + * @return minimum number of frames between two keyframes + */ + unsigned int getMinFrameStep() const + { + return _minFrameStep; + } + + /** + * @brief Get the maximum output frame number parameter for the processing algorithm + * @return maximum number of frames between two keyframes + */ + unsigned int getMaxFrameStep() const + { + return _maxFrameStep; + } + + /** + * @brief Get the minimum output frame for the processing algorithm + * @return minimum number of output frames + */ + unsigned int getMinOutFrames() const + { + return _minOutFrames; + } + + /** + * @brief Get the maximum output frame number for the processing algorithm + * @return maximum number of output frames (if 0, no limit for the regular algorithm) + */ + unsigned int getMaxOutFrames() const + { + return _maxOutFrames; + } + +private: + /** + * @brief Read an image from a feed provider into a grayscale OpenCV matrix, and rescale it if a size is provided. + * @param[in] feed The feed provider + * @param[in] width The width to resize the input image to. 
The height will be adjusted with respect to the size ratio. + * There will be no resizing if this parameter is set to 0 + * @return An OpenCV Mat object containing the image + */ + cv::Mat readImage(dataio::FeedProvider &feed, std::size_t width = 0); + + /** + * @brief Compute the sharpness scores for an input grayscale frame with a sliding window + * @param[in] grayscaleImage the input grayscale matrix of the frame + * @param[in] windowSize the size of the sliding window + * @return a double value representing the sharpness score of the sharpest tile in the image + */ + double computeSharpness(const cv::Mat& grayscaleImage, const std::size_t windowSize); + + /** + * @brief Estimate the optical flow score for an input grayscale frame based on its previous frame cell by cell + * @param[in] ptrFlow the OpenCV's DenseOpticalFlow object + * @param[in] grayscaleImage the grayscale matrix of the current frame + * @param[in] previousGrayscaleImage the grayscale matrix of the previous frame + * @param[in] cellSize the size of the evaluated cells within the frame + * @return a double value representing the median motion of all the image's cells + */ + double estimateFlow(const cv::Ptr& ptrFlow, const cv::Mat& grayscaleImage, + const cv::Mat& previousGrayscaleImage, const std::size_t cellSize); + + /// Selected keyframes IDs + std::vector _selectedKeyframes; + + /// Media paths + std::vector _mediaPaths; + /// Camera sensor width database + std::string _sensorDbPath; + /// Output folder for keyframes + std::string _outputFolder; + + // Parameters common to both the regular and smart methods + /// Maximum number of output frames (0 = no limit) + unsigned int _maxOutFrames = 0; + + // Regular algorithm parameters + /// Minimum number of frames between two keyframes + unsigned int _minFrameStep = 12; + /// Maximum number of frames between two keyframes + unsigned int _maxFrameStep = 36; + + // Smart algorithm parameters + /// Minimum number of output frames + unsigned int 
_minOutFrames = 10; + + /// Sharpness scores for each frame + std::vector _sharpnessScores; + /// Optical flow scores for each frame + std::vector _flowScores; + /// Vector containing 1s for frames that have been selected, 0 for those which have not + std::vector _selectedFrames; + + /// Size of the frame (after rescale, if any is applied) + unsigned int _frameWidth = 0; + unsigned int _frameHeight = 0; + + /// Map score vectors with names for export + std::map*> scoresMap; }; } // namespace keyframe diff --git a/src/aliceVision/keyframe/README.md b/src/aliceVision/keyframe/README.md new file mode 100644 index 0000000000..e9bc2cce8a --- /dev/null +++ b/src/aliceVision/keyframe/README.md @@ -0,0 +1,137 @@ +# Keyframe Selection + +This module provides several methods to perform a keyframe selection. + +The goal of the keyframe selection is to extract, from an input video or an input sequence of images, keyframes. +Two methods are currently supported: +- a **regular** selection method, which selects keyframes regularly across the input video / sequence according to a set of parameters; +- a **smart** selection method, which analyses the sharpness and motion of all the frames to select those which are deemed the most relevant (a frame is considered relevant if it contains significant motion in comparison to the last selected keyframe while being as sharp as possible). + +The selected keyframes can be written as JPG, PNG or EXR images, and the storage data type can be specified when the EXR file extension is selected. + +The keyframe selection module supports the following inputs: +- a path to a video file (e.g. "/path/to/video.mp4") +- a path to a folder containing images (e.g. "/path/to/folder/") +- a path to a folder containing images with a regular expression (e.g. "/path/to/folder/*.exr") + +Camera rigs are also supported. 
+ +## Regular selection method + +The regular selection samples frames regularly over time with respect to some user-provided constraints, that can be combined: +- `minFrameStep`: the minimum number of frames between two selected keyframes. If only `minFrameStep` is set, one keyframe will be selected every `minFrameStep` all along the video. +- `maxNbOutFrames`: the maximum number of selected keyframes (if set to 0, the number of selected keyframes will be unlimited). If only `maxNbOutFrames` is set, `maxNbOutFrames` keyframes equally spaced along the video will be selected. + +If both `minFrameStep` and `maxNbOutFrames` are set, up to `maxNbOutFrames` keyframes separated by at least `minFrameStep` frames will be selected. Examples of the parameter combinations are available in the [Examples](#examples) section. + +### Advanced regular selection + +For a more advanced regular selection, another parameter, `maxFrameStep`, is available to combine a relatively strict sampling with a maximum number of output frames. `maxFrameStep` sets the maximum number of frames between two selected keyframes and ensures that there will not be way more frames between two keyframes than expected when `maxNbOutFrames` is also set. `maxFrameStep` always takes precedence over `maxNbOutFrames`, meaning that the input video / sequence might not be sampled entirely for all the constraints to be respected. + +Combinations of the different parameters and the influence of `maxFrameStep` are shown in the [Examples](#examples) section. + + +### Examples + +The expected behaviour for the regular selection depending on the set parameters can be summed up as follows: + +- If only `minFrameStep` is set, the whole sequence will be sampled and a keyframe will be selected every `minFrameStep`. E.g: if a sequence has 2000 frames and `minFrameStep = 100`, 21 keyframes will be selected, with exactly 100 frames between them. 
+ +- If `minFrameStep` and `maxNbOutFrames` are set, there will never be less than `minFrameStep` between the keyframes, but there might be more for the whole sequence to be sampled while respecting `maxNbOutFrames`. E.g: if a sequence has 2000 frames, `minFrameStep = 100` and `maxNbOutFrames = 10`, 10 keyframes with 222 frames between them will be selected, so both `maxNbOutFrames` and `minFrameStep` are respected. If the sequence has 500 frames and `minFrameStep = 100` / `maxNbOutFrames = 10`, there will be 6 keyframes with 100 frames between them. No matter the value of the parameters, the entire sequence will be sampled. + +- If `minFrameStep` and `maxFrameStep` are both set but `maxNbOutFrames` is not, then the step between two keyframes will be exactly between `minFrameStep` and `maxFrameStep`. If `minFrameStep = 100` and `maxFrameStep = 200` without other constraints, it is equivalent to setting `minFrameStep = 150`. + +- If `minFrameStep`, `maxFrameStep` and `maxNbOutFrames` are all set, then `maxFrameStep` prevents the step between two frames from increasing too much to respect `maxNbOutFrames`. With the sequence of 2000 frames, having `minFrameStep = 100`, `maxFrameStep = 150` and `maxNbOutFrames = 10` will lead to 10 keyframes with 150 frames between each, and the sampling will stop before reaching the end of the sequence so that all the constraints are respected. In the same example, if `maxFrameStep = 300`, then there will be 10 keyframes with 222 frames between them, and the whole sequence will be sampled. + +## Smart selection method + +The smart selection works in two steps: +- for each frame in the input video / sequence, a sharpness score and a motion score are computed; +- the sharpness and motion scores are used as well as the temporal position of the evaluated frame to determine whether the frame will be selected. 
+ +The method aims at selecting a frame that is as sharp as possible with significant motion compared to the previously selected frame: consecutive frames should not be selected as keyframes if they do not contain enough motion, even if they are both very sharp. + +The minimum and maximum number of selected keyframes with the smart method can be set with the following parameters: +- `minNbOutFrames`: the minimum number of selected keyframes; +- `maxNbOutFrames`: the maximum number of selected keyframes. + +### Frame scoring + +For both the sharpness and motion scores, the evaluated frame is converted to a grayscale OpenCV matrix that may be rescaled using the `rescaledWidth` parameter. +Both scores are then computed on this grayscale (and possibly rescaled) image. + +#### Sharpness score + +The Laplacian of the input frame is first computed, followed by the integral image of the Laplacian. A sliding window of size `sharpnessWindowSize` is used to compute the standard deviation of the averaged Laplacian locally. The final sharpness score will be the highest standard deviation found. + +The image is evaluated with a sliding window instead of as a whole to prevent giving a bad score (low standard deviation) to a frame that contains a sharp element but is overall blurry. + +#### Motion score + +The dense optical flow of a frame is computed. The frame is then divided into cells of `flowCellSize` pixels in which the motion vectors are averaged to obtain a displacement value (in pixels) within that cell. Once all the displacement values have been computed, the median value of these displacement values is used as the motion score. + +### Selection + +Once both the sharpness and motion scores have been computed, subsequences are identified based on the motion accumulation across frames. The motion accumulation threshold is set with `pxDisplacement` which represents, in per cent, the number of pixels that need to have moved since the last keyframe for the motion to be significant. 
As the motion scores represent a displacement value for each frame, summing them over time until the accumulation reaches the threshold allows to divide the input video / sequence into subsequences that all contain significant motion. + +Within each subsequence, a single frame is to be selected as a keyframe. Before proceeding to the selection itself, the number of identified subsequences is checked to ensure that the minimum and maximum number of requested output keyframes are respected. +- If not enough subsequences have been identified, the motion accumulation threshold is lowered iteratively with a step of 0.5 px until it either reaches 0 or gives out an expected number of subsequences. If 0 is reached, the motion accumulation criterion stops making sense and is thus replaced by a regular sampling: in that specific case, the smart method falls back to the regular method's behaviour. +- If too many subsequences have been identified, the motion accumulation threshold is increased iteratively with a step of 0.5 px until an acceptable number of subsequences is identified. + +A keyframe is thus selected for each subsequence, based on its sharpness score as well as its position in its subsequence: the sharpness score of each frame is combined to a weight based on its position within the subsequence, with the best weights applied to the frames located at the middle of the subsequence, and the worst weights applied to the frames located on the subsequence's borders. + +The weights aim at favouring the selection of keyframes that are as temporally far from each other as possible. Using only the sharpness scores to select a keyframe within a subsequence could lead to two consecutive very sharp frames, respectively located at the very end of a subsequence and at the very beginning of the following subsequence, being selected. This would hinder the relevancy of the whole process, as they would likely not contain any significant difference. 
+ +### Debug options + +Debug options specific to the smart selection method are available: +- Export scores to CSV: the sharpness and motion scores for all the frames are written to a CSV file; +- Visualise the optical flow: the computed motion vectors are, for each frame, visualised with HSV images that are written as PNG images; +- Skip the sharpness score computations: the motion scores are computed normally, but all the sharpness score computations are skipped and replaced by a fixed value (1.0), which makes it possible to assess the impact of the sharpness score computations (and, by extension, of the motion scores) on the global processing time; +- Skip the frame selection: the scores are computed normally (the sharpness scores can be skipped) but will not be used to perform the final selection. This is mainly useful to determine the processing time solely dedicated to the score computations or, combined with the CSV export, to evaluate the quality of the scoring without needing to go through the complete selection process. 
+ + +## API + +- Constructor +```cpp +KeyframeSelector(const std::vector& mediaPaths, + const std::string& sensorDbPath, + const std::string& outputFolder); +``` +- Selection with regular method +```cpp +void processRegular(); +``` +- Selection with smart method +```cpp +void processSmart(const float pxDisplacement, + const std::size_t rescaledWidthSharpness, + const std::size_t rescaledWidthFlow, + const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize, + const bool skipSharpnessComputation = false); +``` +- Score computation +```cpp +bool computeScores(const std::size_t rescaledWidthSharpness, + const std::size_t rescaledWidthFlow, + const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize, + const bool skipSharpnessComputation); +``` +- Write selected keyframes +```cpp +bool writeSelection(const std::vector& brands, + const std::vector& models, + const std::vector& mmFocals, const bool renameKeyframes, + const std::string& outputExtension, + const image::EStorageDataType storageDataType = image::EStorageDataType::Undefined) const; +``` +- Debug options +```cpp +bool exportScoresToFile(const std::string& filename, + const bool exportSelectedFrames = false) const; + +bool exportFlowVisualisation(const std::size_t rescaledWidth); +``` \ No newline at end of file diff --git a/src/aliceVision/keyframe/SharpnessSelectionPreset.hpp b/src/aliceVision/keyframe/SharpnessSelectionPreset.hpp deleted file mode 100644 index cbac01f96d..0000000000 --- a/src/aliceVision/keyframe/SharpnessSelectionPreset.hpp +++ /dev/null @@ -1,87 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#pragma once - -#include -#include -#include -#include - -namespace aliceVision { -namespace keyframe { - -/** - * @brief Sharpness selection preset enum - */ -enum class ESharpnessSelectionPreset : std::uint8_t -{ - ULTRA - , HIGH - , NORMAL - , MEDIUM - , LOW - , VERY_LOW - , NONE -}; - -/** - * @brief convert an enum ESharpnessSelectionPreset to its corresponding string - * @param ESharpnessSelectionPreset - * @return String - */ -inline std::string ESharpnessSelectionPreset_enumToString(ESharpnessSelectionPreset sharpnessPreset) -{ - switch(sharpnessPreset) - { - case ESharpnessSelectionPreset::ULTRA: return "ultra"; - case ESharpnessSelectionPreset::HIGH: return "high"; - case ESharpnessSelectionPreset::NORMAL: return "normal"; - case ESharpnessSelectionPreset::MEDIUM: return "medium"; - case ESharpnessSelectionPreset::LOW: return "low"; - case ESharpnessSelectionPreset::VERY_LOW: return "very_low"; - case ESharpnessSelectionPreset::NONE: return "none"; - } - throw std::out_of_range("Invalid sharpnessPreset enum"); -} - -/** - * @brief convert a string sharpnessPreset to its corresponding enum ESharpnessSelectionPreset - * @param String - * @return ESharpnessSelectionPreset - */ -inline ESharpnessSelectionPreset ESharpnessSelectionPreset_stringToEnum(const std::string& sharpnessPreset) -{ - std::string preset = sharpnessPreset; - std::transform(preset.begin(), preset.end(), preset.begin(), ::tolower); //tolower - - if(preset == "ultra") return ESharpnessSelectionPreset::ULTRA; - if(preset == "high") return ESharpnessSelectionPreset::HIGH; - if(preset == "normal") return ESharpnessSelectionPreset::NORMAL; - if(preset == "medium") return ESharpnessSelectionPreset::MEDIUM; - if(preset == "low") return ESharpnessSelectionPreset::LOW; - if(preset == "very_low") return ESharpnessSelectionPreset::VERY_LOW; - if(preset == "none") return ESharpnessSelectionPreset::NONE; - - throw std::out_of_range("Invalid sharpnessPreset : " + sharpnessPreset); -} - -inline 
std::ostream& operator<<(std::ostream& os, const ESharpnessSelectionPreset sharpnessPreset) -{ - os << ESharpnessSelectionPreset_enumToString(sharpnessPreset); - return os; -} - -inline std::istream& operator>>(std::istream& in, ESharpnessSelectionPreset &sharpnessPreset) -{ - std::string token; - in >> token; - sharpnessPreset = ESharpnessSelectionPreset_stringToEnum(token); - return in; -} - -} // namespace keyframe -} // namespace aliceVision diff --git a/src/software/utils/CMakeLists.txt b/src/software/utils/CMakeLists.txt index 12e7ffef1e..561ee04e30 100644 --- a/src/software/utils/CMakeLists.txt +++ b/src/software/utils/CMakeLists.txt @@ -218,15 +218,18 @@ alicevision_add_software(aliceVision_sfmLocalization # Keyframe selection # - export keyframes from video files / image sequence directories -alicevision_add_software(aliceVision_keyframeSelection - SOURCE main_keyframeSelection.cpp - FOLDER ${FOLDER_SOFTWARE_UTILS} - LINKS aliceVision_system - aliceVision_keyframe - ${OPENIMAGEIO_LIBRARIES} - Boost::program_options - Boost::filesystem +if(ALICEVISION_HAVE_OPENCV) + alicevision_add_software(aliceVision_keyframeSelection + SOURCE main_keyframeSelection.cpp + FOLDER ${FOLDER_SOFTWARE_UTILS} + LINKS aliceVision_system + aliceVision_image + aliceVision_keyframe + ${OPENIMAGEIO_LIBRARIES} + Boost::program_options + Boost::filesystem ) +endif() # Print distances between 3D objects diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 6c805e4421..446f0c5ae9 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -4,6 +4,7 @@ // v. 2.0. If a copy of the MPL was not distributed with this file, // You can obtain one at https://mozilla.org/MPL/2.0/. +#include #include #include #include @@ -17,185 +18,240 @@ // These constants define the current software version. // They must be updated when the command line is changed. 
-#define ALICEVISION_SOFTWARE_VERSION_MAJOR 2 +#define ALICEVISION_SOFTWARE_VERSION_MAJOR 3 #define ALICEVISION_SOFTWARE_VERSION_MINOR 0 -using namespace aliceVision::keyframe; +using namespace aliceVision; namespace po = boost::program_options; namespace fs = boost::filesystem; +const std::string supportedExtensions = "exr, jpg, png"; + int aliceVision_main(int argc, char** argv) { - // command-line parameters - std::vector mediaPaths; // media file path list - std::vector brands; // media brand list - std::vector models; // media model list - std::vector mmFocals; // media focal (mm) list - std::vector pxFocals; // media focal (px) list - std::vector frameOffsets; // media frame offset list - std::string sensorDbPath; // camera sensor width database - std::string voctreeFilePath; // SIFT voctree file path - std::string outputFolder; // output folder for keyframes - - // algorithm variables - bool useSparseDistanceSelection = true; - bool useSharpnessSelection = true; - std::string sharpnessPreset = ESharpnessSelectionPreset_enumToString(ESharpnessSelectionPreset::NORMAL); - float sparseDistMaxScore = 100.0f; - unsigned int sharpSubset = 4; - unsigned int minFrameStep = 12; - unsigned int maxFrameStep = 36; - unsigned int maxNbOutFrame = 0; - - po::options_description inputParams("Required parameters"); - inputParams.add_options() - ("mediaPaths", po::value< std::vector >(&mediaPaths)->required()->multitoken(), - "Input video files or image sequence directories.") - ("sensorDbPath", po::value(&sensorDbPath)->required(), - "Camera sensor width database path.") - ("voctreePath", po::value(&voctreeFilePath)->required(), - "Vocabulary tree path.") - ("outputFolder", po::value(&outputFolder)->required(), - "Output keyframes folder for .jpg"); - - po::options_description metadataParams("Metadata parameters"); - metadataParams.add_options() - ("brands", po::value< std::vector >(&brands)->default_value(brands)->multitoken(), - "Camera brands.") - ("models", po::value< 
std::vector >(&models)->default_value(models)->multitoken(), - "Camera models.") - ("mmFocals", po::value< std::vector >(&mmFocals)->default_value(mmFocals)->multitoken(), - "Focals in mm (will be use if not 0).") - ("pxFocals", po::value< std::vector >(&pxFocals)->default_value(pxFocals)->multitoken(), - "Focals in px (will be use and convert in mm if not 0).") - ("frameOffsets", po::value< std::vector >(&frameOffsets)->default_value(frameOffsets)->multitoken(), - "Frame offsets."); - - po::options_description algorithmParams("Algorithm parameters"); - algorithmParams.add_options() - ("useSparseDistanceSelection", po::value(&useSparseDistanceSelection)->default_value(useSparseDistanceSelection), - "Use sparseDistance selection in order to avoid similar keyframes") - ("useSharpnessSelection", po::value(&useSharpnessSelection)->default_value(useSharpnessSelection), - "Use frame sharpness score for keyframe selection") - ("sparseDistMaxScore", po::value(&sparseDistMaxScore)->default_value(sparseDistMaxScore), - "Maximum number of strong common points between two keyframes") - ("sharpnessPreset", po::value(&sharpnessPreset)->default_value(sharpnessPreset), - "Preset for sharpnessSelection : " - "{ultra, high, normal, low, very_low, none}") - ("sharpSubset", po::value(&sharpSubset)->default_value(sharpSubset), - "sharp part of the image (1 = all, 2 = size/2, ...) 
") - ("minFrameStep", po::value(&minFrameStep)->default_value(minFrameStep), - "minimum number of frames between two keyframes") - ("maxFrameStep", po::value(&maxFrameStep)->default_value(maxFrameStep), - "maximum number of frames after which a keyframe can be taken") - ("maxNbOutFrame", po::value(&maxNbOutFrame)->default_value(maxNbOutFrame), - "maximum number of output frames (0 = no limit)"); - - - aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" - "AliceVision keyframeSelection"); - cmdline.add(inputParams); - cmdline.add(metadataParams); - cmdline.add(algorithmParams); - if (!cmdline.execute(argc, argv)) - { - return EXIT_FAILURE; - } - - const std::size_t nbCameras = mediaPaths.size(); - - // check output folder and update to its absolute path - { - const fs::path outDir = fs::absolute(outputFolder); - outputFolder = outDir.string(); - if(!fs::is_directory(outDir)) + // Command-line parameters + std::vector mediaPaths; // media file path list + std::vector brands; // media brand list + std::vector models; // media model list + std::vector mmFocals; // media focal (mm) list + std::string sensorDbPath; // camera sensor width database + std::string outputFolder; // output folder for keyframes + + // Algorithm variables + bool useSmartSelection = true; // enable the smart selection instead of the regular one + unsigned int minFrameStep = 12; // minimum number of frames between two keyframes (regular selection) + unsigned int maxFrameStep = 36; // maximum number of frames between two keyframes (regular selection) + unsigned int minNbOutFrames = 10; // minimum number of selected keyframes (smart selection) + unsigned int maxNbOutFrames = 2000; // maximum number of selected keyframes (both selections) + float pxDisplacement = 10.0; // percentage of pixels that have moved across frames since last keyframe (smart selection) + std::size_t rescaledWidthSharp = 720; // width of the rescaled frames for the 
sharpness; 0 if no rescale is performed (smart selection) + std::size_t rescaledWidthFlow = 720; // width of the rescaled frames for the flow; 0 if no rescale is performed (smart selection) + std::size_t sharpnessWindowSize = 200; // sliding window's size in sharpness computation (smart selection) + std::size_t flowCellSize = 90; // size of the cells within a frame used to compute the optical flow (smart selection) + std::string outputExtension = "exr"; // file extension of the written keyframes + image::EStorageDataType exrDataType = // storage data type for EXR output files + image::EStorageDataType::Float; + bool renameKeyframes = false; // name selected keyframes as consecutive frames instead of using their index as a name + + // Debug options + bool exportScores = false; // export the sharpness and optical flow scores to a CSV file + std::string csvFilename = "scores.csv"; // name of the CSV file containing the scores + bool exportSelectedFrames = false; // export the selected frames (1 for selected, 0 for not selected) + bool skipSelection = false; // only compute the scores and do not proceed with the selection + bool exportFlowVisualisation = false; // export optical flow visualisation for all the frames + bool flowVisualisationOnly = false; // export optical flow visualisation for all the frames but do not compute scores + bool skipSharpnessComputation = false; // skip sharpness score computations + + po::options_description inputParams("Required parameters"); + inputParams.add_options() + ("mediaPaths", po::value>(&mediaPaths)->required()->multitoken(), + "Input video files or image sequence directories.") + ("sensorDbPath", po::value(&sensorDbPath)->required(), + "Camera sensor width database path.") + ("outputFolder", po::value(&outputFolder)->required(), + "Output folder in which the selected keyframes are written."); + + po::options_description metadataParams("Metadata parameters"); + metadataParams.add_options() + ("brands", 
po::value>(&brands)->default_value(brands)->multitoken(), + "Camera brands.") + ("models", po::value>(&models)->default_value(models)->multitoken(), + "Camera models.") + ("mmFocals", po::value>(&mmFocals)->default_value(mmFocals)->multitoken(), + "Focals in mm (ignored if equal to 0)."); + + po::options_description algorithmParams("Algorithm parameters"); // Parameters common to both methods + algorithmParams.add_options() + ("maxNbOutFrames", po::value(&maxNbOutFrames)->default_value(maxNbOutFrames), + "Maximum number of output keyframes.\n" + "\t- For the regular method, 0 = no limit. 'minFrameStep' and 'maxFrameStep' will always be respected, " + "so combining them with this parameter might cause the selection to stop before reaching the end of the " + "input sequence(s).\n" + "\t- For the smart method, the default value is set to 2000.") + ("renameKeyframes", po::value(&renameKeyframes)->default_value(renameKeyframes), + "Instead of naming the keyframes according to their index in the input sequence / video, rename them as " + "consecutive frames, starting from 0.\n" + "If the selected keyframes should originally have been written as [00015.exr, 00294.exr, 00825.exr], they " + "will instead be written as [00000.exr, 00001.exr, 00002.exr] if this option is enabled.") + ("outputExtension", po::value(&outputExtension)->default_value(outputExtension), + "File extension of the output keyframes.") + ("storageDataType", po::value(&exrDataType)->default_value(exrDataType), + ("Storage data type for EXR output files: " + image::EStorageDataType_informations()).c_str()); + + po::options_description regularAlgorithmParams("Regular algorithm parameters"); + regularAlgorithmParams.add_options() + ("minFrameStep", po::value(&minFrameStep)->default_value(minFrameStep), + "Minimum number of frames between two keyframes.") + ("maxFrameStep", po::value(&maxFrameStep)->default_value(maxFrameStep), + "Maximum number of frames after which a keyframe can be taken (ignored if equal to 
0)."); + + po::options_description smartAlgorithmParams("Smart algorithm parameters"); + smartAlgorithmParams.add_options() + ("useSmartSelection", po::value(&useSmartSelection)->default_value(useSmartSelection), + "True to use the smart keyframe selection method, false to use the regular keyframe selection method.") + ("minNbOutFrames", po::value(&minNbOutFrames)->default_value(minNbOutFrames), + "Minimum number of output keyframes.") + ("pxDisplacement", po::value(&pxDisplacement)->default_value(pxDisplacement), + "Percentage of pixels in the image that have been displaced since the last selected frame. The absolute " + "number of moving pixels is determined using min(imageWidth, imageHeight).") + ("rescaledWidthSharpness", po::value(&rescaledWidthSharp)->default_value(rescaledWidthSharp), + "Width, in pixels, of the rescaled input frames used to compute the sharpness scores. The height of the " + "rescaled frames will be automatically determined to preserve the aspect ratio. 0 = no rescale.") + ("rescaledWidthFlow", po::value(&rescaledWidthFlow)->default_value(rescaledWidthFlow), + "Width, in pixels, of the rescaled input frames used to compute the motion scores. The height of the " + "rescaled frames will be automatically determined to preserve the aspect ratio. 
0 = no rescale.") + ("sharpnessWindowSize", po::value(&sharpnessWindowSize)->default_value(sharpnessWindowSize), + "Size, in pixels, of the sliding window that is used to compute the sharpness score of a frame.") + ("flowCellSize", po::value(&flowCellSize)->default_value(flowCellSize), + "Size, in pixels, of the cells within an input frame that are used to compute the optical flow scores."); + + po::options_description debugParams("Debug parameters"); + debugParams.add_options() + ("exportScores", po::value(&exportScores)->default_value(exportScores), + "Export the sharpness and optical flow scores to a CSV file.") + ("csvFilename", po::value(&csvFilename)->default_value(csvFilename), + "Name of the CSV file containing the sharpness and optical flow scores.") + ("exportSelectedFrames", po::value(&exportSelectedFrames)->default_value(exportSelectedFrames), + "Add a column in the exported CSV file containing the selected frames (1 for frames that have been " + "selected, 0 otherwise).") + ("skipSelection", po::value(&skipSelection)->default_value(skipSelection), + "Only compute the sharpness and optical flow scores, but do not proceed with the selection.") + ("exportFlowVisualisation", po::value(&exportFlowVisualisation)->default_value(exportFlowVisualisation), + "For all frames, export the optical flow visualisation in HSV as PNG images.") + ("flowVisualisationOnly", po::value(&flowVisualisationOnly)->default_value(flowVisualisationOnly), + "Export the optical flow visualisation in HSV as PNG images for all frames but do not compute scores.") + ("skipSharpnessComputation", po::value(&skipSharpnessComputation)->default_value(skipSharpnessComputation), + "Skip the computations for the sharpness score of each frame. 
A fixed sharpness score of 1.0 will be " + "assigned to each frame."); + + aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" + "AliceVision keyframeSelection"); + cmdline.add(inputParams); + cmdline.add(metadataParams); + cmdline.add(algorithmParams); + cmdline.add(regularAlgorithmParams); + cmdline.add(smartAlgorithmParams); + cmdline.add(debugParams); + if (!cmdline.execute(argc, argv)) { + return EXIT_FAILURE; + } + + const std::size_t nbCameras = mediaPaths.size(); + + // Check output folder and update to its absolute path { - ALICEVISION_LOG_ERROR("Cannot find folder: " << outputFolder); - return EXIT_FAILURE; + const fs::path outDir = fs::absolute(outputFolder); + outputFolder = outDir.string(); + if (!fs::is_directory(outDir)) { + ALICEVISION_LOG_ERROR("Cannot find folder: " << outputFolder); + return EXIT_FAILURE; + } + } + + if (nbCameras < 1) { + ALICEVISION_LOG_ERROR("Program needs at least one media path."); + return EXIT_FAILURE; + } + + if (maxFrameStep > 0 && minFrameStep >= maxFrameStep) { + ALICEVISION_LOG_ERROR("Setting 'minFrameStep' should be less than setting 'maxFrameStep'."); + return EXIT_FAILURE; } - } - - if(nbCameras < 1) - { - ALICEVISION_LOG_ERROR("Program need at least one media path."); - return EXIT_FAILURE; - } - - if(minFrameStep >= maxFrameStep) - { - ALICEVISION_LOG_ERROR("Option minFrameStep should be less than option maxFrameStep."); - return EXIT_FAILURE; - } - - brands.resize(nbCameras); - models.resize(nbCameras); - mmFocals.resize(nbCameras); - pxFocals.resize(nbCameras); - frameOffsets.resize(nbCameras); - - // debugging prints, print out all the parameters - { - if(nbCameras == 1) - ALICEVISION_LOG_INFO("Single camera"); - else - ALICEVISION_LOG_INFO("Camera rig of " << nbCameras << " cameras."); - for(std::size_t i = 0; i < nbCameras; ++i) + if (minNbOutFrames < 1) { + ALICEVISION_LOG_ERROR("The minimum number of output keyframes cannot be less than 1."); + 
return EXIT_FAILURE; + } + + if (supportedExtensions.find(outputExtension) == std::string::npos) { + ALICEVISION_LOG_ERROR("Unsupported extension for the output file. Supported extensions are: " + << supportedExtensions); + return EXIT_FAILURE; + } + + brands.resize(nbCameras); + models.resize(nbCameras); + mmFocals.resize(nbCameras); + + // Debugging prints, print out all the parameters { - ALICEVISION_LOG_INFO("camera: " << mediaPaths.at(i) << std::endl - << "\t - brand: " << brands.at(i) << std::endl - << "\t - model: " << models.at(i) << std::endl - << "\t - focal (mm): " << mmFocals.at(i) << std::endl - << "\t - focal (px): " << pxFocals.at(i) << std::endl - << "\t - frame offset: " << frameOffsets.at(i) << std::endl); + if (nbCameras == 1) + ALICEVISION_LOG_INFO("Single camera"); + else + ALICEVISION_LOG_INFO("Camera rig of " << nbCameras << " cameras."); + + for (std::size_t i = 0; i < nbCameras; ++i) { + ALICEVISION_LOG_INFO("Camera: " << mediaPaths.at(i) << std::endl + << "\t - brand: " << brands.at(i) << std::endl + << "\t - model: " << models.at(i) << std::endl + << "\t - focal (mm): " << mmFocals.at(i) << std::endl); + } + } + + // Initialize KeyframeSelector + keyframe::KeyframeSelector selector(mediaPaths, sensorDbPath, outputFolder); + + // Set frame-related algorithm parameters + selector.setMinFrameStep(minFrameStep); + selector.setMaxFrameStep(maxFrameStep); + selector.setMinOutFrames(minNbOutFrames); + selector.setMaxOutFrames(maxNbOutFrames); + + if (flowVisualisationOnly) { + bool exported = selector.exportFlowVisualisation(rescaledWidthFlow); + if (exported) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; } - } - - // initialize KeyframeSelector - KeyframeSelector selector(mediaPaths, sensorDbPath, voctreeFilePath, outputFolder); - - // initialize media metadatas vector - std::vector cameraInfos(nbCameras); - - for(std::size_t i = 0; i < nbCameras; ++i) - { - KeyframeSelector::CameraInfo& metadata = cameraInfos.at(i); - - const 
std::string& brand = brands.at(i); - const std::string& model = models.at(i); - const float mmFocal = mmFocals.at(i); - const float pxFocal = pxFocals.at(i); - const unsigned int frameOffset = frameOffsets.at(i); - - if(!brand.empty()) - metadata.brand = brand; - if(!model.empty()) - metadata.model = model; - - metadata.frameOffset = frameOffset; - - if((pxFocal == .0f) && (mmFocal == .0f)) - continue; - - metadata.focalIsMM = (pxFocal == .0f); - metadata.focalLength = metadata.focalIsMM ? mmFocal : std::fabs(pxFocal); - } - - selector.setCameraInfos(cameraInfos); - - // set algorithm parameters - selector.useSparseDistanceSelection(useSparseDistanceSelection); - selector.useSharpnessSelection(useSharpnessSelection); - selector.setSparseDistanceMaxScore(sparseDistMaxScore); - selector.setSharpnessSelectionPreset(ESharpnessSelectionPreset_stringToEnum(sharpnessPreset)); - selector.setSharpSubset(sharpSubset); - selector.setMinFrameStep(minFrameStep); - selector.setMaxFrameStep(maxFrameStep); - selector.setMaxOutFrame(maxNbOutFrame); - - // process - selector.process(); - - return EXIT_SUCCESS; + + if (skipSelection) { + selector.computeScores(rescaledWidthSharp, rescaledWidthFlow, sharpnessWindowSize, flowCellSize, + skipSharpnessComputation); + if (exportScores) + selector.exportScoresToFile(csvFilename); // Frames have not been selected, ignore 'exportSelectedFrames' + if (exportFlowVisualisation) + selector.exportFlowVisualisation(rescaledWidthFlow); + + return EXIT_SUCCESS; + } + + // Process media paths with regular or smart method + if (useSmartSelection) + selector.processSmart(pxDisplacement, rescaledWidthSharp, rescaledWidthFlow, sharpnessWindowSize, flowCellSize, + skipSharpnessComputation); + else + selector.processRegular(); + + // Write selected keyframes + selector.writeSelection(brands, models, mmFocals, renameKeyframes, outputExtension, exrDataType); + + // If debug options are set, export the scores as a CSV file and / or the motion vectors as 
images + if (exportScores) + selector.exportScoresToFile(csvFilename, exportSelectedFrames); + if (exportFlowVisualisation) + selector.exportFlowVisualisation(rescaledWidthFlow); + + return EXIT_SUCCESS; }