From f91a8c04af6a28a63f47a5e85f9be3f0a83df781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Fri, 20 Jan 2023 12:09:07 +0100 Subject: [PATCH 01/28] [dataio] VideoFeed: Improve error handling when moving to specific frames Rely on OpenCV's return values to determine whether the move is successful or not. --- src/aliceVision/dataio/VideoFeed.cpp | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/aliceVision/dataio/VideoFeed.cpp b/src/aliceVision/dataio/VideoFeed.cpp index 48fbe7d48d..cd7d69e10b 100644 --- a/src/aliceVision/dataio/VideoFeed.cpp +++ b/src/aliceVision/dataio/VideoFeed.cpp @@ -214,22 +214,12 @@ bool VideoFeed::FeederImpl::goToFrame(const unsigned int frame) ALICEVISION_LOG_WARNING("We cannot open the video file."); return false; } - - if(_isLive) + + if (_isLive) return goToNextFrame(); - - if(frame > 0) - { - _videoCapture.set(cv::CAP_PROP_POS_FRAMES, frame); - _videoCapture.grab(); - return true; - } - else - { - _videoCapture.set(cv::CAP_PROP_POS_FRAMES, 0); - _videoCapture.grab(); - return false; - } + + _videoCapture.set(cv::CAP_PROP_POS_FRAMES, frame); + return _videoCapture.grab(); } bool VideoFeed::FeederImpl::goToNextFrame() From edfe4f44d13c4ae2b27a32848398b8c515935b1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Tue, 31 Jan 2023 17:37:35 +0100 Subject: [PATCH 02/28] KeyframeSelection: Remove previous methods and add a new regular one This commit removes all the methods previously used for keyframe selection and adds a new one, which samples the input sequence(s) regularly to select the keyframes. The new regular selection selects keyframes based on three optional constraints: - the minimum number of frames between two keyframes - the maximum number of frames between two keyframes - the maximum number of selected keyframes --- src/aliceVision/keyframe/CMakeLists.txt | 3 - src/aliceVision/keyframe/KeyframeSelector.cpp | 522 ++++-------------- src/aliceVision/keyframe/KeyframeSelector.hpp | 271 +-------- .../keyframe/SharpnessSelectionPreset.hpp | 87 --- src/software/utils/main_keyframeSelection.cpp | 133 ++--- 5 files changed, 173 insertions(+), 843 deletions(-) delete mode 100644 src/aliceVision/keyframe/SharpnessSelectionPreset.hpp diff --git a/src/aliceVision/keyframe/CMakeLists.txt b/src/aliceVision/keyframe/CMakeLists.txt index 4963899122..365176f371 100644 --- a/src/aliceVision/keyframe/CMakeLists.txt +++ b/src/aliceVision/keyframe/CMakeLists.txt @@ -1,6 +1,5 @@ # Headers set(keyframe_files_headers - SharpnessSelectionPreset.hpp KeyframeSelector.hpp ) @@ -13,8 +12,6 @@ alicevision_add_library(aliceVision_keyframe SOURCES ${keyframe_files_headers} ${keyframe_files_sources} PUBLIC_LINKS aliceVision_dataio - aliceVision_feature - aliceVision_voctree OpenImageIO::OpenImageIO PRIVATE_LINKS aliceVision_sensorDB diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 2243ab5c3e..7388395dab 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -30,448 +30,158 @@ namespace keyframe { */ int getRandomInt() { - std::random_device rd; // will be used to obtain a seed for the random number engine - std::mt19937 randomTwEngine(rd()); // standard mersenne_twister_engine seeded with rd() - std::uniform_int_distribution<> randomDist(0, std::numeric_limits::max()); - return randomDist(randomTwEngine); + std::random_device rd; // will be used to obtain a seed for the random 
number engine + std::mt19937 randomTwEngine(rd()); // standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> randomDist(0, std::numeric_limits::max()); + return randomDist(randomTwEngine); } KeyframeSelector::KeyframeSelector(const std::vector& mediaPaths, const std::string& sensorDbPath, - const std::string& voctreeFilePath, const std::string& outputFolder) - : _mediaPaths(mediaPaths) - , _sensorDbPath(sensorDbPath) - , _voctreeFilePath(voctreeFilePath) - , _outputFolder(outputFolder) + : _mediaPaths(mediaPaths) + , _sensorDbPath(sensorDbPath) + , _outputFolder(outputFolder) { - if((_maxOutFrame != 0) && - !_hasSharpnessSelection && - !_hasSparseDistanceSelection) - { - ALICEVISION_LOG_ERROR("KeyframeSelector needs at least one selection method if output frame limited !"); - throw std::invalid_argument("KeyframeSelector needs at least one selection method if output frame limited !"); - } - - // load vocabulary tree - _voctree.reset(new aliceVision::voctree::VocabularyTree(voctreeFilePath)); - - { - ALICEVISION_LOG_INFO("vocabulary tree loaded with :" << std::endl - << "\t- " << _voctree->levels() << " levels" << std::endl - << "\t- " << _voctree->splits() << " branching factor" << std::endl); - } - - // check number of input media filePaths - if(mediaPaths.empty()) - { - ALICEVISION_LOG_ERROR("Cannot create KeyframeSelector without a media file path !"); - throw std::invalid_argument("Cannot create KeyframeSelector without a media file path !"); - } - - // resize mediasInfo container - _mediasInfo.resize(mediaPaths.size()); - - // create SIFT image describer - _imageDescriber.reset(new feature::ImageDescriber_SIFT()); + // Check that a least one media file path has been provided + if (mediaPaths.empty()) { + ALICEVISION_THROW(std::invalid_argument, "Cannot create KeyframeSelector without at least one media file path!"); + } } -void KeyframeSelector::process() +void KeyframeSelector::processRegular() { - // create feeds and count minimum number of frames - std::size_t nbFrames = std::numeric_limits::max(); - for(std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) - { - const auto& path = _mediaPaths.at(mediaIndex); - - // create a feed provider per mediaPaths - _feeds.emplace_back(new dataio::FeedProvider(path)); - - const auto& feed = *_feeds.back(); - - // check if feed is initialized - if(!feed.isInit()) - { - ALICEVISION_LOG_ERROR("Cannot initialize the FeedProvider with " << path); - throw std::invalid_argument("Cannot while initialize the FeedProvider with " + path); - } + _selectedKeyframes.clear(); - // update minimum number of frames - nbFrames = std::min(nbFrames, feed.nbFrames() - static_cast( _cameraInfos.at(mediaIndex).frameOffset)); - } - - // check if minimum number of frame is zero - if(nbFrames == 0) - { - ALICEVISION_LOG_ERROR("One or multiple medias can't be found or empty !"); - throw std::invalid_argument("One or multiple medias can't be found or empty !"); - } - - // resize selection data vector - _framesData.resize(nbFrames); - - // feed provider variables - image::Image< image::RGBColor> image; // original image - camera::PinholeRadialK3 queryIntrinsics; // image associated camera intrinsics - bool hasIntrinsics = false; // true if queryIntrinsics is valid - std::string currentImgName; // current image name - - // process variables - const unsigned int frameStep = _maxFrameStep - _minFrameStep; - const unsigned int tileSharpSubset = (_nbTileSide * _nbTileSide) / _sharpSubset; - - // create output folders - 
if(_feeds.size() > 1) - { - const std::string rigFolder = _outputFolder + "/rig/"; - if(!fs::exists(rigFolder)) - fs::create_directory(rigFolder); - - for(std::size_t mediaIndex = 0 ; mediaIndex < _feeds.size(); ++mediaIndex) - { - const std::string subPoseFolder = rigFolder + std::to_string(mediaIndex); - if(!fs::exists(subPoseFolder)) - fs::create_directory(subPoseFolder); - } - } - - // feed and metadata initialization - for(std::size_t mediaIndex = 0 ; mediaIndex < _feeds.size(); ++mediaIndex) - { - // first frame with offset - _feeds.at(mediaIndex)->goToFrame(_cameraInfos.at(mediaIndex).frameOffset); - - if(!_feeds.at(mediaIndex)->readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) - { - ALICEVISION_LOG_ERROR("Cannot read media first frame " << _mediaPaths[mediaIndex]); - throw std::invalid_argument("Cannot read media first frame " + _mediaPaths[mediaIndex]); - } + std::size_t nbFrames = std::numeric_limits::max(); + std::vector> feeds; - // define output image metadata - if(!_cameraInfos.at(mediaIndex).focalIsMM) - { - convertFocalLengthInMM(_cameraInfos.at(mediaIndex), image.Width()); - } + for (std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) { + const auto& path = _mediaPaths.at(mediaIndex); - // define media informations - auto& mediaInfo = _mediasInfo.at(mediaIndex); - mediaInfo.tileHeight = (image.Height() / 2) / _nbTileSide; - mediaInfo.tileWidth = (image.Width() / 2) / _nbTileSide; - mediaInfo.spec = oiio::ImageSpec(image.Width(), image.Height(), 3, oiio::TypeDesc::UINT8); // always jpeg - mediaInfo.spec.attribute("jpeg:subsampling", "4:4:4"); // always subsampling 4:4:4 - mediaInfo.spec.attribute("oiio:ColorSpace", "sRGB"); // always sRGB - mediaInfo.spec.attribute("Make", _cameraInfos[mediaIndex].brand); - mediaInfo.spec.attribute("Model", _cameraInfos[mediaIndex].model); - mediaInfo.spec.attribute("Exif:BodySerialNumber", std::to_string(getRandomInt())); // TODO: use Exif:OriginalRawFileName instead - mediaInfo.spec.attribute("Exif:FocalLength", _cameraInfos[mediaIndex].focalLength); - } - - // iteration process - _keyframeIndexes.clear(); - std::size_t currentFrameStep = _minFrameStep + 1; // start directly (dont skip minFrameStep first frames) - - for(std::size_t frameIndex = 0; frameIndex < _framesData.size(); ++frameIndex) - { - ALICEVISION_LOG_INFO("frame : " << frameIndex); - bool frameSelected = true; - auto& frameData = _framesData.at(frameIndex); - frameData.mediasData.resize(_feeds.size()); - - for(std::size_t mediaIndex = 0; mediaIndex < _feeds.size(); ++mediaIndex) - { - ALICEVISION_LOG_DEBUG("media : " << _mediaPaths.at(mediaIndex)); - auto& feed = *_feeds.at(mediaIndex); - - if(frameSelected) // false if a camera of a rig is not selected - { - if(!feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) - { - ALICEVISION_LOG_ERROR("Cannot read frame '" << currentImgName << "' !"); - throw std::invalid_argument("Cannot read frame '" + currentImgName + "' !"); - } + // Create a feed provider per mediaPaths + feeds.push_back(std::make_unique(path)); + const auto& feed = *feeds.back(); - // compute sharpness and sparse distance - if(!computeFrameData(image, frameIndex, mediaIndex, tileSharpSubset)) - { - frameSelected = false; + // Check if feed is initialized + if (!feed.isInit()) { + ALICEVISION_THROW(std::invalid_argument, "Cannot initialize the FeedProvider with " << path); } - } - feed.goToNextFrame(); + // Update minimum number of frames + nbFrames = std::min(nbFrames, static_cast(feed.nbFrames())); } - 
{ - if(frameSelected) - { - ALICEVISION_LOG_INFO(" > selected" << std::endl); - frameData.selected = true; - if(_hasSharpnessSelection) - frameData.computeAvgSharpness(); - } - else - { - ALICEVISION_LOG_INFO(" > skipped" << std::endl); - frameData.mediasData.clear(); // remove unselected mediasData - } + // Check if minimum number of frame is zero + if (nbFrames == 0) { + ALICEVISION_THROW(std::invalid_argument, "One or multiple medias can't be found or empty!"); } - // selection process - if(currentFrameStep >= _maxFrameStep) - { - currentFrameStep = _minFrameStep; - bool hasKeyframe = false; - std::size_t keyframeIndex = 0; - float maxSharpness = 0; - float minDistScore = std::numeric_limits::max(); - - // find the best selected frame - if(_hasSharpnessSelection) - { - // find the sharpest selected frame - for(std::size_t index = frameIndex - (frameStep - 1); index <= frameIndex; ++index) - { - if(_framesData[index].selected && (_framesData[index].avgSharpness > maxSharpness)) - { - hasKeyframe = true; - keyframeIndex = index; - maxSharpness = _framesData[index].avgSharpness; - } - } - } - else if(_hasSparseDistanceSelection) - { - // find the smallest sparseDistance selected frame - for(std::size_t index = frameIndex - (frameStep - 1); index <= frameIndex; ++index) - { - if(_framesData[index].selected && (_framesData[index].maxDistScore < minDistScore)) - { - hasKeyframe = true; - keyframeIndex = index; - minDistScore = _framesData[index].maxDistScore; - } - } - } - else - { - // use the first frame of the step - hasKeyframe = true; - keyframeIndex = frameIndex - (frameStep - 1); - } - - // save keyframe - if(hasKeyframe) - { - ALICEVISION_LOG_INFO("keyframe choice : " << keyframeIndex << std::endl); - - // write keyframe - for(std::size_t mediaIndex = 0; mediaIndex < _feeds.size(); ++mediaIndex) - { - auto& feed = *_feeds.at(mediaIndex); - - feed.goToFrame(keyframeIndex + _cameraInfos.at(mediaIndex).frameOffset); - - if(_maxOutFrame == 0) // no limit of keyframes (direct evaluation) - { - feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics); - writeKeyframe(image, keyframeIndex, mediaIndex); - } - } - _framesData[keyframeIndex].keyframe = true; - _keyframeIndexes.push_back(keyframeIndex); - - frameIndex = keyframeIndex + _minFrameStep - 1; - } - else - { - ALICEVISION_LOG_INFO("keyframe choice : none" << std::endl); - } - } - ++currentFrameStep; - } - - if(_maxOutFrame == 0) // no limit of keyframes (evaluation and write already done) - { - return; - } - - // if limited number of keyframe, select smallest sparse distance - { - std::vector< std::tuple > keyframes; - - for(std::size_t i = 0; i < _framesData.size(); ++i) - { - if(_framesData[i].keyframe) - { - keyframes.emplace_back(_framesData[i].maxDistScore, 1 / _framesData[i].avgSharpness, i); - } - } - std::sort(keyframes.begin(), keyframes.end()); - - const std::size_t nbOutFrames = std::min(static_cast(_maxOutFrame), keyframes.size()); - - for(std::size_t i = 0; i < nbOutFrames; ++i) - { - const std::size_t frameIndex = std::get<2>(keyframes.at(i)); - for(std::size_t mediaIndex = 0; mediaIndex < _feeds.size(); ++mediaIndex) - { - auto& feed = *_feeds.at(mediaIndex); - feed.goToFrame(frameIndex + _cameraInfos.at(mediaIndex).frameOffset); - feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics); - writeKeyframe(image, frameIndex, mediaIndex); - } - } - } -} - -float KeyframeSelector::computeSharpness(const image::Image& imageGray, - const unsigned int tileHeight, - const unsigned int tileWidth, - 
const unsigned int tileSharpSubset) const -{ - image::Image scharrXDer; - image::Image scharrYDer; - - image::ImageScharrXDerivative(imageGray, scharrXDer); // normalized - image::ImageScharrYDerivative(imageGray, scharrYDer); // normalized - - scharrXDer = scharrXDer.cwiseAbs(); // absolute value - scharrYDer = scharrYDer.cwiseAbs(); // absolute value - - // image tiles - std::vector averageTileIntensity; - const float tileSizeInv = 1 / static_cast(tileHeight * tileWidth); - - for(std::size_t y = 0; y < (_nbTileSide * tileHeight); y += tileHeight) - { - for(std::size_t x = 0; x < (_nbTileSide * tileWidth); x += tileWidth) - { - const auto sum = scharrXDer.block(y, x, tileHeight, tileWidth).sum() + scharrYDer.block(y, x, tileHeight, tileWidth).sum(); - averageTileIntensity.push_back(sum * tileSizeInv); + unsigned int step = _minFrameStep; + if (_maxFrameStep > 0) { + // By default, if _maxFrameStep is set, set the step to be right between _minFrameStep and _maxFrameStep + step = step + static_cast((_maxFrameStep - _minFrameStep) / 2); } - } - - // sort tiles average pixel intensity - std::sort(averageTileIntensity.begin(), averageTileIntensity.end()); - - // return the sum of the subset average pixel intensity - return std::accumulate(averageTileIntensity.end() - tileSharpSubset, averageTileIntensity.end(), 0.0f) / tileSharpSubset; -} - -bool KeyframeSelector::computeFrameData(const image::Image& image, - std::size_t frameIndex, - std::size_t mediaIndex, - unsigned int tileSharpSubset) -{ - if(!_hasSharpnessSelection && !_hasSparseDistanceSelection) - return true; // nothing to do - - image::Image imageGray; // grayscale image - image::Image imageGrayHalfSample; // half resolution grayscale image - - const auto& currMediaInfo = _mediasInfo.at(mediaIndex); - auto& currframeData = _framesData.at(frameIndex); - auto& currMediaData = currframeData.mediasData.at(mediaIndex); - - // get grayscale image and resize - image::ConvertPixelType(image, &imageGray); - image::ImageHalfSample(imageGray, imageGrayHalfSample); - - // compute sharpness - if(_hasSharpnessSelection) - { - currMediaData.sharpness = computeSharpness(imageGrayHalfSample, - currMediaInfo.tileHeight, - currMediaInfo.tileWidth, - tileSharpSubset); - ALICEVISION_LOG_DEBUG( " - sharpness : " << currMediaData.sharpness); - } - - if((currMediaData.sharpness > _sharpnessThreshold) || !_hasSharpnessSelection) - { - bool noKeyframe = (_keyframeIndexes.empty()); - - // compute current frame sparse histogram - std::unique_ptr regions; - _imageDescriber->describe(imageGrayHalfSample, regions); - currMediaData.histogram = voctree::SparseHistogram(_voctree->quantizeToSparse(dynamic_cast(regions.get())->Descriptors())); - - // compute sparseDistance - if(!noKeyframe && _hasSparseDistanceSelection) - { - unsigned int nbKeyframetoCompare = (_keyframeIndexes.size() < _nbKeyFrameDist)? 
_keyframeIndexes.size() : _nbKeyFrameDist; - - for(std::size_t i = _keyframeIndexes.size() - nbKeyframetoCompare; i < _keyframeIndexes.size(); ++i) - { - for(auto& media : _framesData.at(_keyframeIndexes.at(i)).mediasData) - { - currMediaData.distScore = std::max(currMediaData.distScore, std::abs(voctree::sparseDistance(media.histogram, currMediaData.histogram, "strongCommonPoints"))); + /** + * To respect the _minFrameStep, _maxFrameStep and _maxOutFrame constraints as much as possible: + * - if _maxOutFrame is set and the current step is too small to sample over the entire sequence, + * the step should be increased; + * - if _maxOutFrame is set and the adjusted step became too big and does not respect _maxFrameStep anymore, + * the step should be set to _maxFrameStep - in that case, _maxOutFrame might be reached before the end of + * the sequence + */ + if (_maxOutFrame > 0 && nbFrames / _maxOutFrame > step) { + step = (nbFrames / _maxOutFrame) + 1; // + 1 to prevent ending up with more than _maxOutFrame selected frames + if (_maxFrameStep > 0 && step > _maxFrameStep) { + step = _maxFrameStep; } - } - currframeData.maxDistScore = std::max(currframeData.maxDistScore, currMediaData.distScore); - ALICEVISION_LOG_DEBUG(" - distScore : " << currMediaData.distScore); } - if(noKeyframe || (currMediaData.distScore < _distScoreMax)) - { - return true; + for (unsigned int id = 0; id < nbFrames; id += step) { + ALICEVISION_LOG_DEBUG("Selecting frame with ID " << id); + _selectedKeyframes.push_back(id); + if (_maxOutFrame > 0 && _selectedKeyframes.size() >= _maxOutFrame) + break; } - } - return false; } -void KeyframeSelector::writeKeyframe(const image::Image& image, - std::size_t frameIndex, - std::size_t mediaIndex) +bool KeyframeSelector::writeSelection(const std::vector& brands, + const std::vector& models, + const std::vector& mmFocals) const { - auto& mediaInfo = _mediasInfo.at(mediaIndex); - fs::path folder{_outputFolder}; + image::Image image; + camera::PinholeRadialK3 queryIntrinsics; + bool hasIntrinsics = false; + std::string currentImgName; - if(_feeds.size() > 1) - folder /= fs::path("rig") / fs::path(std::to_string(mediaIndex)); + for (std::size_t id = 0; id < _mediaPaths.size(); ++id) { + const auto& path = _mediaPaths.at(id); - std::ostringstream filenameSS; - filenameSS << std::setw(_padding) << std::setfill('0') << frameIndex << ".jpg"; + // Create a feed provider per mediaPaths + dataio::FeedProvider feed(path); - const auto filepath = (folder / fs::path(filenameSS.str())).string(); + // Check if feed is initialized + if (!feed.isInit()) { + ALICEVISION_LOG_ERROR("Cannot initialize the FeedProvider with " << path); + return false; + } - mediaInfo.spec.attribute("Exif:ImageUniqueID", std::to_string(getRandomInt())); + std::string processedOutputFolder = _outputFolder; + if (_mediaPaths.size() > 1) { + const std::string rigFolder = _outputFolder + "/rig/"; + if (!fs::exists(rigFolder)) { + fs::create_directory(rigFolder); + } + + processedOutputFolder = rigFolder + std::to_string(id); + if (!fs::exists(processedOutputFolder)) { + fs::create_directory(processedOutputFolder); + } + } - std::unique_ptr out(oiio::ImageOutput::create(filepath)); - - if(out.get() == nullptr) - { - throw std::invalid_argument("Cannot create image file : " + filepath); - } - - if(!out->open(filepath, mediaInfo.spec)) - { - throw std::invalid_argument("Cannot open image file : " + filepath); - } + for (const auto pos : _selectedKeyframes) { + if (!feed.goToFrame(pos)) { + ALICEVISION_LOG_ERROR("Invalid frame 
position. Ignoring this frame."); + continue; + } + + if (!feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) { + ALICEVISION_LOG_ERROR("Error reading image"); + return false; + } + + oiio::ParamValueList metadata; + metadata.push_back(oiio::ParamValue("Make", brands[id])); + metadata.push_back(oiio::ParamValue("Model", models[id])); + metadata.push_back(oiio::ParamValue("Exif:BodySerialNumber", std::to_string(getRandomInt()))); + metadata.push_back(oiio::ParamValue("Exif:FocalLength", mmFocals[id])); + metadata.push_back(oiio::ParamValue("Exif:ImageUniqueID", std::to_string(getRandomInt()))); + + fs::path folder = _outputFolder; + std::ostringstream filenameSS; + filenameSS << std::setw(5) << std::setfill('0') << pos << ".exr"; + const auto filepath = (processedOutputFolder / fs::path(filenameSS.str())).string(); + + image::ImageWriteOptions options; + // If the feed is a video, frames are read as OpenCV RGB matrices before being converted to image::ImageRGB + if (feed.isVideo()) { + options.fromColorSpace(image::EImageColorSpace::SRGB); + options.toColorSpace(image::EImageColorSpace::AUTO); + } else { // Otherwise, the frames have been read without any conversion, they should be written as such + if (outputExtension == "exr") + options.toColorSpace(image::EImageColorSpace::NO_CONVERSION); + else + options.toColorSpace(image::EImageColorSpace::AUTO); + } + + image::writeImage(filepath, image, options, metadata); + } + } - out->write_image(oiio::TypeDesc::UINT8, image.data()); // always jpeg - out->close(); + return true; } -void KeyframeSelector::convertFocalLengthInMM(CameraInfo& cameraInfo, int imageWidth) -{ - assert(imageWidth > 0); - - sensorDB::Datasheet find; - std::vector vecDatabase; - sensorDB::parseDatabase(_sensorDbPath, vecDatabase); - - if(sensorDB::getInfo(cameraInfo.brand, cameraInfo.model, vecDatabase, find)) - { - cameraInfo.focalLength = (cameraInfo.focalLength * find._sensorWidth) / imageWidth; - cameraInfo.focalIsMM = true; - ALICEVISION_LOG_INFO("Focal length converted in mm : " << cameraInfo.focalLength); - } - else - { - ALICEVISION_LOG_WARNING("Cannot convert focal length in mm : " << cameraInfo.brand << " / " << cameraInfo.model); - } -} } // namespace keyframe } // namespace aliceVision diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index c5bfff9e18..5da739b70d 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -6,10 +6,7 @@ #pragma once -#include -#include #include -#include #include @@ -32,39 +29,15 @@ namespace oiio = OIIO; class KeyframeSelector { -private: - // SIFT descriptor definition - const static std::size_t _dimension = 128; - using DescriptorFloat = aliceVision::feature::Descriptor; - public: - - /** - * @brief Camera informations - */ - struct CameraInfo { - /// Camera brand - std::string brand = "Custom"; - /// Camera model - std::string model = "radial3"; - /// Focal length in mm or px - float focalLength = 1.2f; - /// Camera frame offset - unsigned int frameOffset = 0; - /// If focalIsMM is false, focalLength is in px - bool focalIsMM = true; - }; - /** * @brief KeyframeSelector constructor * @param[in] mediaPath video file path or image sequence directory * @param[in] sensorDbPath camera sensor width database path - * @param[in] voctreeFilePath vocabulary tree path * @param[in] outputFolder output keyframes directory */ KeyframeSelector(const std::vector& mediaPaths, const std::string& sensorDbPath, - 
const std::string& voctreeFilePath, const std::string& outputFolder); /** @@ -74,77 +47,22 @@ class KeyframeSelector KeyframeSelector(const KeyframeSelector& copy) = delete; /** - * @brief Process media paths and extract keyframes - */ - void process(); - - /** - * @brief Set if selector use keyframe sparse distance selection - * @param[in] useSparseDistanceSelection True or False - */ - void useSparseDistanceSelection(bool useSparseDistanceSelection) - { - _hasSparseDistanceSelection = useSparseDistanceSelection; - } - - /** - * @brief Set if selector use keyframe sharpness selection - * @param[in] useSharpnessSelection True or False - */ - void useSharpnessSelection(bool useSharpnessSelection) - { - _hasSharpnessSelection = useSharpnessSelection; - } - - /** - * @brief Set cameras informations for output keyframes - * @param[in] cameras informations + * @brief Process media paths and build a list of selected keyframes using a regular sampling over time */ - void setCameraInfos(const std::vector& cameraInfos) - { - _cameraInfos = cameraInfos; - } + void processRegular(); /** - * @brief Set sparse distance max score - * @param[in] distScoreMax max strong common points + * @brief Write the selected keyframes in the output folder + * @param[in] brands brand name for each camera + * @param[in] models model name for each camera + * @param[in] mmFocals focal in millimeters for each camera + * @return true if all the selected keyframes were successfully written, false otherwise */ - void setSparseDistanceMaxScore(float distScoreMax) - { - _distScoreMax = distScoreMax; - } + bool writeSelection(const std::vector& brands, const std::vector& models, + const std::vector& mmFocals) const; /** - * @brief Set Sharpness selection preset - * @param[in] sharpnessPreset enum - */ - void setSharpnessSelectionPreset(ESharpnessSelectionPreset sharpnessPreset) - { - switch(sharpnessPreset) - { - // arbitrary thresholds - case ESharpnessSelectionPreset::ULTRA: _sharpnessThreshold = 20.0f; break; - case ESharpnessSelectionPreset::HIGH: _sharpnessThreshold = 17.0f; break; - case ESharpnessSelectionPreset::NORMAL: _sharpnessThreshold = 15.0f; break; - case ESharpnessSelectionPreset::MEDIUM: _sharpnessThreshold = 10.0f; break; - case ESharpnessSelectionPreset::LOW: _sharpnessThreshold = 8.0f; break; - case ESharpnessSelectionPreset::VERY_LOW: _sharpnessThreshold = 6.0f; break; - case ESharpnessSelectionPreset::NONE: _sharpnessThreshold = .0f; break; - default: throw std::out_of_range("Invalid sharpnessPreset enum"); - } - } - - /** - * @brief Set sharp subset size for process algorithm - * @param[in] subset sharp part of the image (1 = all, 2 = size/2, ...) 
- */ - void setSharpSubset(unsigned int subset) - { - _sharpSubset = subset; - } - - /** - * @brief Set min frame step for process algorithm + * @brief Set the minimum frame step parameter for the processing algorithm * @param[in] frameStep minimum number of frames between two keyframes */ void setMinFrameStep(unsigned int frameStep) @@ -153,8 +71,8 @@ class KeyframeSelector } /** - * @brief Set max frame step for process algorithm - * @param[in] frameStep maximum number of frames after which a keyframe can be taken + * @brief Set the maximum frame step parameter for the processing algorithm + * @param[in] frameStep maximum number of frames between two keyframes */ void setMaxFrameStep(unsigned int frameStep) { @@ -162,7 +80,7 @@ class KeyframeSelector } /** - * @brief Set max output frame number for process algorithm + * @brief Set the maximum output frame number parameter for the processing algorithm * @param[in] nbFrame maximum number of output frames (if 0, no limit) */ void setMaxOutFrame(unsigned int nbFrame) @@ -171,16 +89,7 @@ class KeyframeSelector } /** - * @brief Get sharp subset size for process algorithm - * @return sharp part of the image (1 = all, 2 = size/2, ...) - */ - unsigned int getSharpSubset() const - { - return _sharpSubset; - } - - /** - * @brief Get min frame step for process algorithm + * @brief Get the minimum frame step parameter for the processing algorithm * @return minimum number of frames between two keyframes */ unsigned int getMinFrameStep() const @@ -189,8 +98,8 @@ class KeyframeSelector } /** - * @brief Get max output frame number for process algorithm - * @return maximum number of frames for trying to select a keyframe + * @brief Get the maximum output frame number parameter for the processing algorithm + * @return maximum number of frames between two keyframes */ unsigned int getMaxFrameStep() const { @@ -198,7 +107,7 @@ class KeyframeSelector } /** - * @brief Get max output frame number for process algorithm + * @brief Get the max output frame number for process algorithm * @return maximum number of output frames (if 0, no limit) */ unsigned int getMaxOutFrame() const @@ -207,159 +116,23 @@ class KeyframeSelector } private: - - // Paths + /// Selected keyframes IDs + std::vector _selectedKeyframes; /// Media paths std::vector _mediaPaths; /// Camera sensor width database std::string _sensorDbPath; - /// Voctree file path - std::string _voctreeFilePath; /// Output folder for keyframes std::string _outputFolder; - // Algorithm variables - /// Sharp part of the image (1 = all, 2 = size/2, ...) 
- unsigned int _sharpSubset = 4; - /// Minimum number of frame between two keyframes + /// Minimum number of frames between two keyframes unsigned int _minFrameStep = 12; - /// Maximum number of frame for evaluation + /// Maximum number of frames between two keyframes unsigned int _maxFrameStep = 36; - /// Maximum number of output frame (0 = no limit) + /// Maximum number of output frames (0 = no limit) unsigned int _maxOutFrame = 0; - /// Number of tiles per side - unsigned int _nbTileSide = 20; - /// Number of previous keyframe distances in order to evaluate distance score - unsigned int _nbKeyFrameDist = 10; - /// Use padding on digits for exported frames - unsigned int _padding = 7; - /// Sharpness threshold (image with higher sharpness will be selected) - float _sharpnessThreshold = 15.0f; - /// Distance max score (image with smallest distance from the last keyframe will be selected) - float _distScoreMax = 100.0f; - /// Use sharpness selection - bool _hasSharpnessSelection = true; - /// Use sparseDistance selection - bool _hasSparseDistanceSelection = true; - - /// Camera metadatas - std::vector _cameraInfos; - - // Tools - - /// Image describer in order to extract describer - std::unique_ptr _imageDescriber; - /// Voctree in order to compute sparseHistogram - std::unique_ptr< aliceVision::voctree::VocabularyTree > _voctree; - /// Feed provider for media paths images extraction - std::vector< std::unique_ptr > _feeds; - - // Process structures - - /** - * @brief Process media global informations - */ - struct MediaInfo - { - /// height of the tile - unsigned int tileHeight = 0; - /// width of the tile - unsigned int tileWidth = 0; - /// openImageIO image spec - oiio::ImageSpec spec; - }; - - /** - * @brief Process media informations at a specific frame - */ - struct MediaData - { - /// sharpness score - float sharpness = 0; - /// maximum distance score with keyframe media histograms - float distScore = 0; - /// sparseHistogram - voctree::SparseHistogram histogram; - }; - - /** - * @brief Process frame (or set of frames) informations - */ - struct FrameData - { - /// average sharpness score of all media - float avgSharpness = 0; - /// maximum voctree distance score of all media - float maxDistScore = 0; - /// frame (or set of frames) selected for evaluation - bool selected = false; - /// frame is a keyframe - bool keyframe = false; - /// medias process data - std::vector mediasData; - - /** - * @brief Compute average sharpness score - */ - void computeAvgSharpness() - { - for(const auto& media : mediasData) - avgSharpness += media.sharpness; - avgSharpness /= mediasData.size(); - } - }; - - /// MediaInfo structure per input medias - std::vector _mediasInfo; - /// FrameData structure per frame - std::vector _framesData; - /// Keyframe indexes container - std::vector _keyframeIndexes; - - /** - * @brief Compute sharpness score of a given image - * @param[in] imageGray given image in grayscale - * @param[in] tileHeight height of tile - * @param[in] tileWidth width of tile - * @param[in] tileSharpSubset number of sharp tiles - * @return sharpness score - */ - float computeSharpness(const image::Image& imageGray, - const unsigned int tileHeight, - const unsigned int tileWidth, - const unsigned int tileSharpSubset) const; - - /** - * @brief Compute sharpness and distance score for a given image - * @param[in] image an image of the media - * @param[in] frameIndex the image index in the media sequence - * @param[in] mediaIndex the media index - * @param[in] tileSharpSubset number of sharp 
tiles - * @return true if the frame is selected - */ - bool computeFrameData(const image::Image& image, - std::size_t frameIndex, - std::size_t mediaIndex, - unsigned int tileSharpSubset); - - /** - * @brief Write a keyframe and metadata - * @param[in] image an image of the media - * @param[in] frameIndex the image index in the media sequence - * @param[in] mediaIndex the media index - */ - void writeKeyframe(const image::Image& image, - std::size_t frameIndex, - std::size_t mediaIndex); - - /** - * @brief Convert focal length from px to mm using sensor width database - * @param[in] camera informations - * @param[in] imageWidth media image width in px - */ - void convertFocalLengthInMM(CameraInfo& cameraInfo, int imageWidth); }; } // namespace keyframe diff --git a/src/aliceVision/keyframe/SharpnessSelectionPreset.hpp b/src/aliceVision/keyframe/SharpnessSelectionPreset.hpp deleted file mode 100644 index cbac01f96d..0000000000 --- a/src/aliceVision/keyframe/SharpnessSelectionPreset.hpp +++ /dev/null @@ -1,87 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#pragma once - -#include -#include -#include -#include - -namespace aliceVision { -namespace keyframe { - -/** - * @brief Sharpness selection preset enum - */ -enum class ESharpnessSelectionPreset : std::uint8_t -{ - ULTRA - , HIGH - , NORMAL - , MEDIUM - , LOW - , VERY_LOW - , NONE -}; - -/** - * @brief convert an enum ESharpnessSelectionPreset to its corresponding string - * @param ESharpnessSelectionPreset - * @return String - */ -inline std::string ESharpnessSelectionPreset_enumToString(ESharpnessSelectionPreset sharpnessPreset) -{ - switch(sharpnessPreset) - { - case ESharpnessSelectionPreset::ULTRA: return "ultra"; - case ESharpnessSelectionPreset::HIGH: return "high"; - case ESharpnessSelectionPreset::NORMAL: return "normal"; - case ESharpnessSelectionPreset::MEDIUM: return "medium"; - case ESharpnessSelectionPreset::LOW: return "low"; - case ESharpnessSelectionPreset::VERY_LOW: return "very_low"; - case ESharpnessSelectionPreset::NONE: return "none"; - } - throw std::out_of_range("Invalid sharpnessPreset enum"); -} - -/** - * @brief convert a string sharpnessPreset to its corresponding enum ESharpnessSelectionPreset - * @param String - * @return ESharpnessSelectionPreset - */ -inline ESharpnessSelectionPreset ESharpnessSelectionPreset_stringToEnum(const std::string& sharpnessPreset) -{ - std::string preset = sharpnessPreset; - std::transform(preset.begin(), preset.end(), preset.begin(), ::tolower); //tolower - - if(preset == "ultra") return ESharpnessSelectionPreset::ULTRA; - if(preset == "high") return ESharpnessSelectionPreset::HIGH; - if(preset == "normal") return ESharpnessSelectionPreset::NORMAL; - if(preset == "medium") return ESharpnessSelectionPreset::MEDIUM; - if(preset == "low") return ESharpnessSelectionPreset::LOW; - if(preset == "very_low") return ESharpnessSelectionPreset::VERY_LOW; - if(preset == "none") return ESharpnessSelectionPreset::NONE; - - throw std::out_of_range("Invalid sharpnessPreset : " + sharpnessPreset); -} - -inline std::ostream& operator<<(std::ostream& os, const ESharpnessSelectionPreset sharpnessPreset) -{ - os << ESharpnessSelectionPreset_enumToString(sharpnessPreset); - return os; -} - -inline std::istream& operator>>(std::istream& 
in, ESharpnessSelectionPreset &sharpnessPreset) -{ - std::string token; - in >> token; - sharpnessPreset = ESharpnessSelectionPreset_stringToEnum(token); - return in; -} - -} // namespace keyframe -} // namespace aliceVision diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 6c805e4421..2eaa2b4fc1 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -27,70 +27,47 @@ namespace fs = boost::filesystem; int aliceVision_main(int argc, char** argv) { - // command-line parameters + // Command-line parameters std::vector mediaPaths; // media file path list std::vector brands; // media brand list std::vector models; // media model list std::vector mmFocals; // media focal (mm) list - std::vector pxFocals; // media focal (px) list - std::vector frameOffsets; // media frame offset list std::string sensorDbPath; // camera sensor width database - std::string voctreeFilePath; // SIFT voctree file path std::string outputFolder; // output folder for keyframes - // algorithm variables - bool useSparseDistanceSelection = true; - bool useSharpnessSelection = true; - std::string sharpnessPreset = ESharpnessSelectionPreset_enumToString(ESharpnessSelectionPreset::NORMAL); - float sparseDistMaxScore = 100.0f; - unsigned int sharpSubset = 4; + // Algorithm variables unsigned int minFrameStep = 12; unsigned int maxFrameStep = 36; unsigned int maxNbOutFrame = 0; po::options_description inputParams("Required parameters"); inputParams.add_options() - ("mediaPaths", po::value< std::vector >(&mediaPaths)->required()->multitoken(), + ("mediaPaths", po::value>(&mediaPaths)->required()->multitoken(), "Input video files or image sequence directories.") ("sensorDbPath", po::value(&sensorDbPath)->required(), "Camera sensor width database path.") - ("voctreePath", po::value(&voctreeFilePath)->required(), - "Vocabulary tree path.") ("outputFolder", po::value(&outputFolder)->required(), - "Output keyframes folder for .jpg"); + "Output folder in which the selected keyframes are written."); po::options_description metadataParams("Metadata parameters"); metadataParams.add_options() - ("brands", po::value< std::vector >(&brands)->default_value(brands)->multitoken(), + ("brands", po::value>(&brands)->default_value(brands)->multitoken(), "Camera brands.") - ("models", po::value< std::vector >(&models)->default_value(models)->multitoken(), + ("models", po::value>(&models)->default_value(models)->multitoken(), "Camera models.") - ("mmFocals", po::value< std::vector >(&mmFocals)->default_value(mmFocals)->multitoken(), - "Focals in mm (will be use if not 0).") - ("pxFocals", po::value< std::vector >(&pxFocals)->default_value(pxFocals)->multitoken(), - "Focals in px (will be use and convert in mm if not 0).") - ("frameOffsets", po::value< std::vector >(&frameOffsets)->default_value(frameOffsets)->multitoken(), - "Frame offsets."); + ("mmFocals", po::value>(&mmFocals)->default_value(mmFocals)->multitoken(), + "Focals in mm (ignored if equal to 0)."); po::options_description algorithmParams("Algorithm parameters"); algorithmParams.add_options() - ("useSparseDistanceSelection", po::value(&useSparseDistanceSelection)->default_value(useSparseDistanceSelection), - "Use sparseDistance selection in order to avoid similar keyframes") - ("useSharpnessSelection", po::value(&useSharpnessSelection)->default_value(useSharpnessSelection), - "Use frame sharpness score for keyframe selection") - ("sparseDistMaxScore", 
po::value(&sparseDistMaxScore)->default_value(sparseDistMaxScore), - "Maximum number of strong common points between two keyframes") - ("sharpnessPreset", po::value(&sharpnessPreset)->default_value(sharpnessPreset), - "Preset for sharpnessSelection : " - "{ultra, high, normal, low, very_low, none}") - ("sharpSubset", po::value(&sharpSubset)->default_value(sharpSubset), - "sharp part of the image (1 = all, 2 = size/2, ...) ") ("minFrameStep", po::value(&minFrameStep)->default_value(minFrameStep), - "minimum number of frames between two keyframes") + "Minimum number of frames between two keyframes.") ("maxFrameStep", po::value(&maxFrameStep)->default_value(maxFrameStep), - "maximum number of frames after which a keyframe can be taken") + "Maximum number of frames after which a keyframe can be taken (ignored if equal to 0).") ("maxNbOutFrame", po::value(&maxNbOutFrame)->default_value(maxNbOutFrame), - "maximum number of output frames (0 = no limit)"); + "Maximum number of output keyframes (0 = no limit).\n" + "'minFrameStep' and 'maxFrameStep' will always be respected, so combining them with this " + "parameter might cause the selection to stop before reaching the end of the input sequence(s)."); aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" @@ -98,104 +75,64 @@ int aliceVision_main(int argc, char** argv) cmdline.add(inputParams); cmdline.add(metadataParams); cmdline.add(algorithmParams); - if (!cmdline.execute(argc, argv)) - { + if (!cmdline.execute(argc, argv)) { return EXIT_FAILURE; } const std::size_t nbCameras = mediaPaths.size(); - // check output folder and update to its absolute path + // Check output folder and update to its absolute path { const fs::path outDir = fs::absolute(outputFolder); outputFolder = outDir.string(); - if(!fs::is_directory(outDir)) - { + if (!fs::is_directory(outDir)) { ALICEVISION_LOG_ERROR("Cannot find folder: " << outputFolder); return EXIT_FAILURE; } } - if(nbCameras < 1) - { - ALICEVISION_LOG_ERROR("Program need at least one media path."); + if (nbCameras < 1) { + ALICEVISION_LOG_ERROR("Program needs at least one media path."); return EXIT_FAILURE; } - if(minFrameStep >= maxFrameStep) - { - ALICEVISION_LOG_ERROR("Option minFrameStep should be less than option maxFrameStep."); + if (maxFrameStep > 0 && minFrameStep >= maxFrameStep) { + ALICEVISION_LOG_ERROR("Setting 'minFrameStep' should be less than setting 'maxFrameStep'."); return EXIT_FAILURE; } brands.resize(nbCameras); models.resize(nbCameras); mmFocals.resize(nbCameras); - pxFocals.resize(nbCameras); - frameOffsets.resize(nbCameras); - // debugging prints, print out all the parameters + // Debugging prints, print out all the parameters { - if(nbCameras == 1) + if (nbCameras == 1) ALICEVISION_LOG_INFO("Single camera"); else ALICEVISION_LOG_INFO("Camera rig of " << nbCameras << " cameras."); - for(std::size_t i = 0; i < nbCameras; ++i) - { - ALICEVISION_LOG_INFO("camera: " << mediaPaths.at(i) << std::endl + for (std::size_t i = 0; i < nbCameras; ++i) { + ALICEVISION_LOG_INFO("Camera: " << mediaPaths.at(i) << std::endl << "\t - brand: " << brands.at(i) << std::endl << "\t - model: " << models.at(i) << std::endl - << "\t - focal (mm): " << mmFocals.at(i) << std::endl - << "\t - focal (px): " << pxFocals.at(i) << std::endl - << "\t - frame offset: " << frameOffsets.at(i) << std::endl); + << "\t - focal (mm): " << mmFocals.at(i) << std::endl); } } - // initialize KeyframeSelector - KeyframeSelector selector(mediaPaths, sensorDbPath, 
voctreeFilePath, outputFolder); - - // initialize media metadatas vector - std::vector cameraInfos(nbCameras); - - for(std::size_t i = 0; i < nbCameras; ++i) - { - KeyframeSelector::CameraInfo& metadata = cameraInfos.at(i); - - const std::string& brand = brands.at(i); - const std::string& model = models.at(i); - const float mmFocal = mmFocals.at(i); - const float pxFocal = pxFocals.at(i); - const unsigned int frameOffset = frameOffsets.at(i); - - if(!brand.empty()) - metadata.brand = brand; - if(!model.empty()) - metadata.model = model; - - metadata.frameOffset = frameOffset; + // Initialize KeyframeSelector + KeyframeSelector selector(mediaPaths, sensorDbPath, outputFolder); - if((pxFocal == .0f) && (mmFocal == .0f)) - continue; - - metadata.focalIsMM = (pxFocal == .0f); - metadata.focalLength = metadata.focalIsMM ? mmFocal : std::fabs(pxFocal); - } - - selector.setCameraInfos(cameraInfos); - - // set algorithm parameters - selector.useSparseDistanceSelection(useSparseDistanceSelection); - selector.useSharpnessSelection(useSharpnessSelection); - selector.setSparseDistanceMaxScore(sparseDistMaxScore); - selector.setSharpnessSelectionPreset(ESharpnessSelectionPreset_stringToEnum(sharpnessPreset)); - selector.setSharpSubset(sharpSubset); + // Set algorithm parameters selector.setMinFrameStep(minFrameStep); selector.setMaxFrameStep(maxFrameStep); selector.setMaxOutFrame(maxNbOutFrame); - - // process - selector.process(); - + + // Process media paths with regular method + selector.processRegular(); + + // Write selected keyframes + selector.writeSelection(brands, models, mmFocals); + return EXIT_SUCCESS; } From 454635919c9f73fba34cc8c4b26d58cd4883a2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Fri, 20 Jan 2023 20:28:30 +0100 Subject: [PATCH 03/28] KeyframeSelection: Harmonize indentation across files Use the same indentation (4 spaces) in main_keyframeSelection.hpp and KeyframeSelector.hpp as in KeyframeSelector.cpp. 
--- src/aliceVision/keyframe/KeyframeSelector.hpp | 202 +++++++++--------- src/software/utils/main_keyframeSelection.cpp | 202 +++++++++--------- 2 files changed, 202 insertions(+), 202 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index 5da739b70d..090df13db2 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -30,109 +30,109 @@ namespace oiio = OIIO; class KeyframeSelector { public: - /** - * @brief KeyframeSelector constructor - * @param[in] mediaPath video file path or image sequence directory - * @param[in] sensorDbPath camera sensor width database path - * @param[in] outputFolder output keyframes directory - */ - KeyframeSelector(const std::vector& mediaPaths, - const std::string& sensorDbPath, - const std::string& outputFolder); - - /** - * @brief KeyframeSelector copy constructor - NO COPY - * @param[in] copy keyframeSelector - */ - KeyframeSelector(const KeyframeSelector& copy) = delete; - - /** - * @brief Process media paths and build a list of selected keyframes using a regular sampling over time - */ - void processRegular(); - - /** - * @brief Write the selected keyframes in the output folder - * @param[in] brands brand name for each camera - * @param[in] models model name for each camera - * @param[in] mmFocals focal in millimeters for each camera - * @return true if all the selected keyframes were successfully written, false otherwise - */ - bool writeSelection(const std::vector& brands, const std::vector& models, - const std::vector& mmFocals) const; - - /** - * @brief Set the minimum frame step parameter for the processing algorithm - * @param[in] frameStep minimum number of frames between two keyframes - */ - void setMinFrameStep(unsigned int frameStep) - { - _minFrameStep = frameStep; - } - - /** - * @brief Set the maximum frame step parameter for the processing algorithm - * @param[in] frameStep maximum number of frames between two keyframes - */ - void setMaxFrameStep(unsigned int frameStep) - { - _maxFrameStep = frameStep; - } - - /** - * @brief Set the maximum output frame number parameter for the processing algorithm - * @param[in] nbFrame maximum number of output frames (if 0, no limit) - */ - void setMaxOutFrame(unsigned int nbFrame) - { - _maxOutFrame = nbFrame; - } - - /** - * @brief Get the minimum frame step parameter for the processing algorithm - * @return minimum number of frames between two keyframes - */ - unsigned int getMinFrameStep() const - { - return _minFrameStep; - } - - /** - * @brief Get the maximum output frame number parameter for the processing algorithm - * @return maximum number of frames between two keyframes - */ - unsigned int getMaxFrameStep() const - { - return _maxFrameStep; - } - - /** - * @brief Get the max output frame number for process algorithm - * @return maximum number of output frames (if 0, no limit) - */ - unsigned int getMaxOutFrame() const - { - return _maxOutFrame; - } + /** + * @brief KeyframeSelector constructor + * @param[in] mediaPath video file path or image sequence directory + * @param[in] sensorDbPath camera sensor width database path + * @param[in] outputFolder output keyframes directory + */ + KeyframeSelector(const std::vector& mediaPaths, + const std::string& sensorDbPath, + const std::string& outputFolder); + + /** + * @brief KeyframeSelector copy constructor - NO COPY + * @param[in] copy keyframeSelector + */ + KeyframeSelector(const KeyframeSelector& copy) = delete; + + /** 
+ * @brief Process media paths and build a list of selected keyframes using a regular sampling over time + */ + void processRegular(); + + /** + * @brief Write the selected keyframes in the output folder + * @param[in] brands brand name for each camera + * @param[in] models model name for each camera + * @param[in] mmFocals focal in millimeters for each camera + * @return true if all the selected keyframes were successfully written, false otherwise + */ + bool writeSelection(const std::vector& brands, const std::vector& models, + const std::vector& mmFocals) const; + + /** + * @brief Set the minimum frame step parameter for the processing algorithm + * @param[in] frameStep minimum number of frames between two keyframes + */ + void setMinFrameStep(unsigned int frameStep) + { + _minFrameStep = frameStep; + } + + /** + * @brief Set the maximum frame step parameter for the processing algorithm + * @param[in] frameStep maximum number of frames between two keyframes + */ + void setMaxFrameStep(unsigned int frameStep) + { + _maxFrameStep = frameStep; + } + + /** + * @brief Set the maximum output frame number parameter for the processing algorithm + * @param[in] nbFrame maximum number of output frames (if 0, no limit) + */ + void setMaxOutFrame(unsigned int nbFrame) + { + _maxOutFrame = nbFrame; + } + + /** + * @brief Get the minimum frame step parameter for the processing algorithm + * @return minimum number of frames between two keyframes + */ + unsigned int getMinFrameStep() const + { + return _minFrameStep; + } + + /** + * @brief Get the maximum output frame number parameter for the processing algorithm + * @return maximum number of frames between two keyframes + */ + unsigned int getMaxFrameStep() const + { + return _maxFrameStep; + } + + /** + * @brief Get the max output frame number for process algorithm + * @return maximum number of output frames (if 0, no limit) + */ + unsigned int getMaxOutFrame() const + { + return _maxOutFrame; + } private: - /// Selected keyframes IDs - std::vector _selectedKeyframes; - - /// Media paths - std::vector _mediaPaths; - /// Camera sensor width database - std::string _sensorDbPath; - /// Output folder for keyframes - std::string _outputFolder; - - - /// Minimum number of frames between two keyframes - unsigned int _minFrameStep = 12; - /// Maximum number of frames between two keyframes - unsigned int _maxFrameStep = 36; - /// Maximum number of output frames (0 = no limit) - unsigned int _maxOutFrame = 0; + /// Selected keyframes IDs + std::vector _selectedKeyframes; + + /// Media paths + std::vector _mediaPaths; + /// Camera sensor width database + std::string _sensorDbPath; + /// Output folder for keyframes + std::string _outputFolder; + + + /// Minimum number of frames between two keyframes + unsigned int _minFrameStep = 12; + /// Maximum number of frames between two keyframes + unsigned int _maxFrameStep = 36; + /// Maximum number of output frames (0 = no limit) + unsigned int _maxOutFrame = 0; }; } // namespace keyframe diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 2eaa2b4fc1..9af6a640ba 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -27,112 +27,112 @@ namespace fs = boost::filesystem; int aliceVision_main(int argc, char** argv) { - // Command-line parameters - std::vector mediaPaths; // media file path list - std::vector brands; // media brand list - std::vector models; // media model list - std::vector mmFocals; // 
media focal (mm) list - std::string sensorDbPath; // camera sensor width database - std::string outputFolder; // output folder for keyframes - - // Algorithm variables - unsigned int minFrameStep = 12; - unsigned int maxFrameStep = 36; - unsigned int maxNbOutFrame = 0; - - po::options_description inputParams("Required parameters"); - inputParams.add_options() - ("mediaPaths", po::value>(&mediaPaths)->required()->multitoken(), - "Input video files or image sequence directories.") - ("sensorDbPath", po::value(&sensorDbPath)->required(), - "Camera sensor width database path.") - ("outputFolder", po::value(&outputFolder)->required(), - "Output folder in which the selected keyframes are written."); - - po::options_description metadataParams("Metadata parameters"); - metadataParams.add_options() - ("brands", po::value>(&brands)->default_value(brands)->multitoken(), - "Camera brands.") - ("models", po::value>(&models)->default_value(models)->multitoken(), - "Camera models.") - ("mmFocals", po::value>(&mmFocals)->default_value(mmFocals)->multitoken(), - "Focals in mm (ignored if equal to 0)."); - - po::options_description algorithmParams("Algorithm parameters"); - algorithmParams.add_options() - ("minFrameStep", po::value(&minFrameStep)->default_value(minFrameStep), - "Minimum number of frames between two keyframes.") - ("maxFrameStep", po::value(&maxFrameStep)->default_value(maxFrameStep), - "Maximum number of frames after which a keyframe can be taken (ignored if equal to 0).") - ("maxNbOutFrame", po::value(&maxNbOutFrame)->default_value(maxNbOutFrame), - "Maximum number of output keyframes (0 = no limit).\n" - "'minFrameStep' and 'maxFrameStep' will always be respected, so combining them with this " - "parameter might cause the selection to stop before reaching the end of the input sequence(s)."); - - - aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" - "AliceVision keyframeSelection"); - cmdline.add(inputParams); - cmdline.add(metadataParams); - cmdline.add(algorithmParams); - if (!cmdline.execute(argc, argv)) { - return EXIT_FAILURE; - } - - const std::size_t nbCameras = mediaPaths.size(); - - // Check output folder and update to its absolute path - { - const fs::path outDir = fs::absolute(outputFolder); - outputFolder = outDir.string(); - if (!fs::is_directory(outDir)) { - ALICEVISION_LOG_ERROR("Cannot find folder: " << outputFolder); - return EXIT_FAILURE; + // Command-line parameters + std::vector mediaPaths; // media file path list + std::vector brands; // media brand list + std::vector models; // media model list + std::vector mmFocals; // media focal (mm) list + std::string sensorDbPath; // camera sensor width database + std::string outputFolder; // output folder for keyframes + + // Algorithm variables + unsigned int minFrameStep = 12; + unsigned int maxFrameStep = 36; + unsigned int maxNbOutFrame = 0; + + po::options_description inputParams("Required parameters"); + inputParams.add_options() + ("mediaPaths", po::value>(&mediaPaths)->required()->multitoken(), + "Input video files or image sequence directories.") + ("sensorDbPath", po::value(&sensorDbPath)->required(), + "Camera sensor width database path.") + ("outputFolder", po::value(&outputFolder)->required(), + "Output folder in which the selected keyframes are written."); + + po::options_description metadataParams("Metadata parameters"); + metadataParams.add_options() + ("brands", po::value>(&brands)->default_value(brands)->multitoken(), + "Camera brands.") + ("models", 
po::value>(&models)->default_value(models)->multitoken(), + "Camera models.") + ("mmFocals", po::value>(&mmFocals)->default_value(mmFocals)->multitoken(), + "Focals in mm (ignored if equal to 0)."); + + po::options_description algorithmParams("Algorithm parameters"); + algorithmParams.add_options() + ("minFrameStep", po::value(&minFrameStep)->default_value(minFrameStep), + "Minimum number of frames between two keyframes.") + ("maxFrameStep", po::value(&maxFrameStep)->default_value(maxFrameStep), + "Maximum number of frames after which a keyframe can be taken (ignored if equal to 0).") + ("maxNbOutFrame", po::value(&maxNbOutFrame)->default_value(maxNbOutFrame), + "Maximum number of output keyframes (0 = no limit).\n" + "'minFrameStep' and 'maxFrameStep' will always be respected, so combining them with this " + "parameter might cause the selection to stop before reaching the end of the input sequence(s)."); + + + aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" + "AliceVision keyframeSelection"); + cmdline.add(inputParams); + cmdline.add(metadataParams); + cmdline.add(algorithmParams); + if (!cmdline.execute(argc, argv)) { + return EXIT_FAILURE; } - } - - if (nbCameras < 1) { - ALICEVISION_LOG_ERROR("Program needs at least one media path."); - return EXIT_FAILURE; - } - - if (maxFrameStep > 0 && minFrameStep >= maxFrameStep) { - ALICEVISION_LOG_ERROR("Setting 'minFrameStep' should be less than setting 'maxFrameStep'."); - return EXIT_FAILURE; - } - - brands.resize(nbCameras); - models.resize(nbCameras); - mmFocals.resize(nbCameras); - - // Debugging prints, print out all the parameters - { - if (nbCameras == 1) - ALICEVISION_LOG_INFO("Single camera"); - else - ALICEVISION_LOG_INFO("Camera rig of " << nbCameras << " cameras."); - - for (std::size_t i = 0; i < nbCameras; ++i) { - ALICEVISION_LOG_INFO("Camera: " << mediaPaths.at(i) << std::endl - << "\t - brand: " << brands.at(i) << std::endl - << "\t - model: " << models.at(i) << std::endl - << "\t - focal (mm): " << mmFocals.at(i) << std::endl); + + const std::size_t nbCameras = mediaPaths.size(); + + // Check output folder and update to its absolute path + { + const fs::path outDir = fs::absolute(outputFolder); + outputFolder = outDir.string(); + if (!fs::is_directory(outDir)) { + ALICEVISION_LOG_ERROR("Cannot find folder: " << outputFolder); + return EXIT_FAILURE; + } + } + + if (nbCameras < 1) { + ALICEVISION_LOG_ERROR("Program needs at least one media path."); + return EXIT_FAILURE; + } + + if (maxFrameStep > 0 && minFrameStep >= maxFrameStep) { + ALICEVISION_LOG_ERROR("Setting 'minFrameStep' should be less than setting 'maxFrameStep'."); + return EXIT_FAILURE; + } + + brands.resize(nbCameras); + models.resize(nbCameras); + mmFocals.resize(nbCameras); + + // Debugging prints, print out all the parameters + { + if (nbCameras == 1) + ALICEVISION_LOG_INFO("Single camera"); + else + ALICEVISION_LOG_INFO("Camera rig of " << nbCameras << " cameras."); + + for (std::size_t i = 0; i < nbCameras; ++i) { + ALICEVISION_LOG_INFO("Camera: " << mediaPaths.at(i) << std::endl + << "\t - brand: " << brands.at(i) << std::endl + << "\t - model: " << models.at(i) << std::endl + << "\t - focal (mm): " << mmFocals.at(i) << std::endl); + } } - } - // Initialize KeyframeSelector - KeyframeSelector selector(mediaPaths, sensorDbPath, outputFolder); + // Initialize KeyframeSelector + KeyframeSelector selector(mediaPaths, sensorDbPath, outputFolder); - // Set algorithm parameters - 
selector.setMinFrameStep(minFrameStep); - selector.setMaxFrameStep(maxFrameStep); - selector.setMaxOutFrame(maxNbOutFrame); + // Set algorithm parameters + selector.setMinFrameStep(minFrameStep); + selector.setMaxFrameStep(maxFrameStep); + selector.setMaxOutFrame(maxNbOutFrame); - // Process media paths with regular method - selector.processRegular(); + // Process media paths with regular method + selector.processRegular(); - // Write selected keyframes - selector.writeSelection(brands, models, mmFocals); + // Write selected keyframes + selector.writeSelection(brands, models, mmFocals); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } From ff48245420ea861549874eda84ae97a67d6563fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Tue, 24 Jan 2023 14:17:07 +0000 Subject: [PATCH 04/28] KeyframeSelection: Add OpenCV dependency and the support of Optical Flow --- src/CMakeLists.txt | 2 +- src/aliceVision/CMakeLists.txt | 2 +- src/aliceVision/keyframe/CMakeLists.txt | 4 ++++ src/software/utils/CMakeLists.txt | 18 ++++++++++-------- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b13e045055..f19617ff1b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -475,7 +475,7 @@ set(ALICEVISION_HAVE_OPENCV_CONTRIB 0) if(ALICEVISION_BUILD_SFM) if(NOT ALICEVISION_USE_OPENCV STREQUAL "OFF") - find_package(OpenCV COMPONENTS core imgproc video imgcodecs videoio features2d photo) + find_package(OpenCV COMPONENTS core imgproc video imgcodecs videoio features2d optflow photo) if(OpenCV_FOUND) # We do not set the minimal version directly in find_package diff --git a/src/aliceVision/CMakeLists.txt b/src/aliceVision/CMakeLists.txt index 01eeb3770f..1d5df2a1ee 100644 --- a/src/aliceVision/CMakeLists.txt +++ b/src/aliceVision/CMakeLists.txt @@ -18,7 +18,6 @@ if(ALICEVISION_BUILD_SFM) add_subdirectory(graph) add_subdirectory(gpu) add_subdirectory(imageMatching) - add_subdirectory(keyframe) add_subdirectory(linearProgramming) add_subdirectory(localization) add_subdirectory(matching) @@ -35,6 +34,7 @@ if(ALICEVISION_BUILD_SFM) add_subdirectory(calibration) if(ALICEVISION_HAVE_OPENCV) add_subdirectory(imageMasking) + add_subdirectory(keyframe) endif() endif() diff --git a/src/aliceVision/keyframe/CMakeLists.txt b/src/aliceVision/keyframe/CMakeLists.txt index 365176f371..8d148a8b1f 100644 --- a/src/aliceVision/keyframe/CMakeLists.txt +++ b/src/aliceVision/keyframe/CMakeLists.txt @@ -18,3 +18,7 @@ alicevision_add_library(aliceVision_keyframe aliceVision_system Boost::filesystem ) + +if(ALICEVISION_HAVE_OPENCV) + target_link_libraries(aliceVision_keyframe PUBLIC ${OpenCV_LIBS}) +endif() \ No newline at end of file diff --git a/src/software/utils/CMakeLists.txt b/src/software/utils/CMakeLists.txt index 12e7ffef1e..a03be1c10f 100644 --- a/src/software/utils/CMakeLists.txt +++ b/src/software/utils/CMakeLists.txt @@ -218,15 +218,17 @@ alicevision_add_software(aliceVision_sfmLocalization # Keyframe selection # - export keyframes from video files / image sequence directories -alicevision_add_software(aliceVision_keyframeSelection - SOURCE main_keyframeSelection.cpp - FOLDER ${FOLDER_SOFTWARE_UTILS} - LINKS aliceVision_system - aliceVision_keyframe - ${OPENIMAGEIO_LIBRARIES} - Boost::program_options - Boost::filesystem +if(ALICEVISION_HAVE_OPENCV) + alicevision_add_software(aliceVision_keyframeSelection + SOURCE main_keyframeSelection.cpp + FOLDER ${FOLDER_SOFTWARE_UTILS} + LINKS aliceVision_system + aliceVision_keyframe + 
${OPENIMAGEIO_LIBRARIES} + Boost::program_options + Boost::filesystem ) +endif() # Print distances between 3D objects From 90c7dd70ebe9ce80dcbd695feadcced4f66ab69e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Tue, 7 Feb 2023 10:23:06 +0100 Subject: [PATCH 05/28] KeyframeSelection: Add smart selection based on sharpness and optical flow Select frames depending on the motion accumulation and the sharpness. - The sharpness is computed across the image using a sliding window; the sharpest part of the image is kept to be used as the sharpness score. - The optical flow is computed cell by cell within a frame, and the median value of the cells is used as the flow score. Subsequences are identified based on motion accumulation across frames. Within each subsequence, a keyframe is selected based on both its sharpness and its position in the subsequence (frames located at the middle have an advantage over frames located on the subsequence's borders). The goal is to select frames that show a significant difference in their motion / are not too close to each other in time, and are as sharp as possible. --- src/aliceVision/keyframe/KeyframeSelector.cpp | 386 +++++++++++++++++- src/aliceVision/keyframe/KeyframeSelector.hpp | 122 +++++- src/software/utils/main_keyframeSelection.cpp | 72 +++- 3 files changed, 543 insertions(+), 37 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 7388395dab..f6fac24043 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -7,7 +7,6 @@ #include "KeyframeSelector.hpp" #include #include -#include #include #include @@ -17,6 +16,7 @@ #include #include #include + namespace fs = boost::filesystem; namespace aliceVision { @@ -25,17 +25,44 @@ namespace keyframe { /** * @brief Get a random int in order to generate uid. - * @warning The random don't use a repeatable seed to avoid conflicts between different launches on different data sets. + * @warning The random doesn't use a repeatable seed to avoid conflicts between different launches on different data sets * @return int between 0 and std::numeric_limits::max() */ int getRandomInt() { std::random_device rd; // will be used to obtain a seed for the random number engine - std::mt19937 randomTwEngine(rd()); // standard mersenne_twister_engine seeded with rd() + std::mt19937 randomTwEngine(rd()); // standard mersenne_twister_engine seeded with rd() std::uniform_int_distribution<> randomDist(0, std::numeric_limits::max()); return randomDist(randomTwEngine); } +/** + * @brief Find the median value in an unsorted vector of double values. 
+ * @param[in] vec The unsorted vector of double values + * @return double the median value + */ +double findMedian(const std::vector& vec) +{ + std::vector vecCopy = vec; + if (vecCopy.size() > 0 && vecCopy.size() % 2 == 0) { + const auto medianIt1 = vecCopy.begin() + vecCopy.size() / 2 - 1; + std::nth_element(vecCopy.begin(), medianIt1, vecCopy.end()); + const auto med1 = *medianIt1; + + const auto medianIt2 = vecCopy.begin() + vecCopy.size() / 2; + std::nth_element(vecCopy.begin(), medianIt2, vecCopy.end()); + const auto med2 = *medianIt2; + + return (med1 + med2) / 2.0; + } else if (vecCopy.size() > 0) { + const auto medianIt = vecCopy.begin() + vecCopy.size() / 2; + std::nth_element(vecCopy.begin(), medianIt, vecCopy.end()); + return *medianIt; + } + + return 0.0; +} + KeyframeSelector::KeyframeSelector(const std::vector& mediaPaths, const std::string& sensorDbPath, const std::string& outputFolder) @@ -84,15 +111,15 @@ void KeyframeSelector::processRegular() } /** - * To respect the _minFrameStep, _maxFrameStep and _maxOutFrame constraints as much as possible: - * - if _maxOutFrame is set and the current step is too small to sample over the entire sequence, + * To respect the _minFrameStep, _maxFrameStep and _maxOutFrames constraints as much as possible: + * - if _maxOutFrames is set and the current step is too small to sample over the entire sequence, * the step should be increased; - * - if _maxOutFrame is set and the adjusted step became too big and does not respect _maxFrameStep anymore, - * the step should be set to _maxFrameStep - in that case, _maxOutFrame might be reached before the end of + * - if _maxOutFrames is set and the adjusted step became too big and does not respect _maxFrameStep anymore, + * the step should be set to _maxFrameStep - in that case, _maxOutFrames might be reached before the end of * the sequence */ - if (_maxOutFrame > 0 && nbFrames / _maxOutFrame > step) { - step = (nbFrames / _maxOutFrame) + 1; // + 1 to prevent ending up with more than _maxOutFrame selected frames + if (_maxOutFrames > 0 && nbFrames / _maxOutFrames > step) { + step = (nbFrames / _maxOutFrames) + 1; // + 1 to prevent ending up with more than _maxOutFrame selected frames if (_maxFrameStep > 0 && step > _maxFrameStep) { step = _maxFrameStep; } @@ -101,11 +128,230 @@ void KeyframeSelector::processRegular() for (unsigned int id = 0; id < nbFrames; id += step) { ALICEVISION_LOG_DEBUG("Selecting frame with ID " << id); _selectedKeyframes.push_back(id); - if (_maxOutFrame > 0 && _selectedKeyframes.size() >= _maxOutFrame) + if (_maxOutFrames > 0 && _selectedKeyframes.size() >= _maxOutFrames) break; } } +void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_t rescaledWidth, + const std::size_t sharpnessWindowSize, const std::size_t flowCellSize) +{ + _selectedKeyframes.clear(); + + // Step 0: compute all the scores + computeScores(rescaledWidth, sharpnessWindowSize, flowCellSize); + + // Step 1: determine subsequences based on the motion accumulation + std::vector subsequenceLimits; + subsequenceLimits.push_back(0); // Always use the first frame as the starting point + + std::size_t sequenceSize = _sharpnessScores.size(); + float step = pxDisplacement * std::min(_frameWidth, _frameHeight) / 100.0; + double motionAcc = 0.0; + for (std::size_t i = 1; i < sequenceSize; ++i) { // Starts at 1 because the first frame's motion score will be -1 + motionAcc += _flowScores.at(i); + if (motionAcc >= step) { + subsequenceLimits.push_back(i); + motionAcc = 0.0; // Reset the 
motion accumulator + } + } + subsequenceLimits.push_back(sequenceSize - 1); + + // Step 2: check whether the min/max output frames constraints are respected + if (!(subsequenceLimits.size() - 1 >= _minOutFrames && subsequenceLimits.size() - 1 <= _maxOutFrames)) { + ALICEVISION_LOG_DEBUG("Preliminary selection does not provide enough frames (" << subsequenceLimits.size() - 1 + << " keyframes, should be between " << _minOutFrames << " and " << _maxOutFrames << ")"); + + std::vector newLimits = subsequenceLimits; // Prevents first 'newLimits.size() - 1' from overflowing + const double displacementDiff = 0.5; // The displacement must be 0.5px smaller/bigger than the previous one + + if (subsequenceLimits.size() - 1 < _minOutFrames) { + // Not enough frames, reduce the motion step + bool sampleRegularly = false; + while (newLimits.size() - 1 < _minOutFrames) { + newLimits.clear(); + newLimits.push_back(0); + step = std::max(0.0, step - displacementDiff); + + if (step == 0.0) { // The criterion does not make sense anymore, exit to sample regularly instead + sampleRegularly = true; + break; + } + motionAcc = 0.0; + + for (std::size_t i = 1; i < sequenceSize; ++i) { + motionAcc += _flowScores.at(i); + if (motionAcc >= step) { + newLimits.push_back(i); + motionAcc = 0.0; + } + } + newLimits.push_back(sequenceSize - 1); + } + + if (sampleRegularly) { + // Sample regularly the whole sequence to get minOutFrames subsequences + newLimits.clear(); + newLimits.push_back(0); + std::size_t stepSize = (sequenceSize / _minOutFrames) + 1; + + for (std::size_t i = 1; i < sequenceSize; i += stepSize) + newLimits.push_back(i); + newLimits.push_back(sequenceSize - 1); + } + } else { + // Too many frames, increase the motion step + while (newLimits.size() - 1 > _maxOutFrames) { + newLimits.clear(); + newLimits.push_back(0); + step = step + displacementDiff; + motionAcc = 0.0; + + for (std::size_t i = 1; i < sequenceSize; ++i) { + motionAcc += _flowScores.at(i); + if (motionAcc >= step) { + newLimits.push_back(i); + motionAcc = 0.0; + } + } + newLimits.push_back(sequenceSize - 1); + } + } + + subsequenceLimits.clear(); + subsequenceLimits = newLimits; + } + + // Step 3: for each subsequence, find the keyframe + for (std::size_t i = 1; i < subsequenceLimits.size(); ++i) { + double bestSharpness = 0.0; + std::size_t bestIndex = 0; + std::size_t subsequenceSize = subsequenceLimits.at(i) - subsequenceLimits.at(i - 1); + + // Weights for the whole subsequence [1.0; 2.0] (1.0 is on the subsequence's limits, 2.0 on its center) + std::deque weights; + const double weightStep = 1.f / (static_cast(subsequenceSize - 1) / 2.f); + weights.push_back(2.0); // The frame in the middle of the subsequence has the biggest weight + if (subsequenceSize % 2 == 0) + weights.push_back(2.0); // For subsequences of even size, two frames are equally in the middle + + float currentWeight = 2.0; + while (weights.size() != subsequenceSize) { + currentWeight -= weightStep; + weights.push_front(currentWeight); + weights.push_back(currentWeight); + } + + std::size_t weightPosition = 0; + for (std::size_t j = subsequenceLimits.at(i - 1); j < subsequenceLimits.at(i); ++j) { + auto sharpness = _sharpnessScores.at(j) * weights.at(weightPosition); + ++weightPosition; + if (sharpness > bestSharpness) { + bestIndex = j; + bestSharpness = sharpness; + } + } + ALICEVISION_LOG_DEBUG("Selecting frame " << bestIndex); + _selectedKeyframes.push_back(bestIndex); + } +} + +bool KeyframeSelector::computeScores(const std::size_t rescaledWidth, const std::size_t 
sharpnessWindowSize, + const std::size_t flowCellSize) +{ + // Reset the computed scores + _sharpnessScores.clear(); + _flowScores.clear(); + + // Reset the frame size + _frameWidth = 0; + _frameHeight = 0; + + // Create feeds and count minimum number of frames + std::size_t nbFrames = std::numeric_limits::max(); + std::vector> feeds; + + for (std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) { + const auto& path = _mediaPaths.at(mediaIndex); + + // Create a feed provider per mediaPaths + feeds.push_back(std::make_unique(path)); + const auto& feed = *feeds.back(); + + // Check if feed is initialized + if (!feed.isInit()) { + ALICEVISION_THROW(std::invalid_argument, "Cannot initialize the FeedProvider with " << path); + } + + // Update minimum number of frames + nbFrames = std::min(nbFrames, (size_t)feed.nbFrames()); + } + + // Check if minimum number of frame is zero + if (nbFrames == 0) { + ALICEVISION_THROW(std::invalid_argument, "One or multiple medias can't be found or is empty!"); + } + + // Feed provider variables + image::Image image; // original image + camera::PinholeRadialK3 queryIntrinsics; // image associated camera intrinsics + bool hasIntrinsics = false; // true if queryIntrinsics is valid + std::string currentImgName; // current image name + + // Feed and metadata initialization + for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) { + // First frame with offset + feeds.at(mediaIndex)->goToFrame(0); + + if (!feeds.at(mediaIndex)->readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) { + ALICEVISION_THROW(std::invalid_argument, "Cannot read media first frame " << _mediaPaths[mediaIndex]); + } + } + + std::size_t currentFrame = 0; + cv::Mat previousMat, currentMat; // OpenCV matrices for the optical flow computation + auto ptrFlow = cv::optflow::createOptFlow_DeepFlow(); + + while (currentFrame < nbFrames) { + double minimalSharpness = std::numeric_limits::max(); + double minimalFlow = std::numeric_limits::max(); + + for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) { + auto& feed = *feeds.at(mediaIndex); + + if (currentFrame > 0) { // Get currentFrame - 1 for the optical flow computation + previousMat = readImage(feed, rescaledWidth); + feed.goToNextFrame(); + } + + currentMat = readImage(feed, rescaledWidth); // Read image and rescale it if requested + if (_frameWidth == 0 && _frameHeight == 0) { + _frameWidth = currentMat.size().width; + _frameHeight = currentMat.size().height; + } + + // Compute sharpness + const double sharpness = computeSharpness(currentMat, sharpnessWindowSize); + minimalSharpness = std::min(minimalSharpness, sharpness); + + // Compute optical flow + if (currentFrame > 0) { + const double flow = estimateFlow(ptrFlow, currentMat, previousMat, flowCellSize); + minimalFlow = std::min(minimalFlow, flow); + } + + ALICEVISION_LOG_DEBUG("Finished processing frame " << currentFrame + 1 << "/" << nbFrames); + } + + // Save scores for the current frame + _sharpnessScores.push_back(minimalSharpness); + _flowScores.push_back(currentFrame > 0 ? 
minimalFlow : -1.f); + ++currentFrame; + } + + return true; +} + bool KeyframeSelector::writeSelection(const std::vector& brands, const std::vector& models, const std::vector& mmFocals) const @@ -176,12 +422,132 @@ bool KeyframeSelector::writeSelection(const std::vector& brands, } image::writeImage(filepath, image, options, metadata); + ALICEVISION_LOG_DEBUG("Wrote selected keyframe " << pos); } } return true; } +cv::Mat KeyframeSelector::readImage(dataio::FeedProvider &feed, std::size_t width) +{ + image::Image image; + camera::PinholeRadialK3 queryIntrinsics; + bool hasIntrinsics = false; + std::string currentImgName; + + if (!feed.readImage(image, queryIntrinsics, currentImgName, hasIntrinsics)) { + ALICEVISION_THROW(std::invalid_argument, "Cannot read frame '" << currentImgName << "'!"); + } + + // Convert content to OpenCV + cv::Mat cvFrame(cv::Size(image.cols(), image.rows()), CV_8UC3, image.data(), image.cols() * 3); + + // Convert to grayscale + cv::Mat cvGrayscale; + cv::cvtColor(cvFrame, cvGrayscale, cv::COLOR_BGR2GRAY); + + // Resize to smaller size if requested + if (width == 0) + return cvGrayscale; + + cv::Mat cvRescaled; + if (cvGrayscale.cols > width && width > 0) { + cv::resize(cvGrayscale, cvRescaled, + cv::Size(width,double(cvGrayscale.rows) * double(width) / double(cvGrayscale.cols))); + } + + return cvRescaled; +} + +double KeyframeSelector::computeSharpness(const cv::Mat& grayscaleImage, const std::size_t windowSize) +{ + if (windowSize > grayscaleImage.size().width || windowSize > grayscaleImage.size().height) { + ALICEVISION_THROW(std::invalid_argument, + "Cannot use a sliding window bigger than the image (sliding window size: " + << windowSize << ", image size: " << grayscaleImage.size().width << "x" + << grayscaleImage.size().height << ")"); + } + + cv::Mat sum, squaredSum, laplacian; + cv::Laplacian(grayscaleImage, laplacian, CV_64F); + cv::integral(laplacian, sum, squaredSum); + + double totalCount = windowSize * windowSize; + double maxstd = 0.0; + + // TODO: do not slide the window pixel by pixel to speed up computations + // Starts at 1 because the integral image is padded with 0s on the top and left borders + for (int y = 1; y < sum.rows - windowSize; ++y) { + for (int x = 1; x < sum.cols - windowSize; ++x) { + double tl = sum.at(y, x); + double tr = sum.at(y, x + windowSize); + double bl = sum.at(y + windowSize, x); + double br = sum.at(y + windowSize, x + windowSize); + const double s1 = br + tl - tr - bl; + + tl = squaredSum.at(y, x); + tr = squaredSum.at(y, x + windowSize); + bl = squaredSum.at(y + windowSize, x); + br = squaredSum.at(y + windowSize, x + windowSize); + const double s2 = br + tl - tr - bl; + + const double std2 = std::sqrt((s2 - (s1 * s1) / totalCount) / totalCount); + maxstd = std::max(maxstd, std2); + } + } + + return maxstd; +} + +double KeyframeSelector::estimateFlow(const cv::Ptr& ptrFlow, const cv::Mat& grayscaleImage, + const cv::Mat& previousGrayscaleImage, const std::size_t cellSize) +{ + if (cellSize > grayscaleImage.size().width) { // If the cell size is bigger than the height, it will be adjusted + ALICEVISION_THROW(std::invalid_argument, + "Cannot use a cell size bigger than the image's width (cell size: " << cellSize + << ", image's width: " << grayscaleImage.size().width << ")"); + } + + if (grayscaleImage.size() != previousGrayscaleImage.size()) { + ALICEVISION_THROW(std::invalid_argument, + "The images used for the optical flow computation have different sizes (" + << grayscaleImage.size().width << "x" << 
grayscaleImage.size().height << " and " + << previousGrayscaleImage.size().width << "x" << previousGrayscaleImage.size().height + << ")"); + } + + cv::Mat flow; + ptrFlow->calc(grayscaleImage, previousGrayscaleImage, flow); + + cv::Mat sumflow; + cv::integral(flow, sumflow, CV_64F); + + double norm; + std::vector motionByCell; + + // Starts at 1 because the integral matrix is padded with 0s on the top and left borders + for (std::size_t y = 1; y < sumflow.size().height; y += cellSize) { + std::size_t maxCellSizeHeight = cellSize; + if (std::min(sumflow.size().height, int(y + cellSize)) == sumflow.size().height) + maxCellSizeHeight = sumflow.size().height - y; + + for (std::size_t x = 1; x < sumflow.size().width; x += cellSize) { + std::size_t maxCellSizeWidth = cellSize; + if (std::min(sumflow.size().width, int(x + cellSize)) == sumflow.size().width) + maxCellSizeWidth = sumflow.size().width - x; + cv::Point2d tl = sumflow.at(y, x); + cv::Point2d tr = sumflow.at(y, x + maxCellSizeWidth - 1); + cv::Point2d bl = sumflow.at(y + maxCellSizeHeight - 1, x); + cv::Point2d br = sumflow.at(y + maxCellSizeHeight - 1, x + maxCellSizeWidth - 1); + cv::Point2d s = br + tl - tr - bl; + norm = std::hypot(s.x, s.y) / (maxCellSizeHeight * maxCellSizeWidth); + motionByCell.push_back(norm); + } + } + + return findMedian(motionByCell); +} } // namespace keyframe } // namespace aliceVision diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index 090df13db2..fd38406f7c 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -9,12 +9,14 @@ #include #include +#include +#include #include +#include #include #include #include - namespace aliceVision { namespace image { @@ -51,6 +53,45 @@ class KeyframeSelector */ void processRegular(); + /** + * @brief Process media paths and build a list of selected keyframes using a smart method based on sharpness + * and optical flow estimation. 
The whole process can be described as follows: + * - Step 0: compute the sharpness and optical flow scores for all the frames in all the sequences + * - Step 1: split the whole sequence into subsequences depending on the accumulated movement ("motion step") + * - Step 2: check whether the number of subsequences corresponds to what we want + * - if we do not have enough frames, we reduce the motion step until we get the required + * number of frames + * - if we have too many frames, we increase the motion step until we get the required number of + * frames + * - Step 3: for each subsequence, find the frame that best fit both a sharpness criteria (as sharp as + * possible) and a temporal criteria (as in the middle of the subsequence as possible); the goal + * of these criteria is to avoid the following cases: + * - the selected frame is well located temporally but is blurry + * - the selected frame is very sharp but is located at the very beginning or very end of the + * subsequence, meaning that it is likely adjacent to another very sharp frame in another + * subsequence; in that case, we might select two very sharp frames that are consecutive with no + * significant differences in their motion + * - Step 4: push the selected frames' IDs + * @param[in] pxDisplacement in percent, the minimum of displaced pixels in the image since the last selected frame + * @param[in] sharpnessWindowSize the size of the sliding window used to compute sharpness scores, in pixels + * @param[in] flowCellSize the size of the cells within a frame that are used to compute the optical flow scores, + * in pixels + */ + void processSmart(const float pxDisplacement, const std::size_t rescaledWidth, + const std::size_t sharpnessWindowSize, const std::size_t flowCellSize); + + /** + * @brief Compute the sharpness and optical flow scores for the input media paths + * @param[in] rescaledWidth the width to resize the input frames to before processing them (if equal to 0, no + * rescale will be performed) + * @param[in] sharpnessWindowSize the size of the sliding window used to compute sharpness scores, in pixels + * @param[in] flowCellSize the size of the cells within a frame that are used to compute the optical flow scores, + * in pixels + * @return true if the scores have been successfully computed for all frames, false otherwise + */ + bool computeScores(const std::size_t rescaledWidth, const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize); + /** * @brief Write the selected keyframes in the output folder * @param[in] brands brand name for each camera @@ -79,13 +120,22 @@ class KeyframeSelector _maxFrameStep = frameStep; } + /** + * @brief Set the minimum output frame number parameter for the processing algorithm + * @param[in] nbFrames minimum number of output frames + */ + void setMinOutFrames(unsigned int nbFrames) + { + _minOutFrames = nbFrames; + } + /** * @brief Set the maximum output frame number parameter for the processing algorithm - * @param[in] nbFrame maximum number of output frames (if 0, no limit) + * @param[in] nbFrames maximum number of output frames (if 0, no limit for the regular algorithm) */ - void setMaxOutFrame(unsigned int nbFrame) + void setMaxOutFrames(unsigned int nbFrames) { - _maxOutFrame = nbFrame; + _maxOutFrames = nbFrames; } /** @@ -107,15 +157,52 @@ class KeyframeSelector } /** - * @brief Get the max output frame number for process algorithm - * @return maximum number of output frames (if 0, no limit) + * @brief Get the minimum output frame for the processing algorithm 
+ * @return minimum number of output frames */ - unsigned int getMaxOutFrame() const + unsigned int getMinOutFrames() const { - return _maxOutFrame; + return _minOutFrames; + } + + /** + * @brief Get the maximum output frame number for the processing algorithm + * @return maximum number of output frames (if 0, no limit for the regular algorithm) + */ + unsigned int getMaxOutFrames() const + { + return _maxOutFrames; } private: + /** + * @brief Read an image from a feed provider into a grayscale OpenCV matrix, and rescale it if a size is provided. + * @param[in] feed The feed provider + * @param[in] width The width to resize the input image to. The height will be adjusted with respect to the size ratio. + * There will be no resizing if this parameter is set to 0 + * @return An OpenCV Mat object containing the image + */ + cv::Mat readImage(dataio::FeedProvider &feed, std::size_t width = 0); + + /** + * @brief Compute the sharpness scores for an input grayscale frame with a sliding window + * @param[in] grayscaleImage the input grayscale matrix of the frame + * @param[in] windowSize the size of the sliding window + * @return a double value representing the sharpness score of the sharpest tile in the image + */ + double computeSharpness(const cv::Mat& grayscaleImage, const std::size_t windowSize); + + /** + * @brief Estimate the optical flow score for an input grayscale frame based on its previous frame cell by cell + * @param[in] ptrFlow the OpenCV's DenseOpticalFlow object + * @param[in] grayscaleImage the grayscale matrix of the current frame + * @param[in] previousGrayscaleImage the grayscale matrix of the previous frame + * @param[in] cellSize the size of the evaluated cells within the frame + * @return a double value representing the median motion of all the image's cells + */ + double estimateFlow(const cv::Ptr& ptrFlow, const cv::Mat& grayscaleImage, + const cv::Mat& previousGrayscaleImage, const std::size_t cellSize); + /// Selected keyframes IDs std::vector _selectedKeyframes; @@ -126,13 +213,28 @@ class KeyframeSelector /// Output folder for keyframes std::string _outputFolder; + // Parameters common to both the regular and smart methods + /// Maximum number of output frames (0 = no limit) + unsigned int _maxOutFrames = 0; + // Regular algorithm parameters /// Minimum number of frames between two keyframes unsigned int _minFrameStep = 12; /// Maximum number of frames between two keyframes unsigned int _maxFrameStep = 36; - /// Maximum number of output frames (0 = no limit) - unsigned int _maxOutFrame = 0; + + // Smart algorithm parameters + /// Minimum number of output frames + unsigned int _minOutFrames = 10; + + /// Sharpness scores for each frame + std::vector _sharpnessScores; + /// Optical flow scores for each frame + std::vector _flowScores; + + /// Size of the frame (afer rescale, if any is applied) + unsigned int _frameWidth = 0; + unsigned int _frameHeight = 0; }; } // namespace keyframe diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 9af6a640ba..55e04ddf90 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -36,9 +36,15 @@ int aliceVision_main(int argc, char** argv) std::string outputFolder; // output folder for keyframes // Algorithm variables - unsigned int minFrameStep = 12; - unsigned int maxFrameStep = 36; - unsigned int maxNbOutFrame = 0; + bool useSmartSelection = true; // enable the smart selection instead of the regular one + unsigned int 
minFrameStep = 12; // minimum number of frames between two keyframes (regular selection) + unsigned int maxFrameStep = 36; // maximum number of frames between two keyframes (regular selection) + unsigned int minNbOutFrames = 10; // minimum number of selected keyframes (smart selection) + unsigned int maxNbOutFrames = 2000; // maximum number of selected keyframes (both selections) + float pxDisplacement = 3.0; // percentage of pixels that have moved across frames since last keyframe (smart selection) + std::size_t rescaledWidth = 720; // width of the rescaled frames; 0 if no rescale is performed (smart selection) + std::size_t sharpnessWindowSize = 200; // sliding window's size in sharpness computation (smart selection) + std::size_t flowCellSize = 90; // size of the cells within a frame used to compute the optical flow (smart selection) po::options_description inputParams("Required parameters"); inputParams.add_options() @@ -58,23 +64,46 @@ int aliceVision_main(int argc, char** argv) ("mmFocals", po::value>(&mmFocals)->default_value(mmFocals)->multitoken(), "Focals in mm (ignored if equal to 0)."); - po::options_description algorithmParams("Algorithm parameters"); + po::options_description algorithmParams("Algorithm parameters"); // Parameters common to both methods algorithmParams.add_options() + ("maxNbOutFrames", po::value(&maxNbOutFrames)->default_value(maxNbOutFrames), + "Maximum number of output keyframes.\n" + "\t- For the regular method, 0 = no limit. 'minFrameStep' and 'maxFrameStep' will always be respected, " + "so combining them with this parameter might cause the selection to stop before reaching the end of the " + "input sequence(s).\n" + "\t- For the smart method, the default value is set to 2000."); + + po::options_description regularAlgorithmParams("Regular algorithm parameters"); + regularAlgorithmParams.add_options() ("minFrameStep", po::value(&minFrameStep)->default_value(minFrameStep), "Minimum number of frames between two keyframes.") ("maxFrameStep", po::value(&maxFrameStep)->default_value(maxFrameStep), - "Maximum number of frames after which a keyframe can be taken (ignored if equal to 0).") - ("maxNbOutFrame", po::value(&maxNbOutFrame)->default_value(maxNbOutFrame), - "Maximum number of output keyframes (0 = no limit).\n" - "'minFrameStep' and 'maxFrameStep' will always be respected, so combining them with this " - "parameter might cause the selection to stop before reaching the end of the input sequence(s)."); - + "Maximum number of frames after which a keyframe can be taken (ignored if equal to 0)."); + + po::options_description smartAlgorithmParams("Smart algorithm parameters"); + smartAlgorithmParams.add_options() + ("useSmartSelection", po::value(&useSmartSelection)->default_value(useSmartSelection), + "True to use the smart keyframe selection method, false to use the regular keyframe selection method.") + ("minNbOutFrames", po::value(&minNbOutFrames)->default_value(minNbOutFrames), + "Minimum number of output keyframes.") + ("pxDisplacement", po::value(&pxDisplacement)->default_value(pxDisplacement), + "Percentage of pixels in the image that have been displaced since the last selected frame. The absolute " + "number of moving pixels is determined using min(imageWidth, imageHeight).") + ("rescaledWidth", po::value(&rescaledWidth)->default_value(rescaledWidth), + "Width, in pixels, of the rescaled input frames used to compute the scores. The height of the rescaled " + "frames will be automatically determined to preserve the aspect ratio. 
0 = no rescale.") + ("sharpnessWindowSize", po::value(&sharpnessWindowSize)->default_value(sharpnessWindowSize), + "Size, in pixels, of the sliding window that is used to compute the sharpness score of a frame.") + ("flowCellSize", po::value(&flowCellSize)->default_value(flowCellSize), + "Size, in pixels, of the cells within an input frame that are used to compute the optical flow scores."); aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" "AliceVision keyframeSelection"); cmdline.add(inputParams); cmdline.add(metadataParams); cmdline.add(algorithmParams); + cmdline.add(regularAlgorithmParams); + cmdline.add(smartAlgorithmParams); if (!cmdline.execute(argc, argv)) { return EXIT_FAILURE; } @@ -86,8 +115,8 @@ int aliceVision_main(int argc, char** argv) const fs::path outDir = fs::absolute(outputFolder); outputFolder = outDir.string(); if (!fs::is_directory(outDir)) { - ALICEVISION_LOG_ERROR("Cannot find folder: " << outputFolder); - return EXIT_FAILURE; + ALICEVISION_LOG_ERROR("Cannot find folder: " << outputFolder); + return EXIT_FAILURE; } } @@ -101,6 +130,11 @@ int aliceVision_main(int argc, char** argv) return EXIT_FAILURE; } + if (minNbOutFrames < 1) { + ALICEVISION_LOG_ERROR("The minimum number of output keyframes cannot be less than 1."); + return EXIT_FAILURE; + } + brands.resize(nbCameras); models.resize(nbCameras); mmFocals.resize(nbCameras); @@ -123,13 +157,17 @@ int aliceVision_main(int argc, char** argv) // Initialize KeyframeSelector KeyframeSelector selector(mediaPaths, sensorDbPath, outputFolder); - // Set algorithm parameters + // Set frame-related algorithm parameters selector.setMinFrameStep(minFrameStep); selector.setMaxFrameStep(maxFrameStep); - selector.setMaxOutFrame(maxNbOutFrame); - - // Process media paths with regular method - selector.processRegular(); + selector.setMinOutFrames(minNbOutFrames); + selector.setMaxOutFrames(maxNbOutFrames); + + // Process media paths with regular or smart method + if (useSmartSelection) + selector.processSmart(pxDisplacement, rescaledWidth, sharpnessWindowSize, flowCellSize); + else + selector.processRegular(); // Write selected keyframes selector.writeSelection(brands, models, mmFocals); From 70279a48a2179aca7c652510952fbae2a64b8f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Fri, 27 Jan 2023 18:41:09 +0100 Subject: [PATCH 06/28] [keyframe] Support exporting scores to a CSV file Sharpness and optical flow scores can now be exported to a CSV file. Additionally, the CSV file may contain a column indicating whether a frame was selected as a keyframe. 
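For illustration only (not part of the patch): a minimal sketch of how the new export entry point can be driven together with the score computation, mirroring the "compute scores, skip selection" flow added later in this series. The header path, the free-function wrapper and the parameter values (720 / 200 / 90, taken from the tool's defaults) are assumptions, not an official usage example.

    // Illustrative sketch only; header path and parameter values are assumptions.
    #include <aliceVision/keyframe/KeyframeSelector.hpp>

    #include <string>
    #include <vector>

    void exportScoresOnly(const std::vector<std::string>& mediaPaths,
                          const std::string& sensorDbPath,
                          const std::string& outputFolder)
    {
        using aliceVision::keyframe::KeyframeSelector;
        KeyframeSelector selector(mediaPaths, sensorDbPath, outputFolder);

        // Compute the per-frame sharpness and optical flow scores without selecting keyframes...
        selector.computeScores(/*rescaledWidth=*/720, /*sharpnessWindowSize=*/200, /*flowCellSize=*/90);

        // ...then append them to <outputFolder>/scores.csv. No keyframes were selected here,
        // so the optional "Selected" column is omitted (exportSelectedFrames defaults to false).
        selector.exportScoresToFile("scores.csv");
    }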
--- src/aliceVision/keyframe/KeyframeSelector.cpp | 56 +++++++++++++++++++ src/aliceVision/keyframe/KeyframeSelector.hpp | 14 +++++ 2 files changed, 70 insertions(+) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index f6fac24043..1165520506 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -74,6 +74,9 @@ KeyframeSelector::KeyframeSelector(const std::vector& mediaPaths, if (mediaPaths.empty()) { ALICEVISION_THROW(std::invalid_argument, "Cannot create KeyframeSelector without at least one media file path!"); } + + scoresMap["Sharpness"] = &_sharpnessScores; + scoresMap["OpticalFlow"] = &_flowScores; } void KeyframeSelector::processRegular() @@ -137,6 +140,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ const std::size_t sharpnessWindowSize, const std::size_t flowCellSize) { _selectedKeyframes.clear(); + _selectedFrames.clear(); // Step 0: compute all the scores computeScores(rescaledWidth, sharpnessWindowSize, flowCellSize); @@ -146,6 +150,11 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ subsequenceLimits.push_back(0); // Always use the first frame as the starting point std::size_t sequenceSize = _sharpnessScores.size(); + + // All frames are unselected so far + _selectedFrames.resize(sequenceSize); + std::fill(_selectedFrames.begin(), _selectedFrames.end(), '0'); + float step = pxDisplacement * std::min(_frameWidth, _frameHeight) / 100.0; double motionAcc = 0.0; for (std::size_t i = 1; i < sequenceSize; ++i) { // Starts at 1 because the first frame's motion score will be -1 @@ -253,6 +262,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ } ALICEVISION_LOG_DEBUG("Selecting frame " << bestIndex); _selectedKeyframes.push_back(bestIndex); + _selectedFrames.at(bestIndex) = '1'; // The frame has been selected, flip it to 1 } } @@ -429,6 +439,52 @@ bool KeyframeSelector::writeSelection(const std::vector& brands, return true; } +bool KeyframeSelector::exportScoresToFile(const std::string& filename, const bool exportSelectedFrames) const +{ + std::size_t sequenceSize = scoresMap.begin()->second->size(); + if (sequenceSize == 0) { + ALICEVISION_LOG_ERROR("Nothing to export, scores do not seem to have been computed!"); + return false; + } + + std::ofstream os; + os.open((fs::path(_outputFolder) / filename).string(), std::ios::app); + + if (!os.is_open()) { + ALICEVISION_LOG_ERROR("Unable to open the scores file: " << filename << "."); + return false; + } + + ALICEVISION_LOG_DEBUG("Exporting scores as CSV file: " << filename << " (export selected frames: " + << exportSelectedFrames << ")"); + + os.seekp(0, std::ios::end); // Put the cursor at the end of the file + if (os.tellp() == std::streampos(0)) { // 'tellp' returns the cursor's position + // If the file does not exist yet, add a header + std::string header = "FrameNb;"; + for (const auto& mapIterator : scoresMap) + header += mapIterator.first + ";"; + + if (exportSelectedFrames) + header += "Selected;"; + + os << header << "\n"; + } + + for (std::size_t index = 0; index < sequenceSize; ++index) { + os << index << ";"; // First column: frame index + + for (const auto& mapIterator : scoresMap) + os << mapIterator.second->at(index) << ";"; + if (exportSelectedFrames) + os << _selectedFrames.at(index); + os << "\n"; + } + + os.close(); + return true; +} + cv::Mat KeyframeSelector::readImage(dataio::FeedProvider &feed, 
std::size_t width) { image::Image image; diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index fd38406f7c..9e3c51214f 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -102,6 +103,14 @@ class KeyframeSelector bool writeSelection(const std::vector& brands, const std::vector& models, const std::vector& mmFocals) const; + /** + * @brief Export the computed sharpness and optical flow scores to a CSV file + * @param[in] filename the name of the CSV file (e.g. "scores.csv"), which will be written in the output folder + * @param[in] exportSelectedFrames add a column with 1s and 0s depending on whether the frame has been selected + * @return true if the CSV was correctly written to disk, false otherwise + */ + bool exportScoresToFile(const std::string& filename, const bool exportSelectedFrames = false) const; + /** * @brief Set the minimum frame step parameter for the processing algorithm * @param[in] frameStep minimum number of frames between two keyframes @@ -231,10 +240,15 @@ class KeyframeSelector std::vector _sharpnessScores; /// Optical flow scores for each frame std::vector _flowScores; + /// Vector containing 1s for frames that have been selected, 0 for those which have not + std::vector _selectedFrames; /// Size of the frame (afer rescale, if any is applied) unsigned int _frameWidth = 0; unsigned int _frameHeight = 0; + + /// Map score vectors with names for export + std::map*> scoresMap; }; } // namespace keyframe From a5ef84e0d32a1886207ad2ccd5ffde604724c721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Tue, 24 Jan 2023 18:06:49 +0100 Subject: [PATCH 07/28] [utils] KeyframeSelection: Add debug options to export scores as CSV --- src/software/utils/main_keyframeSelection.cpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 55e04ddf90..ed9c5defb1 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -46,6 +46,11 @@ int aliceVision_main(int argc, char** argv) std::size_t sharpnessWindowSize = 200; // sliding window's size in sharpness computation (smart selection) std::size_t flowCellSize = 90; // size of the cells within a frame used to compute the optical flow (smart selection) + // Debug options + bool exportScores = false; // export the sharpness and optical flow scores to a CSV file + std::string csvFilename = "scores.csv"; // name of the CSV file containing the scores + bool exportSelectedFrames = false; // export the selected frames (1 for selected, 0 for not selected) + po::options_description inputParams("Required parameters"); inputParams.add_options() ("mediaPaths", po::value>(&mediaPaths)->required()->multitoken(), @@ -97,6 +102,16 @@ int aliceVision_main(int argc, char** argv) ("flowCellSize", po::value(&flowCellSize)->default_value(flowCellSize), "Size, in pixels, of the cells within an input frame that are used to compute the optical flow scores."); + po::options_description debugParams("Debug parameters"); + debugParams.add_options() + ("exportScores", po::value(&exportScores)->default_value(exportScores), + "Export the sharpness and optical flow scores to a CSV file.") + ("csvFilename", po::value(&csvFilename)->default_value(csvFilename), + "Name of the CSV file containing 
the sharpness and optical flow scores.") + ("exportSelectedFrames", po::value(&exportSelectedFrames)->default_value(exportSelectedFrames), + "Add a column in the exported CSV file containing the selected frames (1 for frames that have been " + "selected, 0 otherwise)."); + aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" "AliceVision keyframeSelection"); cmdline.add(inputParams); @@ -104,6 +119,7 @@ int aliceVision_main(int argc, char** argv) cmdline.add(algorithmParams); cmdline.add(regularAlgorithmParams); cmdline.add(smartAlgorithmParams); + cmdline.add(debugParams); if (!cmdline.execute(argc, argv)) { return EXIT_FAILURE; } @@ -172,5 +188,9 @@ int aliceVision_main(int argc, char** argv) // Write selected keyframes selector.writeSelection(brands, models, mmFocals); + // If debug options are set, export the scores as a CSV file + if (exportScores) + selector.exportScoresToFile(csvFilename, exportSelectedFrames); + return EXIT_SUCCESS; } From cfe84993e4d095921b8162b4a8ca28dc8e45bbf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Wed, 25 Jan 2023 09:32:04 +0100 Subject: [PATCH 08/28] [utils] KeyframeSelection: Add a debug option to skip the selection The scores will be computed and potentially exported to a CSV file, but the final keyframe selection will not be performed. --- src/software/utils/main_keyframeSelection.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index ed9c5defb1..35e29aa98f 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -50,6 +50,7 @@ int aliceVision_main(int argc, char** argv) bool exportScores = false; // export the sharpness and optical flow scores to a CSV file std::string csvFilename = "scores.csv"; // name of the CSV file containing the scores bool exportSelectedFrames = false; // export the selected frames (1 for selected, 0 for not selected) + bool skipSelection = false; // only compute the scores and do not proceed with the selection po::options_description inputParams("Required parameters"); inputParams.add_options() @@ -110,7 +111,9 @@ int aliceVision_main(int argc, char** argv) "Name of the CSV file containing the sharpness and optical flow scores.") ("exportSelectedFrames", po::value(&exportSelectedFrames)->default_value(exportSelectedFrames), "Add a column in the exported CSV file containing the selected frames (1 for frames that have been " - "selected, 0 otherwise)."); + "selected, 0 otherwise).") + ("skipSelection", po::value(&skipSelection)->default_value(skipSelection), + "Only compute the sharpness and optical flow scores, but do not proceed with the selection."); aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" "AliceVision keyframeSelection"); @@ -179,6 +182,14 @@ int aliceVision_main(int argc, char** argv) selector.setMinOutFrames(minNbOutFrames); selector.setMaxOutFrames(maxNbOutFrames); + if (skipSelection) { + selector.computeScores(rescaledWidth, sharpnessWindowSize, flowCellSize); + if (exportScores) + selector.exportScoresToFile(csvFilename); // Frames have not been selected, ignore 'exportSelectedFrames' + + return EXIT_SUCCESS; + } + // Process media paths with regular or smart method if (useSmartSelection) selector.processSmart(pxDisplacement, rescaledWidth, sharpnessWindowSize, flowCellSize); From 
ec7f0e5c4ba654609c0d35f03e06a92c6c8054ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Tue, 31 Jan 2023 20:19:20 +0100 Subject: [PATCH 09/28] [keyframe] Add export of motion vectors to HSV images for each frame For each frame in the input sequence(s), compute the optical flow and export the motion vectors in HSV as PNG images. Rigs are supported. --- src/aliceVision/keyframe/KeyframeSelector.cpp | 96 +++++++++++++++++++ src/aliceVision/keyframe/KeyframeSelector.hpp | 8 ++ 2 files changed, 104 insertions(+) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 1165520506..c61d717561 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -485,6 +485,102 @@ bool KeyframeSelector::exportScoresToFile(const std::string& filename, const boo return true; } +bool KeyframeSelector::exportFlowVisualisation(const std::size_t rescaledWidth) +{ + // Create feeds and count minimum number of frames + std::size_t nbFrames = std::numeric_limits::max(); + std::vector> feeds; + std::vector outputFolders; + + for (std::size_t mediaIndex = 0; mediaIndex < _mediaPaths.size(); ++mediaIndex) { + const auto& path = _mediaPaths.at(mediaIndex); + + // Create a feed provider per mediaPaths + feeds.emplace_back(new dataio::FeedProvider(path)); + auto& feed = *feeds.back(); + + // Check if feed is initialized + if (!feed.isInit()) { + ALICEVISION_LOG_ERROR("Cannot initialize the FeedProvider with " << path); + return false; + } + + feed.goToFrame(0); + + // Update minimum number of frames + nbFrames = std::min(nbFrames, (size_t)feed.nbFrames()); + + // If there is a rig, create the corresponding folders + std::string processedOutputFolder = _outputFolder; + if (_mediaPaths.size() > 1) { + const std::string rigFolder = _outputFolder + "/rig/"; + if (!fs::exists(rigFolder)) { + fs::create_directory(rigFolder); + } + + processedOutputFolder = rigFolder + std::to_string(mediaIndex); + if (!fs::exists(processedOutputFolder)) { + fs::create_directory(processedOutputFolder); + } + } + + // Save the output paths + outputFolders.push_back(processedOutputFolder); + } + + if (nbFrames == 0) { + ALICEVISION_LOG_ERROR("No frame to visualise optical flow from!"); + return false; + } + + size_t currentFrame = 0; + cv::Mat previousMat, currentMat; // OpenCV matrices for the optical flow computation + auto ptrFlow = cv::optflow::createOptFlow_DeepFlow(); + + /* To be able to handle the rigs and to avoid storing the optical flow results for all frames in case + * we might want to export them, we need to recompute the optical flow for all the frames, even if it has already + * been computed in computeScores(). 
*/ + while (currentFrame < nbFrames) { + for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) { + auto& feed = *feeds.at(mediaIndex); + + if (currentFrame > 0) { // Get currentFrame - 1 for the optical flow computation + previousMat = readImage(feed, rescaledWidth); + feed.goToNextFrame(); + } + + currentMat = readImage(feed, rescaledWidth); // Read image and rescale it if requested + + if (currentFrame > 0) { + cv::Mat flow; + ptrFlow->calc(currentMat, previousMat, flow); + + cv::Mat flowParts[2]; + cv::split(flow, flowParts); + cv::Mat magnitude, angle, magnNorm; + cv::cartToPolar(flowParts[0], flowParts[1], magnitude, angle, true); + cv::normalize(magnitude, magnNorm, 0.0f, 1.0f, cv::NORM_MINMAX); + angle *= ((1.f / 360.f) * (180.f / 255.f)); + + cv::Mat _hsv[3], hsv, hsv8, bgr; + _hsv[0] = angle; + _hsv[1] = cv::Mat::ones(angle.size(), CV_32F); + _hsv[2] = magnNorm; + cv::merge(_hsv, 3, hsv); + hsv.convertTo(hsv8, CV_8U, 255.0); + cv::cvtColor(hsv8, bgr, cv::COLOR_HSV2BGR); + + std::ostringstream filenameSS; + filenameSS << std::setw(5) << std::setfill('0') << currentFrame << ".png"; + cv::imwrite(outputFolders.at(mediaIndex) + "/OF_" + filenameSS.str(), bgr); + } + } + ++currentFrame; + } + + return true; +} + cv::Mat KeyframeSelector::readImage(dataio::FeedProvider &feed, std::size_t width) { image::Image image; diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index 9e3c51214f..f6c76d743d 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -111,6 +111,14 @@ class KeyframeSelector */ bool exportScoresToFile(const std::string& filename, const bool exportSelectedFrames = false) const; + /** + * @brief Export optical flow HSV visualisation for each frame as a PNG image + * @param[in] rescaledWidth the width to resize the input frames to before computing the optical flow (if equal + * to 0, no rescale will be performed) + * @return true if the frames have been correctly exported, false otherwise + */ + bool exportFlowVisualisation(const std::size_t rescaledWidth); + /** * @brief Set the minimum frame step parameter for the processing algorithm * @param[in] frameStep minimum number of frames between two keyframes From df7b50e9e0f59f2249e2b820e621e99ed73b0309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Wed, 25 Jan 2023 09:33:59 +0100 Subject: [PATCH 10/28] [utils] KeyframeSelection: Add options to export optical flow visualisation Two options are added to export the motion vectors in HSV as PNG images: - one to export them on top of everything else - one to export only them and do no other computation --- src/software/utils/main_keyframeSelection.cpp | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 35e29aa98f..4064d361f6 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -51,6 +51,8 @@ int aliceVision_main(int argc, char** argv) std::string csvFilename = "scores.csv"; // name of the CSV file containing the scores bool exportSelectedFrames = false; // export the selected frames (1 for selected, 0 for not selected) bool skipSelection = false; // only compute the scores and do not proceed with the selection + bool exportFlowVisualisation = false; // export optical flow visualisation for all the frames + bool flowVisualisationOnly 
= false; // export optical flow visualisation for all the frames but do not compute scores po::options_description inputParams("Required parameters"); inputParams.add_options() @@ -113,7 +115,11 @@ int aliceVision_main(int argc, char** argv) "Add a column in the exported CSV file containing the selected frames (1 for frames that have been " "selected, 0 otherwise).") ("skipSelection", po::value(&skipSelection)->default_value(skipSelection), - "Only compute the sharpness and optical flow scores, but do not proceed with the selection."); + "Only compute the sharpness and optical flow scores, but do not proceed with the selection.") + ("exportFlowVisualisation", po::value(&exportFlowVisualisation)->default_value(exportFlowVisualisation), + "For all frames, export the optical flow visualisation in HSV as PNG images.") + ("flowVisualisationOnly", po::value(&flowVisualisationOnly)->default_value(flowVisualisationOnly), + "Export the optical flow visualisation in HSV as PNG images for all frames but do not compute scores."); aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" "AliceVision keyframeSelection"); @@ -182,10 +188,20 @@ int aliceVision_main(int argc, char** argv) selector.setMinOutFrames(minNbOutFrames); selector.setMaxOutFrames(maxNbOutFrames); + if (flowVisualisationOnly) { + bool exported = selector.exportFlowVisualisation(rescaledWidth); + if (exported) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; + } + if (skipSelection) { selector.computeScores(rescaledWidth, sharpnessWindowSize, flowCellSize); if (exportScores) selector.exportScoresToFile(csvFilename); // Frames have not been selected, ignore 'exportSelectedFrames' + if (exportFlowVisualisation) + selector.exportFlowVisualisation(rescaledWidth); return EXIT_SUCCESS; } @@ -199,9 +215,11 @@ int aliceVision_main(int argc, char** argv) // Write selected keyframes selector.writeSelection(brands, models, mmFocals); - // If debug options are set, export the scores as a CSV file + // If debug options are set, export the scores as a CSV file and / or the motion vectors as images if (exportScores) selector.exportScoresToFile(csvFilename, exportSelectedFrames); + if (exportFlowVisualisation) + selector.exportFlowVisualisation(rescaledWidth); return EXIT_SUCCESS; } From 00356641a12b264b056169bbf42a7fb5789f3b46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Fri, 27 Jan 2023 18:50:27 +0100 Subject: [PATCH 11/28] [keyframe] Handle corner case when identifying subsequences This commit handles the specific case where the last frame of an input sequence happens to be the one that pushes the motion accumulation over the threshold: the last frame was being pushed a first time within the 'for' loop, and then a second time outside of it. This led the last subsequence to have a size of 0, which led to the 'while' loop condition filling the weights' vector to never be broken. 
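To make the boundary handling concrete, here is a simplified, self-contained sketch of the corrected splitting loop (illustrative only; the function name and free-function form are not taken from the patch): the loop stops before the last frame index, so the last frame can only be appended by the explicit push after the loop and the final subsequence can never be empty.

    // Simplified illustration of the corrected subsequence splitting; not the actual patch code.
    #include <cstddef>
    #include <vector>

    std::vector<std::size_t> splitByMotion(const std::vector<double>& flowScores, const double step)
    {
        const std::size_t sequenceSize = flowScores.size();
        std::vector<std::size_t> limits{0};  // always start the first subsequence at frame 0

        double motionAcc = 0.0;
        // Start at 1 (the first frame's flow score is -1) and stop before the last frame,
        // so the last frame cannot be pushed both inside the loop and after it.
        for (std::size_t i = 1; i + 1 < sequenceSize; ++i) {
            motionAcc += flowScores[i];
            if (motionAcc >= step) {
                limits.push_back(i);
                motionAcc = 0.0;
            }
        }
        limits.push_back(sequenceSize - 1);  // close the final subsequence exactly once
        return limits;
    }

With this bound, a threshold crossing on the very last frame simply ends the previous subsequence at an earlier index, and the explicit push keeps the final subsequence non-empty, so the weight-filling loop always terminates.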
--- src/aliceVision/keyframe/KeyframeSelector.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index c61d717561..154261d0a8 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -157,7 +157,10 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ float step = pxDisplacement * std::min(_frameWidth, _frameHeight) / 100.0; double motionAcc = 0.0; - for (std::size_t i = 1; i < sequenceSize; ++i) { // Starts at 1 because the first frame's motion score will be -1 + + /* Starts at 1 because the first frame's motion score will be -1. + * Ends at sequenceSize - 1 to ensure the last frame cannot be pushed twice. */ + for (std::size_t i = 1; i < sequenceSize - 1; ++i) { motionAcc += _flowScores.at(i); if (motionAcc >= step) { subsequenceLimits.push_back(i); @@ -188,7 +191,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ } motionAcc = 0.0; - for (std::size_t i = 1; i < sequenceSize; ++i) { + for (std::size_t i = 1; i < sequenceSize - 1; ++i) { motionAcc += _flowScores.at(i); if (motionAcc >= step) { newLimits.push_back(i); @@ -204,7 +207,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ newLimits.push_back(0); std::size_t stepSize = (sequenceSize / _minOutFrames) + 1; - for (std::size_t i = 1; i < sequenceSize; i += stepSize) + for (std::size_t i = 1; i < sequenceSize - 1; i += stepSize) newLimits.push_back(i); newLimits.push_back(sequenceSize - 1); } @@ -216,7 +219,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ step = step + displacementDiff; motionAcc = 0.0; - for (std::size_t i = 1; i < sequenceSize; ++i) { + for (std::size_t i = 1; i < sequenceSize - 1; ++i) { motionAcc += _flowScores.at(i); if (motionAcc >= step) { newLimits.push_back(i); @@ -236,6 +239,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ double bestSharpness = 0.0; std::size_t bestIndex = 0; std::size_t subsequenceSize = subsequenceLimits.at(i) - subsequenceLimits.at(i - 1); + ALICEVISION_LOG_DEBUG("Subsequence [" << subsequenceLimits.at(i - 1) << ", " << subsequenceLimits.at(i) << "]"); // Weights for the whole subsequence [1.0; 2.0] (1.0 is on the subsequence's limits, 2.0 on its center) std::deque weights; @@ -260,10 +264,12 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ bestSharpness = sharpness; } } - ALICEVISION_LOG_DEBUG("Selecting frame " << bestIndex); + ALICEVISION_LOG_DEBUG("Selecting frame with ID " << bestIndex); _selectedKeyframes.push_back(bestIndex); _selectedFrames.at(bestIndex) = '1'; // The frame has been selected, flip it to 1 } + + ALICEVISION_LOG_INFO("Finished selecting all the keyframes!"); } bool KeyframeSelector::computeScores(const std::size_t rescaledWidth, const std::size_t sharpnessWindowSize, @@ -350,7 +356,7 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidth, const std: minimalFlow = std::min(minimalFlow, flow); } - ALICEVISION_LOG_DEBUG("Finished processing frame " << currentFrame + 1 << "/" << nbFrames); + ALICEVISION_LOG_INFO("Finished processing frame " << currentFrame + 1 << "/" << nbFrames); } // Save scores for the current frame @@ -573,6 +579,7 @@ bool KeyframeSelector::exportFlowVisualisation(const std::size_t rescaledWidth) std::ostringstream filenameSS; filenameSS 
<< std::setw(5) << std::setfill('0') << currentFrame << ".png"; cv::imwrite(outputFolders.at(mediaIndex) + "/OF_" + filenameSS.str(), bgr); + ALICEVISION_LOG_DEBUG("Wrote OF_" << filenameSS.str() << "!"); } } ++currentFrame; From abb4f52be52d5b7e4746524cb6d734514fef3c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Wed, 25 Jan 2023 18:28:24 +0100 Subject: [PATCH 12/28] [utils] KeyframeSelection: Update software version --- src/software/utils/main_keyframeSelection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 4064d361f6..ca4584d242 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -17,7 +17,7 @@ // These constants define the current software version. // They must be updated when the command line is changed. -#define ALICEVISION_SOFTWARE_VERSION_MAJOR 2 +#define ALICEVISION_SOFTWARE_VERSION_MAJOR 3 #define ALICEVISION_SOFTWARE_VERSION_MINOR 0 using namespace aliceVision::keyframe; From 1e77494669f27b060d324bc7569217098a74df53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Tue, 31 Jan 2023 19:51:49 +0100 Subject: [PATCH 13/28] KeyframeSelection: Add support for several output extension files Keyframes may be written as JPG, PNG or EXR files. If the EXR format is selected, the storage data type can be specified as well. --- src/aliceVision/keyframe/KeyframeSelector.cpp | 11 ++++++--- src/aliceVision/keyframe/KeyframeSelector.hpp | 6 ++++- src/software/utils/CMakeLists.txt | 1 + src/software/utils/main_keyframeSelection.cpp | 24 +++++++++++++++---- 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 154261d0a8..6683d0fc69 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -5,7 +5,6 @@ // You can obtain one at https://mozilla.org/MPL/2.0/. #include "KeyframeSelector.hpp" -#include #include #include @@ -370,7 +369,9 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidth, const std: bool KeyframeSelector::writeSelection(const std::vector& brands, const std::vector& models, - const std::vector& mmFocals) const + const std::vector& mmFocals, + const std::string& outputExtension, + const image::EStorageDataType storageDataType) const { image::Image image; camera::PinholeRadialK3 queryIntrinsics; @@ -422,7 +423,7 @@ bool KeyframeSelector::writeSelection(const std::vector& brands, fs::path folder = _outputFolder; std::ostringstream filenameSS; - filenameSS << std::setw(5) << std::setfill('0') << pos << ".exr"; + filenameSS << std::setw(5) << std::setfill('0') << pos << "." 
<< outputExtension; const auto filepath = (processedOutputFolder / fs::path(filenameSS.str())).string(); image::ImageWriteOptions options; @@ -437,6 +438,10 @@ bool KeyframeSelector::writeSelection(const std::vector& brands, options.toColorSpace(image::EImageColorSpace::AUTO); } + if (storageDataType != image::EStorageDataType::Undefined && outputExtension == "exr"){ + options.storageDataType(storageDataType); + } + image::writeImage(filepath, image, options, metadata); ALICEVISION_LOG_DEBUG("Wrote selected keyframe " << pos); } diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index f6c76d743d..3c6e6a5329 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include @@ -98,10 +99,13 @@ class KeyframeSelector * @param[in] brands brand name for each camera * @param[in] models model name for each camera * @param[in] mmFocals focal in millimeters for each camera + * @param[in] outputExtension file extension of the written keyframes + * @param[in] storageDataType EXR storage data type for the output keyframes (ignored when the extension is not EXR) * @return true if all the selected keyframes were successfully written, false otherwise */ bool writeSelection(const std::vector& brands, const std::vector& models, - const std::vector& mmFocals) const; + const std::vector& mmFocals, const std::string& outputExtension, + const image::EStorageDataType storageDataType = image::EStorageDataType::Undefined) const; /** * @brief Export the computed sharpness and optical flow scores to a CSV file diff --git a/src/software/utils/CMakeLists.txt b/src/software/utils/CMakeLists.txt index a03be1c10f..561ee04e30 100644 --- a/src/software/utils/CMakeLists.txt +++ b/src/software/utils/CMakeLists.txt @@ -223,6 +223,7 @@ if(ALICEVISION_HAVE_OPENCV) SOURCE main_keyframeSelection.cpp FOLDER ${FOLDER_SOFTWARE_UTILS} LINKS aliceVision_system + aliceVision_image aliceVision_keyframe ${OPENIMAGEIO_LIBRARIES} Boost::program_options diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index ca4584d242..1539582290 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -4,6 +4,7 @@ // v. 2.0. If a copy of the MPL was not distributed with this file, // You can obtain one at https://mozilla.org/MPL/2.0/. 
+#include #include #include #include @@ -20,11 +21,13 @@ #define ALICEVISION_SOFTWARE_VERSION_MAJOR 3 #define ALICEVISION_SOFTWARE_VERSION_MINOR 0 -using namespace aliceVision::keyframe; +using namespace aliceVision; namespace po = boost::program_options; namespace fs = boost::filesystem; +const std::string supportedExtensions = "exr, jpg, png"; + int aliceVision_main(int argc, char** argv) { // Command-line parameters @@ -45,6 +48,9 @@ int aliceVision_main(int argc, char** argv) std::size_t rescaledWidth = 720; // width of the rescaled frames; 0 if no rescale is performed (smart selection) std::size_t sharpnessWindowSize = 200; // sliding window's size in sharpness computation (smart selection) std::size_t flowCellSize = 90; // size of the cells within a frame used to compute the optical flow (smart selection) + std::string outputExtension = "exr"; // file extension of the written keyframes + image::EStorageDataType exrDataType = // storage data type for EXR output files + image::EStorageDataType::Float; // Debug options bool exportScores = false; // export the sharpness and optical flow scores to a CSV file @@ -79,7 +85,11 @@ int aliceVision_main(int argc, char** argv) "\t- For the regular method, 0 = no limit. 'minFrameStep' and 'maxFrameStep' will always be respected, " "so combining them with this parameter might cause the selection to stop before reaching the end of the " "input sequence(s).\n" - "\t- For the smart method, the default value is set to 2000."); + "\t- For the smart method, the default value is set to 2000.") + ("outputExtension", po::value(&outputExtension)->default_value(outputExtension), + "File extension of the output keyframes.") + ("storageDataType", po::value(&exrDataType)->default_value(exrDataType), + ("Storage data type for EXR output files: " + image::EStorageDataType_informations()).c_str()); po::options_description regularAlgorithmParams("Regular algorithm parameters"); regularAlgorithmParams.add_options() @@ -160,6 +170,12 @@ int aliceVision_main(int argc, char** argv) return EXIT_FAILURE; } + if (supportedExtensions.find(outputExtension) == std::string::npos) { + ALICEVISION_LOG_ERROR("Unsupported extension for the output file. Supported extensions are: " + << supportedExtensions); + return EXIT_FAILURE; + } + brands.resize(nbCameras); models.resize(nbCameras); mmFocals.resize(nbCameras); @@ -180,7 +196,7 @@ int aliceVision_main(int argc, char** argv) } // Initialize KeyframeSelector - KeyframeSelector selector(mediaPaths, sensorDbPath, outputFolder); + keyframe::KeyframeSelector selector(mediaPaths, sensorDbPath, outputFolder); // Set frame-related algorithm parameters selector.setMinFrameStep(minFrameStep); @@ -213,7 +229,7 @@ int aliceVision_main(int argc, char** argv) selector.processRegular(); // Write selected keyframes - selector.writeSelection(brands, models, mmFocals); + selector.writeSelection(brands, models, mmFocals, outputExtension, exrDataType); // If debug options are set, export the scores as a CSV file and / or the motion vectors as images if (exportScores) From 0cccda7e91018285855618b9c9feb7908d726453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Wed, 1 Feb 2023 18:47:44 +0100 Subject: [PATCH 14/28] KeyframeSelection: Split rescaling parameters for sharpness and flow scores The rescaled frames used to compute the sharpness and motion scores used to be the same, with a single parameter to specify the rescale value. 
As we may want to use different rescale values depending on whether we are computing the sharpness or the motion score (or no rescale for one but a rescale for the other), the existing "rescaledWidth" parameter is split into two new parameters, "rescaledWidthSharpness" and "rescaledWidthFlow". --- src/aliceVision/keyframe/KeyframeSelector.cpp | 34 ++++++++++++------- src/aliceVision/keyframe/KeyframeSelector.hpp | 19 +++++++---- src/software/utils/main_keyframeSelection.cpp | 22 +++++++----- 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 6683d0fc69..c5c7eedfcf 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -135,14 +135,15 @@ void KeyframeSelector::processRegular() } } -void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_t rescaledWidth, - const std::size_t sharpnessWindowSize, const std::size_t flowCellSize) +void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_t rescaledWidthSharpness, + const std::size_t rescaledWidthFlow, const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize) { _selectedKeyframes.clear(); _selectedFrames.clear(); // Step 0: compute all the scores - computeScores(rescaledWidth, sharpnessWindowSize, flowCellSize); + computeScores(rescaledWidthSharpness, rescaledWidthFlow, sharpnessWindowSize, flowCellSize); // Step 1: determine subsequences based on the motion accumulation std::vector subsequenceLimits; @@ -271,8 +272,8 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ ALICEVISION_LOG_INFO("Finished selecting all the keyframes!"); } -bool KeyframeSelector::computeScores(const std::size_t rescaledWidth, const std::size_t sharpnessWindowSize, - const std::size_t flowCellSize) +bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, + const std::size_t sharpnessWindowSize, const std::size_t flowCellSize) { // Reset the computed scores _sharpnessScores.clear(); @@ -324,7 +325,8 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidth, const std: } std::size_t currentFrame = 0; - cv::Mat previousMat, currentMat; // OpenCV matrices for the optical flow computation + cv::Mat currentMatSharpness; // OpenCV matrix for the sharpness computation + cv::Mat previousMatFlow, currentMatFlow; // OpenCV matrices for the optical flow computation auto ptrFlow = cv::optflow::createOptFlow_DeepFlow(); while (currentFrame < nbFrames) { @@ -335,23 +337,29 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidth, const std: auto& feed = *feeds.at(mediaIndex); if (currentFrame > 0) { // Get currentFrame - 1 for the optical flow computation - previousMat = readImage(feed, rescaledWidth); + previousMatFlow = readImage(feed, rescaledWidthFlow); feed.goToNextFrame(); } - currentMat = readImage(feed, rescaledWidth); // Read image and rescale it if requested - if (_frameWidth == 0 && _frameHeight == 0) { - _frameWidth = currentMat.size().width; - _frameHeight = currentMat.size().height; + currentMatSharpness = readImage(feed, rescaledWidthSharpness); // Read image for sharpness and rescale it if requested + if (rescaledWidthSharpness == rescaledWidthFlow) { + currentMatFlow = currentMatSharpness; + } else { + currentMatFlow = readImage(feed, rescaledWidthFlow); + } + + if (_frameWidth == 0 && _frameHeight == 0) { // Will be used 
later on to determine the motion accumulation step + _frameWidth = currentMatFlow.size().width; + _frameHeight = currentMatFlow.size().height; } // Compute sharpness - const double sharpness = computeSharpness(currentMat, sharpnessWindowSize); + const double sharpness = computeSharpness(currentMatSharpness, sharpnessWindowSize); minimalSharpness = std::min(minimalSharpness, sharpness); // Compute optical flow if (currentFrame > 0) { - const double flow = estimateFlow(ptrFlow, currentMat, previousMat, flowCellSize); + const double flow = estimateFlow(ptrFlow, currentMatFlow, previousMatFlow, flowCellSize); minimalFlow = std::min(minimalFlow, flow); } diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index 3c6e6a5329..942210e015 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -75,24 +75,31 @@ class KeyframeSelector * significant differences in their motion * - Step 4: push the selected frames' IDs * @param[in] pxDisplacement in percent, the minimum of displaced pixels in the image since the last selected frame + * @param[in] rescaledWidthSharpness to resize the input frames to before using them to compute the + * sharpness scores (if equal to 0, no rescale will be performed) + * @param[in] rescaledWidthFlow the width to resize the input frames to before using them to compute the + * motion scores (if equal to 0, no rescale will be performed) * @param[in] sharpnessWindowSize the size of the sliding window used to compute sharpness scores, in pixels * @param[in] flowCellSize the size of the cells within a frame that are used to compute the optical flow scores, * in pixels */ - void processSmart(const float pxDisplacement, const std::size_t rescaledWidth, - const std::size_t sharpnessWindowSize, const std::size_t flowCellSize); + void processSmart(const float pxDisplacement, const std::size_t rescaledWidthSharpness, + const std::size_t rescaledWidthFlow, const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize); /** * @brief Compute the sharpness and optical flow scores for the input media paths - * @param[in] rescaledWidth the width to resize the input frames to before processing them (if equal to 0, no - * rescale will be performed) + * @param[in] rescaledWidthSharpness the width to resize the input frames to before using them to compute the + * sharpness scores (if equal to 0, no rescale will be performed) + * @param[in] rescaledWidthFlow the width to resize the input frames to before using them to compute the + * motion scores (if equal to 0, no rescale will be performed) * @param[in] sharpnessWindowSize the size of the sliding window used to compute sharpness scores, in pixels * @param[in] flowCellSize the size of the cells within a frame that are used to compute the optical flow scores, * in pixels * @return true if the scores have been successfully computed for all frames, false otherwise */ - bool computeScores(const std::size_t rescaledWidth, const std::size_t sharpnessWindowSize, - const std::size_t flowCellSize); + bool computeScores(const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, + const std::size_t sharpnessWindowSize, const std::size_t flowCellSize); /** * @brief Write the selected keyframes in the output folder diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 1539582290..d9dea32da9 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ 
b/src/software/utils/main_keyframeSelection.cpp @@ -45,7 +45,8 @@ int aliceVision_main(int argc, char** argv) unsigned int minNbOutFrames = 10; // minimum number of selected keyframes (smart selection) unsigned int maxNbOutFrames = 2000; // maximum number of selected keyframes (both selections) float pxDisplacement = 3.0; // percentage of pixels that have moved across frames since last keyframe (smart selection) - std::size_t rescaledWidth = 720; // width of the rescaled frames; 0 if no rescale is performed (smart selection) + std::size_t rescaledWidthSharp = 720; // width of the rescaled frames for the sharpness; 0 if no rescale is performed (smart selection) + std::size_t rescaledWidthFlow = 720; // width of the rescaled frames for the flow; 0 if no rescale is performed (smart selection) std::size_t sharpnessWindowSize = 200; // sliding window's size in sharpness computation (smart selection) std::size_t flowCellSize = 90; // size of the cells within a frame used to compute the optical flow (smart selection) std::string outputExtension = "exr"; // file extension of the written keyframes @@ -107,9 +108,12 @@ int aliceVision_main(int argc, char** argv) ("pxDisplacement", po::value(&pxDisplacement)->default_value(pxDisplacement), "Percentage of pixels in the image that have been displaced since the last selected frame. The absolute " "number of moving pixels is determined using min(imageWidth, imageHeight).") - ("rescaledWidth", po::value(&rescaledWidth)->default_value(rescaledWidth), - "Width, in pixels, of the rescaled input frames used to compute the scores. The height of the rescaled " - "frames will be automatically determined to preserve the aspect ratio. 0 = no rescale.") + ("rescaledWidthSharpness", po::value(&rescaledWidthSharp)->default_value(rescaledWidthSharp), + "Width, in pixels, of the rescaled input frames used to compute the sharpness scores. The height of the " + "rescaled frames will be automatically determined to preserve the aspect ratio. 0 = no rescale.") + ("rescaledWidthFlow", po::value(&rescaledWidthFlow)->default_value(rescaledWidthFlow), + "Width, in pixels, of the rescaled input frames used to compute the motion scores. The height of the " + "rescaled frames will be automatically determined to preserve the aspect ratio. 
0 = no rescale.") ("sharpnessWindowSize", po::value(&sharpnessWindowSize)->default_value(sharpnessWindowSize), "Size, in pixels, of the sliding window that is used to compute the sharpness score of a frame.") ("flowCellSize", po::value(&flowCellSize)->default_value(flowCellSize), @@ -205,7 +209,7 @@ int aliceVision_main(int argc, char** argv) selector.setMaxOutFrames(maxNbOutFrames); if (flowVisualisationOnly) { - bool exported = selector.exportFlowVisualisation(rescaledWidth); + bool exported = selector.exportFlowVisualisation(rescaledWidthFlow); if (exported) return EXIT_SUCCESS; else @@ -213,18 +217,18 @@ int aliceVision_main(int argc, char** argv) } if (skipSelection) { - selector.computeScores(rescaledWidth, sharpnessWindowSize, flowCellSize); + selector.computeScores(rescaledWidthSharp, rescaledWidthFlow, sharpnessWindowSize, flowCellSize); if (exportScores) selector.exportScoresToFile(csvFilename); // Frames have not been selected, ignore 'exportSelectedFrames' if (exportFlowVisualisation) - selector.exportFlowVisualisation(rescaledWidth); + selector.exportFlowVisualisation(rescaledWidthFlow); return EXIT_SUCCESS; } // Process media paths with regular or smart method if (useSmartSelection) - selector.processSmart(pxDisplacement, rescaledWidth, sharpnessWindowSize, flowCellSize); + selector.processSmart(pxDisplacement, rescaledWidthSharp, rescaledWidthFlow, sharpnessWindowSize, flowCellSize); else selector.processRegular(); @@ -235,7 +239,7 @@ int aliceVision_main(int argc, char** argv) if (exportScores) selector.exportScoresToFile(csvFilename, exportSelectedFrames); if (exportFlowVisualisation) - selector.exportFlowVisualisation(rescaledWidth); + selector.exportFlowVisualisation(rescaledWidthFlow); return EXIT_SUCCESS; } From 97011431f4405168279d45b60fceb0fff5351935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Tue, 7 Feb 2023 11:04:19 +0100 Subject: [PATCH 15/28] [keyframe] Add a README describing the Keyframe module --- src/aliceVision/keyframe/README.md | 133 +++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 src/aliceVision/keyframe/README.md diff --git a/src/aliceVision/keyframe/README.md b/src/aliceVision/keyframe/README.md new file mode 100644 index 0000000000..7f6dd34259 --- /dev/null +++ b/src/aliceVision/keyframe/README.md @@ -0,0 +1,133 @@ +# Keyframe Selection + +This module provides several methods to perform a keyframe selection. + +The goal of the keyframe selection is to extract, from an input video or an input sequence of images, keyframes. +Two methods are currently supported: +- a **regular** selection method, which selects keyframes regularly across the input video / sequence according to a set of parameters; +- a **smart** selection method, which analyses the sharpness and motion of all the frames to select those which are deemed the most relevant (a frame is considered relevant if it contains significant motion in comparison to the last selected keyframe while being as sharp as possible). + +The selected keyframes can be written as JPG, PNG or EXR images, and the storage data type can be specified when the EXR file extension is selected. + +The keyframe selection module supports the following inputs: +- a path to a video file (e.g. "/path/to/video.mp4") +- a path to a folder containing images (e.g. "/path/to/folder/") +- a path to a folder containing images with a regular expression (e.g. "/path/to/folder/*.exr") + +Camera rigs are also supported. 
+ +## Regular selection method + +The regular selection samples frames regularly over time with respect to some user-provided constraints, that can be combined: +- `minFrameStep`: the minimum number of frames between two selected keyframes. If only `minFrameStep` is set, one keyframe will be selected every `minFrameStep` all along the video. +- `maxNbOutFrames`: the maximum number of selected keyframes (if set to 0, the number of selected keyframes will be unlimited). If only `maxNbOutFrames` is set, `maxNbOutFrames` keyframes equally spaced along the video will be selected. + +If both `minFrameStep` and `maxNbOutFrames` are set, up to `maxNbOutFrames` keyframes separated by at least `minFrameStep` frames will be selected. Examples of the parameter combinations are available in the [Examples](#examples) section. + +### Advanced regular selection + +For a more advanced regular selection, another parameter, `maxFrameStep`, is available to combine a relatively strict sampling with a maximum number of output frames. `maxFrameStep` sets the maximum number of frames between two selected keyframes and ensures that there will not be way more frames between two keyframes than expected when `maxNbOutFrames` is also set. `maxFrameStep` always takes precedence over `maxNbOutFrames`, meaning that the input video / sequence might not be sampled entirely for all the constraints to be respected. + +Combinations of the different parameters and the influence of `maxFrameStep` are shown in the [Examples](#examples) section. + + +### Examples + +The expected behaviour for the regular selection depending on the set parameters can be summed up as follows: + +- If only `minFrameStep` is set, the whole sequence will be sampled and a keyframe will be selected every `minFrameStep`. E.g: if a sequence has 2000 frames and `minFrameStep = 100`, 21 keyframes will be selected, with exactly 100 frames between them. + +- If `minFrameStep` and `maxNbOutFrames` are set, there will never be less than `minFrameStep` between the keyframes, but there might be more for the whole sequence to be sampled while respecting `maxNbOutFrames`. E.g: if a sequence has 2000 frames, `minFrameStep = 100` and `maxNbOutFrames = 10`, 10 keyframes with 222 frames between them will be selected, so both `maxNbOutFrames` and `minFrameStep` are respected. If the sequence has 500 frames and `minFrameStep = 100` / `maxNbOutFrames = 10`, there will be 6 keyframes with 100 frames between them. No matter the value of the parameters, the entire sequence will be sampled. + +- If `minFrameStep` and `maxFrameStep` are both set but `maxNbOutFrames` is not, then the step between two keyframes will be exactly between `minFrameStep` and `maxFrameStep`. If `minFrameStep = 100` and `maxFrameStep = 200` without other constraints, it is equivalent to setting `minFrameStep = 150`. + +- If `minFrameStep`, `maxFrameStep` and `maxNbOutFrames` are all set, then `maxFrameStep` prevents the step between two frames from increasing too much to respect `maxNbOutFrames`. With the sequence of 2000 frames, having `minFrameStep = 100`, `maxFrameStep = 150` and `maxNbOutFrames = 10` will lead to 10 keyframes with 150 frames between each, and the sampling will stop before reaching the end of the sequence so that all the constraints are respected. In the same example, if `maxFrameStep = 300`, then there will be 10 keyframes with 222 frames between them, and the whole sequence will be sampled. 
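The sketch below is a simplified model of the precedence rules illustrated above, not the actual implementation: it only covers the combinations where `maxNbOutFrames` is set, and the helper name `regularStep` is invented for the example.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

// Simplified model (not the actual implementation): derive the sampling step from
// maxNbOutFrames, floored by minFrameStep and capped by maxFrameStep.
std::size_t regularStep(std::size_t nbFrames, std::size_t minFrameStep,
                        std::size_t maxFrameStep /* 0 = unlimited */,
                        std::size_t maxNbOutFrames /* 0 = unlimited */)
{
    std::size_t step = minFrameStep;
    if (maxNbOutFrames > 1)
        step = std::max(step, (nbFrames - 1) / (maxNbOutFrames - 1));
    if (maxFrameStep > 0)
        step = std::min(step, maxFrameStep);
    return step;
}

int main()
{
    // 2000 frames, minFrameStep = 100, maxFrameStep = 150, maxNbOutFrames = 10:
    // the step is capped at 150, so the sampling stops before the end of the sequence.
    std::cout << regularStep(2000, 100, 150, 10) << "\n";  // prints 150
    // With maxFrameStep = 300, the step becomes 222 and the whole sequence is covered.
    std::cout << regularStep(2000, 100, 300, 10) << "\n";  // prints 222
}
```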
+ +## Smart selection method + +The smart selection works in two steps: +- for each frame in the input video / sequence, a sharpness score and a motion score are computed; +- the sharpness and motion scores are used as well as the temporal position of the evaluated frame to determine whether the frame will be selected. + +The method aims at selecting a frame that is as sharp as possible with significant motion compared to the previously selected frame: consecutive frames should not be selected as keyframes if they do not contain enough motion, even if they are both very sharp. + +The minimum and maximum number of selected keyframes with the smart method can be set with the following parameters: +- `minNbOutFrames`: the minimum number of selected keyframes; +- `maxNbOutFrames`: the maximum number of selected keyframes. + +### Frame scoring + +For both the sharpness and motion scores, the evaluated frame is converted to a grayscale OpenCV matrix that may be rescaled beforehand, using the `rescaledWidthSharpness` and `rescaledWidthFlow` parameters respectively (0 = no rescale). + +#### Sharpness score + +The Laplacian of the input frame is first computed, followed by the integral image of the Laplacian. A sliding window of size `sharpnessWindowSize` is used to compute the standard deviation of the averaged Laplacian locally. The final sharpness score will be the highest standard deviation found. + +The image is evaluated with a sliding window instead of as a whole to prevent giving a bad score (low standard deviation) to a frame that contains a sharp element but is overall blurry. + +#### Motion score + +The dense optical flow of a frame is computed. The frame is then divided into cells of `flowCellSize` pixels in which the motion vectors are averaged to obtain a displacement value (in pixels) within that cell. Once all the displacement values have been computed, the median of these displacement values is used as the motion score. + +### Selection + +Once both the sharpness and motion scores have been computed, subsequences are identified based on the motion accumulation across frames. The motion accumulation threshold is set with `pxDisplacement`, which represents, in per cent, the number of pixels that need to have moved since the last keyframe for the motion to be significant. As the motion scores represent a displacement value for each frame, summing them over time until the accumulation reaches the threshold makes it possible to divide the input video / sequence into subsequences that all contain significant motion. + +Within each subsequence, a single frame is to be selected as a keyframe. Before proceeding to the selection itself, the number of identified subsequences is checked to ensure that the minimum and maximum numbers of requested output keyframes are respected. +- If not enough subsequences have been identified, the motion accumulation threshold is lowered iteratively with a step of 0.5 px until it either reaches 0 or yields the expected number of subsequences. If 0 is reached, the motion accumulation criterion stops making sense and is thus replaced by a regular sampling: in that specific case, the smart method falls back to the regular method's behaviour. +- If too many subsequences have been identified, the motion accumulation threshold is increased iteratively with a step of 0.5 px until an acceptable number of subsequences is identified.
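As a rough illustration of this adjustment (a sketch under simplifying assumptions, not the actual implementation; `countSubsequences` and `adaptStep` are invented names), the threshold adaptation could look as follows:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Count the subsequences produced by a given motion-accumulation step
// (same accumulation pass as described in this section).
std::size_t countSubsequences(const std::vector<double>& flowScores, double step)
{
    std::size_t count = 1;  // the first frame always opens a subsequence
    double motionAcc = 0.0;
    for (std::size_t i = 1; i + 1 < flowScores.size(); ++i) {
        motionAcc += flowScores[i];
        if (motionAcc >= step) {
            ++count;
            motionAcc = 0.0;
        }
    }
    return count;
}

// Adjust the step by 0.5 px until the number of subsequences fits
// [minNbOutFrames, maxNbOutFrames]; a step of 0 means the caller should
// fall back to a regular sampling.
double adaptStep(const std::vector<double>& flowScores, double step,
                 std::size_t minNbOutFrames, std::size_t maxNbOutFrames)
{
    while (step > 0.0 && countSubsequences(flowScores, step) < minNbOutFrames)
        step -= 0.5;  // too few subsequences: lower the motion threshold
    while (step > 0.0 && countSubsequences(flowScores, step) > maxNbOutFrames)
        step += 0.5;  // too many subsequences: raise the motion threshold
    return step;
}

int main()
{
    const std::vector<double> flowScores = {-1.0, 2.0, 1.5, 0.5, 3.0, 0.2, 2.5, 1.0};
    std::cout << adaptStep(flowScores, 4.0, 2, 3) << "\n";
}
```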
+ +A keyframe is thus selected for each subsequence, based on its sharpness score as well as its position in its subsequence: the sharpness score of each frame is combined to a weight based on its position within the subsequence, with the best weights applied to the frames located at the middle of the subsequence, and the worst weights applied to the frames located on the subsequence's borders. + +The weights aim at favouring the selection of keyframes that are as temporally far from each other as possible. Using only the sharpness scores to select a keyframe within a subsequence could lead to two consecutive very sharp frames, respectively located at the very end of a subsequence and at the very beginning of the following subsequence, being selected. This would hinder the relevancy of the whole process, as they would likely not contain any significant difference. + +### Debug options + +Debug options specific to the smart selection method are available: +- Export scores to CSV: the sharpness and motion scores for all the frames are written to a CSV file; +- Visualise the optical flow: the computed motion vectors are, for each frame, visualised with HSV images that are written as PNG images. + + +## API + +- Constructor +```cpp +KeyframeSelector(const std::vector& mediaPaths, + const std::string& sensorDbPath, + const std::string& outputFolder); +``` +- Selection with regular method +```cpp +void processRegular(); +``` +- Selection with smart method +```cpp +void processSmart(const float pxDisplacement, + const std::size_t rescaledWidthSharpness, + const std::size_t rescaledWidthFlow, + const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize); +``` +- Score computation +```cpp +bool computeScores(const std::size_t rescaledWidthSharpness, + const std::size_t rescaledWidthFlow, + const std::size_t sharpnessWindowSize, + const std::size_t flowCellSize); +``` +- Write selected keyframes +```cpp +bool writeSelection(const std::vector& brands, + const std::vector& models, + const std::vector& mmFocals, + const std::string& outputExtension, + const image::EStorageDataType storageDataType = image::EStorageDataType::Undefined) const; +``` +- Debug options +```cpp +bool exportScoresToFile(const std::string& filename, + const bool exportSelectedFrames = false) const; + +bool exportFlowVisualisation(const std::size_t rescaledWidth); +``` \ No newline at end of file From 56a1939ef573ef969059c1a30534970ba1882c11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Tue, 7 Feb 2023 11:33:00 +0100 Subject: [PATCH 16/28] KeyframeSelection: Add an option to name output keyframes consecutively By default, the selected keyframes are written with their index within the input sequence / video as their name. If frames at index 15, 294 and 825 are selected as keyframes, they will be written as 00015.exr, 00294.exr and 00825.exr. This commit adds an option that allows to name them as consecutive frames instead. Frames at index 15, 294 and 825 are now written as 00000.exr, 00001.exr and 00002.exr if the option is enabled. 
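The naming scheme described above boils down to the following standalone snippet, which is purely illustrative and mirrors the formatting logic added by this patch:

```cpp
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main()
{
    const std::vector<unsigned int> selectedKeyframes = {15, 294, 825};  // indices from the commit message
    const std::string outputExtension = "exr";
    const bool renameKeyframes = true;

    unsigned int outputKeyframeCnt = 0;
    for (const auto pos : selectedKeyframes) {
        std::ostringstream filenameSS;
        if (renameKeyframes)
            filenameSS << std::setw(5) << std::setfill('0') << outputKeyframeCnt++ << "." << outputExtension;
        else
            filenameSS << std::setw(5) << std::setfill('0') << pos << "." << outputExtension;
        std::cout << filenameSS.str() << "\n";  // 00000.exr, 00001.exr, 00002.exr when renaming is enabled
    }
}
```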
--- src/aliceVision/keyframe/KeyframeSelector.cpp | 7 ++++++- src/aliceVision/keyframe/KeyframeSelector.hpp | 3 ++- src/software/utils/main_keyframeSelection.cpp | 8 +++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index c5c7eedfcf..59d9fbd7f6 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -378,6 +378,7 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c bool KeyframeSelector::writeSelection(const std::vector& brands, const std::vector& models, const std::vector& mmFocals, + const bool renameKeyframes, const std::string& outputExtension, const image::EStorageDataType storageDataType) const { @@ -411,6 +412,7 @@ bool KeyframeSelector::writeSelection(const std::vector& brands, } } + unsigned int outputKeyframeCnt = 0; // Used if the "renameKeyframes" option is enabled for (const auto pos : _selectedKeyframes) { if (!feed.goToFrame(pos)) { ALICEVISION_LOG_ERROR("Invalid frame position. Ignoring this frame."); @@ -431,7 +433,10 @@ bool KeyframeSelector::writeSelection(const std::vector& brands, fs::path folder = _outputFolder; std::ostringstream filenameSS; - filenameSS << std::setw(5) << std::setfill('0') << pos << "." << outputExtension; + if (renameKeyframes) + filenameSS << std::setw(5) << std::setfill('0') << outputKeyframeCnt++ << "." << outputExtension; + else + filenameSS << std::setw(5) << std::setfill('0') << pos << "." << outputExtension; const auto filepath = (processedOutputFolder / fs::path(filenameSS.str())).string(); image::ImageWriteOptions options; diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index 942210e015..06eeeebe24 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -106,12 +106,13 @@ class KeyframeSelector * @param[in] brands brand name for each camera * @param[in] models model name for each camera * @param[in] mmFocals focal in millimeters for each camera + * @param[in] renameKeyframes name output keyframes as consecutive frames instead of using their index as a name * @param[in] outputExtension file extension of the written keyframes * @param[in] storageDataType EXR storage data type for the output keyframes (ignored when the extension is not EXR) * @return true if all the selected keyframes were successfully written, false otherwise */ bool writeSelection(const std::vector& brands, const std::vector& models, - const std::vector& mmFocals, const std::string& outputExtension, + const std::vector& mmFocals, const bool renameKeyframes, const std::string& outputExtension, const image::EStorageDataType storageDataType = image::EStorageDataType::Undefined) const; /** diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index d9dea32da9..d84ade7804 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -52,6 +52,7 @@ int aliceVision_main(int argc, char** argv) std::string outputExtension = "exr"; // file extension of the written keyframes image::EStorageDataType exrDataType = // storage data type for EXR output files image::EStorageDataType::Float; + bool renameKeyframes = false; // name selected keyframes as consecutive frames instead of using their index as a name // Debug options bool exportScores = false; // export the 
sharpness and optical flow scores to a CSV file @@ -87,6 +88,11 @@ int aliceVision_main(int argc, char** argv) "so combining them with this parameter might cause the selection to stop before reaching the end of the " "input sequence(s).\n" "\t- For the smart method, the default value is set to 2000.") + ("renameKeyframes", po::value(&renameKeyframes)->default_value(renameKeyframes), + "Instead of naming the keyframes according to their index in the input sequence / video, rename them as " + "consecutive frames, starting from 0.\n" + "If the selected keyframes should have originally be written as [00015.exr, 00294.exr, 00825.exr], they " + "will instead be written as [00000.exr, 00001.exr, 00002.exr] if this option is enabled.") ("outputExtension", po::value(&outputExtension)->default_value(outputExtension), "File extension of the output keyframes.") ("storageDataType", po::value(&exrDataType)->default_value(exrDataType), @@ -233,7 +239,7 @@ int aliceVision_main(int argc, char** argv) selector.processRegular(); // Write selected keyframes - selector.writeSelection(brands, models, mmFocals, outputExtension, exrDataType); + selector.writeSelection(brands, models, mmFocals, renameKeyframes, outputExtension, exrDataType); // If debug options are set, export the scores as a CSV file and / or the motion vectors as images if (exportScores) From 576dec22ba8c5fce6c037c2498ba402205e7d8f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Wed, 8 Feb 2023 18:25:38 +0100 Subject: [PATCH 17/28] [keyframe] Handle missing frames within an input video If a frame is missing in a video sequence, instead of throwing an exception straight away, try reading the next frame. If the next frame is valid, then push dummy scores for the missing frame, and keep processing the input video. Otherwise, do throw the exception and stop the process. The dummy scores will be ignored in the final keyframe selection (explicitly in the case of the motion accumulation computation, implicitly when applying the weights during the sharpness selection). --- src/aliceVision/keyframe/KeyframeSelector.cpp | 50 ++++++++++++++++--- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 59d9fbd7f6..096694496e 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -161,7 +161,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ /* Starts at 1 because the first frame's motion score will be -1. * Ends at sequenceSize - 1 to ensure the last frame cannot be pushed twice. */ for (std::size_t i = 1; i < sequenceSize - 1; ++i) { - motionAcc += _flowScores.at(i); + motionAcc += _flowScores.at(i) > -1.f ? _flowScores.at(i) : 0.f; if (motionAcc >= step) { subsequenceLimits.push_back(i); motionAcc = 0.0; // Reset the motion accumulator @@ -192,7 +192,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ motionAcc = 0.0; for (std::size_t i = 1; i < sequenceSize - 1; ++i) { - motionAcc += _flowScores.at(i); + motionAcc += _flowScores.at(i) > -1.f ? _flowScores.at(i) : 0.f; if (motionAcc >= step) { newLimits.push_back(i); motionAcc = 0.0; @@ -220,7 +220,7 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ motionAcc = 0.0; for (std::size_t i = 1; i < sequenceSize - 1; ++i) { - motionAcc += _flowScores.at(i); + motionAcc += _flowScores.at(i) > -1.f ? 
_flowScores.at(i) : 0.f; if (motionAcc >= step) { newLimits.push_back(i); motionAcc = 0.0; @@ -341,7 +341,33 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c feed.goToNextFrame(); } - currentMatSharpness = readImage(feed, rescaledWidthSharpness); // Read image for sharpness and rescale it if requested + /* Handle input feeds that may have invalid or missing frames: + * - catch the "invalid argument" exception thrown by "readImage" if a frame is invalid or missing + * - try reading the next frame instead + * - if the next frame is correctly read, then push dummy scores for the invalid frame and go on with + * the process + * - otherwise (feed not correctly moved to the next frame), throw a runtime error exception as something + * is wrong with the video + */ + try { + currentMatSharpness = readImage(feed, rescaledWidthSharpness); // Read image for sharpness and rescale it if requested + } catch (const std::invalid_argument& ex) { + // currentFrame + 1 = currently evaluated frame with indexing starting at 1, for display reasons + // currentFrame + 2 = next frame to evaluate with indexing starting at 1, for display reasons + ALICEVISION_LOG_WARNING("Invalid or missing frame " << currentFrame + 1 + << ", attempting to read frame " << currentFrame + 2 << "."); + bool success = feed.goToFrame(++currentFrame); + if (success) { + // Will throw an exception if next frame is also invalid + currentMatSharpness = readImage(feed, rescaledWidthSharpness); + // If no exception has been thrown, push dummy scores for the frame that was skipped + _sharpnessScores.push_back(-1.f); + _flowScores.push_back(-1.f); + } else + ALICEVISION_THROW_ERROR("Could not go to frame " << currentFrame + 1 + << " either. The feed might be corrupted."); + } + if (rescaledWidthSharpness == rescaledWidthFlow) { currentMatFlow = currentMatSharpness; } else { @@ -415,7 +441,7 @@ bool KeyframeSelector::writeSelection(const std::vector& brands, unsigned int outputKeyframeCnt = 0; // Used if the "renameKeyframes" option is enabled for (const auto pos : _selectedKeyframes) { if (!feed.goToFrame(pos)) { - ALICEVISION_LOG_ERROR("Invalid frame position. Ignoring this frame."); + ALICEVISION_LOG_ERROR("Invalid frame position " << pos << ". Ignoring this frame."); continue; } @@ -573,7 +599,19 @@ bool KeyframeSelector::exportFlowVisualisation(const std::size_t rescaledWidth) feed.goToNextFrame(); } - currentMat = readImage(feed, rescaledWidth); // Read image and rescale it if requested + // Handle invalid or missing frames + try { + currentMat = readImage(feed, rescaledWidth); // Read image and rescale it if requested + } catch (const std::invalid_argument& ex) { + ALICEVISION_LOG_WARNING("Invalid or missing frame " << currentFrame + 1 + << ", attempting to read frame " << currentFrame + 2 << "."); + bool success = feed.goToFrame(++currentFrame); + if (success) + currentMat = readImage(feed, rescaledWidth); + else + ALICEVISION_THROW_ERROR("Could not go to frame " << currentFrame + 1 + << " either. 
The feed might be corrupted."); + } if (currentFrame > 0) { cv::Mat flow; From 23cef09db83435f6eeb7eb1ec7e0b51895c559d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Thu, 16 Feb 2023 17:27:23 +0100 Subject: [PATCH 18/28] [dataio] Clean-up trailing whitespaces --- src/aliceVision/dataio/FeedProvider.cpp | 12 ++--- src/aliceVision/dataio/FeedProvider.hpp | 12 ++--- src/aliceVision/dataio/IFeed.cpp | 8 ++-- src/aliceVision/dataio/IFeed.hpp | 16 +++---- src/aliceVision/dataio/ImageFeed.cpp | 58 ++++++++++++------------- src/aliceVision/dataio/ImageFeed.hpp | 16 +++---- src/aliceVision/dataio/VideoFeed.cpp | 40 ++++++++--------- src/aliceVision/dataio/VideoFeed.hpp | 14 +++--- 8 files changed, 88 insertions(+), 88 deletions(-) diff --git a/src/aliceVision/dataio/FeedProvider.cpp b/src/aliceVision/dataio/FeedProvider.cpp index d5763234a9..f4c31a66c3 100644 --- a/src/aliceVision/dataio/FeedProvider.cpp +++ b/src/aliceVision/dataio/FeedProvider.cpp @@ -22,7 +22,7 @@ namespace aliceVision{ namespace dataio{ -FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calibPath) +FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calibPath) : _isVideo(false), _isLiveFeed(false) { namespace bf = boost::filesystem; @@ -30,7 +30,7 @@ FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calib { throw std::invalid_argument("Empty filepath."); } - if(bf::is_regular_file(bf::path(feedPath))) + if(bf::is_regular_file(bf::path(feedPath))) { // Image or video file const std::string extension = bf::path(feedPath).extension().string(); @@ -38,7 +38,7 @@ FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calib { _feeder.reset(new ImageFeed(feedPath, calibPath)); } - else + else { #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_OPENCV) // let's try it with a video @@ -96,12 +96,12 @@ bool FeedProvider::readImage(image::Image &imageGray, { return(_feeder->readImage(imageGray, camIntrinsics, mediaPath, hasIntrinsics)); } - + std::size_t FeedProvider::nbFrames() const { if(_isLiveFeed) return std::numeric_limits::infinity(); - + return _feeder->nbFrames(); } @@ -122,5 +122,5 @@ bool FeedProvider::isInit() const FeedProvider::~FeedProvider( ) { } -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/FeedProvider.hpp b/src/aliceVision/dataio/FeedProvider.hpp index 1c6a1d1aa1..1d2cb13108 100644 --- a/src/aliceVision/dataio/FeedProvider.hpp +++ b/src/aliceVision/dataio/FeedProvider.hpp @@ -17,9 +17,9 @@ namespace dataio{ class FeedProvider { public: - + FeedProvider(const std::string &feedPath, const std::string &calibPath = ""); - + /** * @brief Provide a new RGB image from the feed. * @@ -51,7 +51,7 @@ class FeedProvider camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + /** * @brief Provide a new grayscale image from the feed. * @@ -101,7 +101,7 @@ class FeedProvider * @return True if the feed is a video. */ bool isVideo() const {return _isVideo; } - + /** * @brief Return true if the feed is a live stream (e.g. a webcam). 
* @@ -110,7 +110,7 @@ class FeedProvider bool isLiveFeed() const {return _isLiveFeed; } virtual ~FeedProvider(); - + private: std::unique_ptr _feeder; bool _isVideo; @@ -118,6 +118,6 @@ class FeedProvider }; -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/IFeed.cpp b/src/aliceVision/dataio/IFeed.cpp index ed266e128b..117aef9245 100644 --- a/src/aliceVision/dataio/IFeed.cpp +++ b/src/aliceVision/dataio/IFeed.cpp @@ -35,19 +35,19 @@ void readCalibrationFromFile(const std::string &filename, camera::PinholeRadialK int height = 0; const size_t numParam = 6; std::vector params(numParam, 0); - + fs >> width; fs >> height; for(size_t i = 0; i < numParam; ++i) { fs >> params[i]; } - camIntrinsics = camera::PinholeRadialK3(width, height, + camIntrinsics = camera::PinholeRadialK3(width, height, params[0], params[1], params[2], params[3], params[4], params[5]); - + fs.close(); } -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/IFeed.hpp b/src/aliceVision/dataio/IFeed.hpp index 00e07c5662..68cee71740 100644 --- a/src/aliceVision/dataio/IFeed.hpp +++ b/src/aliceVision/dataio/IFeed.hpp @@ -23,7 +23,7 @@ class IFeed * @return True if the feed is correctly initialized. */ virtual bool isInit() const = 0; - + /** * @brief Provide a new RGB image from the feed * @param[out] imageRGB The new RGB image from the feed. @@ -51,7 +51,7 @@ class IFeed camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics) = 0; - + /** * @brief Provide a new grayscale image from the feed * @param[out] imageGray The new image from the feed. @@ -62,16 +62,16 @@ class IFeed * @return True if there is a new image, false otherwise. */ virtual bool readImage(image::Image &imageGray, - camera::PinholeRadialK3 &camIntrinsics, + camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, - bool &hasIntrinsics) = 0; + bool &hasIntrinsics) = 0; virtual std::size_t nbFrames() const = 0; - + virtual bool goToFrame(const unsigned int frame) = 0; - + virtual bool goToNextFrame() = 0; - + virtual ~IFeed( ) {} }; @@ -84,6 +84,6 @@ class IFeed */ void readCalibrationFromFile(const std::string &filename, camera::PinholeRadialK3 &camIntrinsics); -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/ImageFeed.cpp b/src/aliceVision/dataio/ImageFeed.cpp index 06d6cb00e3..9a13988604 100644 --- a/src/aliceVision/dataio/ImageFeed.cpp +++ b/src/aliceVision/dataio/ImageFeed.cpp @@ -28,13 +28,13 @@ namespace dataio{ class ImageFeed::FeederImpl { public: - + static bool isSupported(const std::string &ext); - + FeederImpl() : _isInit(false) {} - + FeederImpl(const std::string& imagePath, const std::string& calibPath); - + template bool readImage(image::Image &image, camera::PinholeRadialK3 &camIntrinsics, @@ -78,17 +78,17 @@ class ImageFeed::FeederImpl } return true; } - + std::size_t nbFrames() const; - + bool goToFrame(const unsigned int frame); - + bool goToNextFrame(); - - bool isInit() const {return _isInit;} - + + bool isInit() const {return _isInit;} + private: - + template bool feedWithJson(image::Image &image, camera::PinholeRadialK3 &camIntrinsics, @@ -124,7 +124,7 @@ class ImageFeed::FeederImpl } else { - const camera::PinholeRadialK3 * intrinsics = dynamic_cast(cam) ; + const camera::PinholeRadialK3 * intrinsics = dynamic_cast(cam); // simply copy values camIntrinsics = *intrinsics; @@ -134,17 +134,17 @@ class ImageFeed::FeederImpl ++_viewIterator; return true; } 
- + private: static const std::vector supportedExtensions; - + private: bool _isInit; bool _withCalibration; // It contains the images to be fed std::vector _images; camera::PinholeRadialK3 _camIntrinsics; - + bool _sfmMode = false; sfmData::SfMData _sfmdata; sfmData::Views::const_iterator _viewIterator; @@ -160,7 +160,7 @@ bool ImageFeed::FeederImpl::isSupported(const std::string &ext) return(std::find(start, end, boost::to_lower_copy(ext)) != end); } -ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::string& calibPath) +ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::string& calibPath) : _isInit(false) , _withCalibration(false) { @@ -189,7 +189,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin // if it is an image file else if(ext == ".txt") { - // we expect a simple txt file with a list of path to images relative to the + // we expect a simple txt file with a list of path to images relative to the // location of the txt file itself std::fstream fs(imagePath, std::ios::in); std::string line; @@ -237,8 +237,8 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin // since some OS will provide the files in a random order, first store them // in a priority queue and then fill the _image queue with the alphabetical // order from the priority queue - std::priority_queue, + std::priority_queue, std::greater > tmpSorter; for(; iterator != bf::directory_iterator(); ++iterator) { @@ -259,7 +259,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin _images.push_back(tmpSorter.top()); tmpSorter.pop(); } - + _withCalibration = !calibPath.empty(); _sfmMode = false; _isInit = true; @@ -268,7 +268,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin { throw std::invalid_argument("File or mode not yet implemented"); } - + // last thing: if _withCalibration is true it means that it is not a json and // a path to a calibration file has been passed // then load the calibration @@ -283,10 +283,10 @@ std::size_t ImageFeed::FeederImpl::nbFrames() const { if(!_isInit) return 0; - + if(_sfmMode) return _sfmdata.getViews().size(); - + return _images.size(); } @@ -295,10 +295,10 @@ bool ImageFeed::FeederImpl::goToFrame(const unsigned int frame) if(!_isInit) { _currentImageIndex = frame; - ALICEVISION_LOG_WARNING("Image feed is not initialized "); + ALICEVISION_LOG_WARNING("Image feed is not initialized"); return false; } - + // Reconstruction mode if(_sfmMode) { @@ -352,10 +352,10 @@ bool ImageFeed::FeederImpl::goToNextFrame() ImageFeed::ImageFeed() : _imageFeed(new FeederImpl()) { } -ImageFeed::ImageFeed(const std::string& imagePath, const std::string& calibPath) +ImageFeed::ImageFeed(const std::string& imagePath, const std::string& calibPath) : _imageFeed( new FeederImpl(imagePath, calibPath) ) { } -bool ImageFeed::readImage(image::Image &imageRGB, +bool ImageFeed::readImage(image::Image &imageRGB, camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics) @@ -371,7 +371,7 @@ bool ImageFeed::readImage(image::Image &imageGray, return(_imageFeed->readImage(imageGray, camIntrinsics, mediaPath, hasIntrinsics)); } -bool ImageFeed::readImage(image::Image &imageGray, +bool ImageFeed::readImage(image::Image &imageGray, camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics) @@ -414,5 +414,5 @@ bool ImageFeed::isSupported(const std::string &extension) ImageFeed::~ImageFeed() { } 
-}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/ImageFeed.hpp b/src/aliceVision/dataio/ImageFeed.hpp index 5d6d9ec70c..166ac3d869 100644 --- a/src/aliceVision/dataio/ImageFeed.hpp +++ b/src/aliceVision/dataio/ImageFeed.hpp @@ -21,7 +21,7 @@ class ImageFeed : public IFeed * @brief Empty constructor */ ImageFeed(); - + /** * @brief Set up an image based feed from a choice of different sources: * 1) a directory containing images @@ -44,7 +44,7 @@ class ImageFeed : public IFeed * @see readCalibrationFromFile() */ ImageFeed(const std::string& imagePath, const std::string& calibPath); - + /** * @brief Provide a new RGB image from the feed * @@ -89,13 +89,13 @@ class ImageFeed : public IFeed camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + std::size_t nbFrames() const; - + bool goToFrame(const unsigned int frame); bool goToNextFrame(); - + /** * @brief Return true if the feed is correctly initialized. * @@ -104,7 +104,7 @@ class ImageFeed : public IFeed bool isInit() const; virtual ~ImageFeed( ); - + /** * @brief For a given extension, return true if that file can be used as input * for the feed. ImageFeed supports .json, .txt, and the most common image files. @@ -113,13 +113,13 @@ class ImageFeed : public IFeed * @return True if the file is supported. */ static bool isSupported(const std::string &extension); - + private: class FeederImpl; std::unique_ptr _imageFeed; }; -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/VideoFeed.cpp b/src/aliceVision/dataio/VideoFeed.cpp index cd7d69e10b..9ae8b740f9 100644 --- a/src/aliceVision/dataio/VideoFeed.cpp +++ b/src/aliceVision/dataio/VideoFeed.cpp @@ -24,13 +24,13 @@ class VideoFeed::FeederImpl { public: FeederImpl() : _isInit(false) { } - + FeederImpl(const std::string &videoPath, const std::string &calibPath); - + FeederImpl(int videoDevice, const std::string &calibPath); - + bool isInit() const {return _isInit;} - + bool readImage(image::Image &imageRGB, camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, @@ -40,18 +40,18 @@ class VideoFeed::FeederImpl camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + bool readImage(image::Image &imageGray, camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + bool goToFrame(const unsigned int frame); - + bool goToNextFrame(); - + std::size_t nbFrames() const; - + private: bool _isInit; bool _isLive; @@ -79,7 +79,7 @@ VideoFeed::FeederImpl::FeederImpl(const std::string &videoPath, const std::strin _withIntrinsics = !calibPath.empty(); if(_withIntrinsics) readCalibrationFromFile(calibPath, _camIntrinsics); - + _isInit = true; } @@ -95,12 +95,12 @@ VideoFeed::FeederImpl::FeederImpl(int videoDevice, const std::string &calibPath) } goToNextFrame(); - + // load the calibration path _withIntrinsics = !calibPath.empty(); if(_withIntrinsics) readCalibrationFromFile(calibPath, _camIntrinsics); - + _isInit = true; } @@ -116,15 +116,15 @@ bool VideoFeed::FeederImpl::readImage(image::Image &imageRGB, { return false; } - + if(frame.channels() == 3) { cv::Mat color; resize(frame, color, cv::Size(frame.cols, frame.rows)); - + cv::cvtColor(frame, color, cv::COLOR_BGR2RGB); imageRGB.resize(color.cols, color.rows); - + unsigned char* pixelPtr = (unsigned char*)color.data; for(int i = 0; i < color.rows; i++) { @@ -140,7 +140,7 @@ bool VideoFeed::FeederImpl::readImage(image::Image &imageRGB, 
ALICEVISION_LOG_WARNING("Error can't read RGB frame " << _videoPath); throw std::invalid_argument("Error can't read RGB frame " + _videoPath); } - + hasIntrinsics = _withIntrinsics; if(_withIntrinsics) camIntrinsics = _camIntrinsics; @@ -176,7 +176,7 @@ bool VideoFeed::FeederImpl::readImage(image::Image &imageGray, { return false; } - + if(frame.channels() == 3) { // convert to gray @@ -233,11 +233,11 @@ bool VideoFeed::FeederImpl::goToNextFrame() VideoFeed::VideoFeed() : _feeder(new FeederImpl()) { } -VideoFeed::VideoFeed(const std::string &videoPath, const std::string &calibPath) +VideoFeed::VideoFeed(const std::string &videoPath, const std::string &calibPath) : _feeder(new FeederImpl(videoPath, calibPath)) { } -VideoFeed::VideoFeed(int videoDevice, const std::string &calibPath) +VideoFeed::VideoFeed(int videoDevice, const std::string &calibPath) : _feeder(new FeederImpl(videoDevice, calibPath)) { } @@ -284,5 +284,5 @@ bool VideoFeed::isInit() const {return(_feeder->isInit()); } VideoFeed::~VideoFeed() { } -}//namespace dataio +}//namespace dataio }//namespace aliceVision diff --git a/src/aliceVision/dataio/VideoFeed.hpp b/src/aliceVision/dataio/VideoFeed.hpp index 93cc035287..69c4f8aec2 100644 --- a/src/aliceVision/dataio/VideoFeed.hpp +++ b/src/aliceVision/dataio/VideoFeed.hpp @@ -56,7 +56,7 @@ class VideoFeed : public IFeed * @see readCalibrationFromFile() */ VideoFeed(int videoDevice, const std::string &calibPath); - + /** * @brief Provide a new RGB image from the feed * @@ -101,13 +101,13 @@ class VideoFeed : public IFeed camera::PinholeRadialK3 &camIntrinsics, std::string &mediaPath, bool &hasIntrinsics); - + std::size_t nbFrames() const; - + bool goToFrame(const unsigned int frame); - + bool goToNextFrame(); - + /** * @brief Return true if the feed is correctly initialized. * @@ -115,8 +115,8 @@ class VideoFeed : public IFeed */ bool isInit() const; - virtual ~VideoFeed( ); - + virtual ~VideoFeed(); + private: class FeederImpl; std::unique_ptr _feeder; From fee37d83bdc8014451eb46f137f22c4e54fe1a4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Thu, 16 Feb 2023 18:47:20 +0100 Subject: [PATCH 19/28] [image] Simplify retrieving the extensions supported by OpenImageIO "getSupportedExtensions" used to get the content of OIIO's "extension_list" and parse it. OIIO now provides a utility function that does the parsing of "extension_list" and returns it into a map. Using it directly simplifies the function's body. The documentation is also updated with more details. 
--- src/aliceVision/image/io.cpp | 22 ++++++++++------------ src/aliceVision/image/io.hpp | 15 +++++++++------ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/aliceVision/image/io.cpp b/src/aliceVision/image/io.cpp index 7cbded20db..4a478f6ed9 100644 --- a/src/aliceVision/image/io.cpp +++ b/src/aliceVision/image/io.cpp @@ -126,28 +126,26 @@ std::istream& operator>>(std::istream& in, EImageFileType& imageFileType) std::vector getSupportedExtensions() { - static const std::string extensionList = oiio::get_string_attribute("extension_list"); std::vector supportedExtensions; - std::vector supportedFormat; - boost::split(supportedFormat, extensionList, boost::is_any_of(";"), boost::token_compress_on); - for(const std::string& format: supportedFormat) - { - std::vector extensions; - const std::string str = format.substr(format.find(":")+1); - boost::split(extensions, str, boost::is_any_of(","), boost::token_compress_on); - for(std::string& extension: extensions) - supportedExtensions.push_back(extension.insert(0, ".")); + // Map containing the parsed "extension_list" with each supported format and its associated extensions + static std::map> extensionList = oiio::get_extension_map(); + + for (auto& format : extensionList) { + for (auto& extension : format.second) { + supportedExtensions.push_back(extension.insert(0, ".")); + } } return supportedExtensions; } -bool isSupported(const std::string& ext) +bool isSupported(const std::string& extension) { static const std::vector supportedExtensions = getSupportedExtensions(); const auto start = supportedExtensions.begin(); const auto end = supportedExtensions.end(); - return (std::find(start, end, boost::to_lower_copy(ext)) != end); + return (std::find(start, end, boost::to_lower_copy(extension)) != end); +} } std::string EStorageDataType_informations() diff --git a/src/aliceVision/image/io.hpp b/src/aliceVision/image/io.hpp index 78d75ac3c2..599b4e83a7 100644 --- a/src/aliceVision/image/io.hpp +++ b/src/aliceVision/image/io.hpp @@ -100,17 +100,20 @@ std::ostream& operator<<(std::ostream& os, EImageFileType imageFileType); std::istream& operator>>(std::istream& in, EImageFileType& imageFileType); /** - * @brief Return a list of extensions supported by openImageIO ie exists in extension_list from imageio.h - * @return A vector containing all supported extensions + * @brief Return a list of extensions supported by OpenImageIO (ie. extensions existing in extension_list from imageio.h). + * The list of supported extensions also includes video formats. + * @return a vector containing all the extensions supported by OpenImageIO. */ std::vector getSupportedExtensions(); /** - * @brief Check if input image extension is supported by openImageIO ie exists in extension_list from imageio.h - * @param[in] ext - image extension - * @return true if valid extension + * @brief Check if an input image extension is supported by OpenImageIO (ie. it exists in extension_list from imageio.h). + * This function might return true if the input is a video rather than an image, as long as the video format is + * supported by OpenImageIO. + * @param[in] extension the input image extension. + * @return true if the extension is valid and supported by OpenImageIO, false otherwise. 
*/ -bool isSupported(const std::string& ext); +bool isSupported(const std::string& extension); /** From 9de160fa7f9c0559e487ae48eda53c3361b3a73e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Thu, 16 Feb 2023 18:57:21 +0100 Subject: [PATCH 20/28] [image] Add a function determining if the input extension is a video format "isSupported()" relies exclusively on the content of OpenImageIO's "extension_list", which contains all the formats supported as inputs, including video formats, with no distinction. A function "isVideoExtension" is added to determine whether the input extension is part of the OIIO-supported movie formats. The list of supported formats is hard-coded based on OIIO's documentation. --- src/aliceVision/image/io.cpp | 12 ++++++++++++ src/aliceVision/image/io.hpp | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/src/aliceVision/image/io.cpp b/src/aliceVision/image/io.cpp index 4a478f6ed9..fe272a348c 100644 --- a/src/aliceVision/image/io.cpp +++ b/src/aliceVision/image/io.cpp @@ -146,6 +146,18 @@ bool isSupported(const std::string& extension) const auto end = supportedExtensions.end(); return (std::find(start, end, boost::to_lower_copy(extension)) != end); } + +bool isVideoExtension(const std::string& extension) +{ + // List provided by OpenImageIO: + // https://openimageio.readthedocs.io/en/latest/builtinplugins.html#movie-formats-using-ffmpeg + static const std::array supportedExtensions = { + ".avi", ".qt", ".mov", ".mp4", ".m4a", ".m4v", + ".3gp", ".3g2", ".mj2", ".m4v", ".mpg" + }; + const auto start = supportedExtensions.begin(); + const auto end = supportedExtensions.end(); + return (std::find(start, end, boost::to_lower_copy(extension)) != end); } std::string EStorageDataType_informations() diff --git a/src/aliceVision/image/io.hpp b/src/aliceVision/image/io.hpp index 599b4e83a7..b35b96de4e 100644 --- a/src/aliceVision/image/io.hpp +++ b/src/aliceVision/image/io.hpp @@ -115,6 +115,13 @@ std::vector getSupportedExtensions(); */ bool isSupported(const std::string& extension); +/** + * @brief Check if the extension is a video format supported by OpenImageIO, based on the list provided by OpenImageIO + * (https://openimageio.readthedocs.io/en/latest/builtinplugins.html#movie-formats-using-ffmpeg). + * @param[in] extension the input file's extension. + * @return true if the extension is a valid video extension supported by OpenImageIO, false otherwise. + */ +bool isVideoExtension(const std::string& extension); /** * @brief Data type use to write the image From f2db007e5acce3274748ffdfa28feb1f1821d956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Thu, 16 Feb 2023 19:51:11 +0100 Subject: [PATCH 21/28] [dataio] Check if the input format is supported with the "image" module For ImageFeed: use image::isSupported and image::isVideoExtension to check that the input is supported by OpenImageIO but is not a video. For VideoFeed: add a VideoFeed::isSupported method to ensure that we do not try to open unsupported videos with OpenCV. The list of supported video formats is provided by OpenImageIO, which is based on ffmpeg like OpenCV. Both ImageFeed and VideoFeed have their own implementation of isSupported to ensure they can check whether they support a given input on their own. 
--- src/aliceVision/dataio/FeedProvider.cpp | 15 +++++++++++---- src/aliceVision/dataio/ImageFeed.cpp | 24 +++++++----------------- src/aliceVision/dataio/VideoFeed.cpp | 9 +++++++++ src/aliceVision/dataio/VideoFeed.hpp | 7 +++++++ 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/src/aliceVision/dataio/FeedProvider.cpp b/src/aliceVision/dataio/FeedProvider.cpp index f4c31a66c3..46ac44e1ad 100644 --- a/src/aliceVision/dataio/FeedProvider.cpp +++ b/src/aliceVision/dataio/FeedProvider.cpp @@ -40,14 +40,21 @@ FeedProvider::FeedProvider(const std::string &feedPath, const std::string &calib } else { + if(VideoFeed::isSupported(extension)) + { #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_OPENCV) - // let's try it with a video - _feeder.reset(new VideoFeed(feedPath, calibPath)); - _isVideo = true; + // let's try it with a video + _feeder.reset(new VideoFeed(feedPath, calibPath)); + _isVideo = true; #else - throw std::invalid_argument("Unsupported mode! If you intended to use a video" + throw std::invalid_argument("Unsupported mode! If you intended to use a video" " please add OpenCV support"); #endif + } + else + { + throw std::invalid_argument("Unsupported file format: " + feedPath); + } } } // parent_path() returns "/foo/bar/" when input path equals to "/foo/bar/" diff --git a/src/aliceVision/dataio/ImageFeed.cpp b/src/aliceVision/dataio/ImageFeed.cpp index 9a13988604..c3dab7c24e 100644 --- a/src/aliceVision/dataio/ImageFeed.cpp +++ b/src/aliceVision/dataio/ImageFeed.cpp @@ -29,8 +29,6 @@ class ImageFeed::FeederImpl { public: - static bool isSupported(const std::string &ext); - FeederImpl() : _isInit(false) {} FeederImpl(const std::string& imagePath, const std::string& calibPath); @@ -135,9 +133,6 @@ class ImageFeed::FeederImpl return true; } -private: - static const std::vector supportedExtensions; - private: bool _isInit; bool _withCalibration; @@ -151,15 +146,6 @@ class ImageFeed::FeederImpl unsigned int _currentImageIndex = 0; }; -const std::vector ImageFeed::FeederImpl::supportedExtensions = {".jpg", ".jpeg", ".png", ".ppm", ".tif", ".tiff", ".exr"}; - -bool ImageFeed::FeederImpl::isSupported(const std::string &ext) -{ - const auto start = FeederImpl::supportedExtensions.begin(); - const auto end = FeederImpl::supportedExtensions.end(); - return(std::find(start, end, boost::to_lower_copy(ext)) != end); -} - ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::string& calibPath) : _isInit(false) , _withCalibration(false) @@ -179,7 +165,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin _sfmMode = true; } // if it is an image file - else if(FeederImpl::isSupported(ext)) + else if(image::isSupported(ext) && !image::isVideoExtension(ext)) { _images.push_back(imagePath); _withCalibration = !calibPath.empty(); @@ -244,7 +230,7 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin { // get the extension of the current file to check whether it is an image const std::string ext = iterator->path().extension().string(); - if(FeederImpl::isSupported(ext)) + if(image::isSupported(ext) && !image::isVideoExtension(ext)) { const std::string filepath = iterator->path().string(); const std::string filename = iterator->path().filename().string(); @@ -252,6 +238,10 @@ ImageFeed::FeederImpl::FeederImpl(const std::string& imagePath, const std::strin if(filePattern.empty() || std::regex_match(filename, re)) tmpSorter.push(filepath); } + else + { + ALICEVISION_LOG_WARNING("Unsupported file extension " << ext << " 
for " << iterator->path().string() << "."); + } } // put all the retrieve files inside the queue while(!tmpSorter.empty()) @@ -408,7 +398,7 @@ bool ImageFeed::isSupported(const std::string &extension) } else { - return FeederImpl::isSupported(ext); + return (image::isSupported(ext) && !image::isVideoExtension(ext)); } } diff --git a/src/aliceVision/dataio/VideoFeed.cpp b/src/aliceVision/dataio/VideoFeed.cpp index 9ae8b740f9..3362b3dce4 100644 --- a/src/aliceVision/dataio/VideoFeed.cpp +++ b/src/aliceVision/dataio/VideoFeed.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -203,7 +204,10 @@ bool VideoFeed::FeederImpl::readImage(image::Image &imageGray, std::size_t VideoFeed::FeederImpl::nbFrames() const { if (!_videoCapture.isOpened()) + { + ALICEVISION_LOG_WARNING("The video file could not be opened."); return 0; + } return _videoCapture.get(cv::CAP_PROP_FRAME_COUNT); } @@ -282,6 +286,11 @@ bool VideoFeed::goToNextFrame() bool VideoFeed::isInit() const {return(_feeder->isInit()); } +bool VideoFeed::isSupported(const std::string &extension) +{ + return image::isVideoExtension(extension); +} + VideoFeed::~VideoFeed() { } }//namespace dataio diff --git a/src/aliceVision/dataio/VideoFeed.hpp b/src/aliceVision/dataio/VideoFeed.hpp index 69c4f8aec2..a715000908 100644 --- a/src/aliceVision/dataio/VideoFeed.hpp +++ b/src/aliceVision/dataio/VideoFeed.hpp @@ -117,6 +117,13 @@ class VideoFeed : public IFeed virtual ~VideoFeed(); +/** + * @brief For a given extension, return true if that file can be used as input video for the feed. + * @param extension The file extension to check in ".ext" format (case insensitive). + * @return True if the file is supported. + */ +static bool isSupported(const std::string &extension); + private: class FeederImpl; std::unique_ptr _feeder; From 281907aad04291e49c969d9775abfd133f20ebad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Thu, 16 Feb 2023 20:25:06 +0100 Subject: [PATCH 22/28] [keyframe] Propage the input's orientation info to the output --- src/aliceVision/keyframe/KeyframeSelector.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 096694496e..8e7546da44 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -450,12 +450,17 @@ bool KeyframeSelector::writeSelection(const std::vector& brands, return false; } + oiio::ImageSpec inputSpec; + inputSpec.extra_attribs = image::readImageMetadata(currentImgName); + int orientation = inputSpec.get_int_attribute("Orientation", 1); + oiio::ParamValueList metadata; metadata.push_back(oiio::ParamValue("Make", brands[id])); metadata.push_back(oiio::ParamValue("Model", models[id])); metadata.push_back(oiio::ParamValue("Exif:BodySerialNumber", std::to_string(getRandomInt()))); metadata.push_back(oiio::ParamValue("Exif:FocalLength", mmFocals[id])); metadata.push_back(oiio::ParamValue("Exif:ImageUniqueID", std::to_string(getRandomInt()))); + metadata.push_back(oiio::ParamValue("Orientation", orientation)); // Will not propagate for PNG outputs fs::path folder = _outputFolder; std::ostringstream filenameSS; From 9ad5f6095f61f6584b0281b5fcfb80ac03ebcddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Thu, 16 Feb 2023 21:07:22 +0100 Subject: [PATCH 23/28] [keyframe] Propagate pixel aspect ratio when possible Propagate the pixel aspect ratio for EXR and PNG outputs. 
For JPG outputs, writing the pixel aspect ratio in that way leads to errors, as any
aspect ratio > 1 is written as < 1.
---
 src/aliceVision/keyframe/KeyframeSelector.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp
index 8e7546da44..8bb3b9bb22 100644
--- a/src/aliceVision/keyframe/KeyframeSelector.cpp
+++ b/src/aliceVision/keyframe/KeyframeSelector.cpp
@@ -453,6 +453,7 @@ bool KeyframeSelector::writeSelection(const std::vector& brands,
     oiio::ImageSpec inputSpec;
     inputSpec.extra_attribs = image::readImageMetadata(currentImgName);
     int orientation = inputSpec.get_int_attribute("Orientation", 1);
+    float pixelAspectRatio = inputSpec.get_float_attribute("PixelAspectRatio", 1.0f);
 
     oiio::ParamValueList metadata;
     metadata.push_back(oiio::ParamValue("Make", brands[id]));
@@ -461,6 +462,8 @@ bool KeyframeSelector::writeSelection(const std::vector& brands,
     metadata.push_back(oiio::ParamValue("Exif:FocalLength", mmFocals[id]));
     metadata.push_back(oiio::ParamValue("Exif:ImageUniqueID", std::to_string(getRandomInt())));
     metadata.push_back(oiio::ParamValue("Orientation", orientation)); // Will not propagate for PNG outputs
+    if (outputExtension != "jpg") // TODO: propagate pixelAspectRatio properly for JPG
+        metadata.push_back(oiio::ParamValue("PixelAspectRatio", pixelAspectRatio));
 
     fs::path folder = _outputFolder;
     std::ostringstream filenameSS;

From c2148b4048f6dbc6af64dd0c8bda5552b3aed61d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Candice=20Bent=C3=A9jac?=
Date: Fri, 17 Feb 2023 09:25:43 +0100
Subject: [PATCH 24/28] [keyframe] Specify the input's colorspace when it is sRGB

JPG images are always assumed to be in sRGB. When this information is available, it is
better to pass it as the "fromColorSpace" when writing the output, to ensure the output
colorspace is appropriate.
---
 src/aliceVision/keyframe/KeyframeSelector.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp
index 8bb3b9bb22..060a984edf 100644
--- a/src/aliceVision/keyframe/KeyframeSelector.cpp
+++ b/src/aliceVision/keyframe/KeyframeSelector.cpp
@@ -454,6 +454,7 @@ bool KeyframeSelector::writeSelection(const std::vector& brands,
     inputSpec.extra_attribs = image::readImageMetadata(currentImgName);
     int orientation = inputSpec.get_int_attribute("Orientation", 1);
     float pixelAspectRatio = inputSpec.get_float_attribute("PixelAspectRatio", 1.0f);
+    std::string colorspace = inputSpec.get_string_attribute("oiio:Colorspace", "");
 
     oiio::ParamValueList metadata;
     metadata.push_back(oiio::ParamValue("Make", brands[id]));
@@ -479,6 +480,9 @@ bool KeyframeSelector::writeSelection(const std::vector& brands,
         options.fromColorSpace(image::EImageColorSpace::SRGB);
         options.toColorSpace(image::EImageColorSpace::AUTO);
     } else { // Otherwise, the frames have been read without any conversion, they should be written as such
+        if (colorspace == "sRGB")
+            options.fromColorSpace(image::EImageColorSpace::SRGB);
+
         if (outputExtension == "exr")
             options.toColorSpace(image::EImageColorSpace::NO_CONVERSION);
         else

From 4f1f4db4e1f6fb8e5881beebb0b733ca80f33fb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Candice=20Bent=C3=A9jac?=
Date: Thu, 23 Feb 2023 18:11:31 +0100
Subject: [PATCH 25/28] KeyframeSelection: Add debug option to skip sharpness score computation

Add a "skipSharpnessComputation" debug option that allows the scores to be computed
without performing the sharpness score computations.
All frames will be assigned a fixed sharpness score of 1.0. If the smart selection is applied with this option enabled, the selected frames will be those located in the center of each subset (determined with the motion scores) as the weights will be applied on constant scores. This option is useful to determine the impact of the sharpness score computation on the global processing time. --- src/aliceVision/keyframe/KeyframeSelector.cpp | 52 +++++++++++-------- src/aliceVision/keyframe/KeyframeSelector.hpp | 9 +++- src/aliceVision/keyframe/README.md | 10 ++-- src/software/utils/main_keyframeSelection.cpp | 12 +++-- 4 files changed, 52 insertions(+), 31 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 060a984edf..60bd91a961 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -137,13 +137,13 @@ void KeyframeSelector::processRegular() void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, const std::size_t sharpnessWindowSize, - const std::size_t flowCellSize) + const std::size_t flowCellSize, const bool skipSharpnessComputation) { _selectedKeyframes.clear(); _selectedFrames.clear(); // Step 0: compute all the scores - computeScores(rescaledWidthSharpness, rescaledWidthFlow, sharpnessWindowSize, flowCellSize); + computeScores(rescaledWidthSharpness, rescaledWidthFlow, sharpnessWindowSize, flowCellSize, skipSharpnessComputation); // Step 1: determine subsequences based on the motion accumulation std::vector subsequenceLimits; @@ -273,7 +273,8 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_ } bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, - const std::size_t sharpnessWindowSize, const std::size_t flowCellSize) + const std::size_t sharpnessWindowSize, const std::size_t flowCellSize, + const bool skipSharpnessComputation) { // Reset the computed scores _sharpnessScores.clear(); @@ -330,7 +331,7 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c auto ptrFlow = cv::optflow::createOptFlow_DeepFlow(); while (currentFrame < nbFrames) { - double minimalSharpness = std::numeric_limits::max(); + double minimalSharpness = skipSharpnessComputation ? 
1.0f : std::numeric_limits::max(); double minimalFlow = std::numeric_limits::max(); for (std::size_t mediaIndex = 0; mediaIndex < feeds.size(); ++mediaIndex) { @@ -349,26 +350,29 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c * - otherwise (feed not correctly moved to the next frame), throw a runtime error exception as something * is wrong with the video */ - try { - currentMatSharpness = readImage(feed, rescaledWidthSharpness); // Read image for sharpness and rescale it if requested - } catch (const std::invalid_argument& ex) { - // currentFrame + 1 = currently evaluated frame with indexing starting at 1, for display reasons - // currentFrame + 2 = next frame to evaluate with indexing starting at 1, for display reasons - ALICEVISION_LOG_WARNING("Invalid or missing frame " << currentFrame + 1 - << ", attempting to read frame " << currentFrame + 2 << "."); - bool success = feed.goToFrame(++currentFrame); - if (success) { - // Will throw an exception if next frame is also invalid + if (!skipSharpnessComputation) { + try { + // Read image for sharpness and rescale it if requested currentMatSharpness = readImage(feed, rescaledWidthSharpness); - // If no exception has been thrown, push dummy scores for the frame that was skipped - _sharpnessScores.push_back(-1.f); - _flowScores.push_back(-1.f); - } else - ALICEVISION_THROW_ERROR("Could not go to frame " << currentFrame + 1 - << " either. The feed might be corrupted."); + } catch (const std::invalid_argument& ex) { + // currentFrame + 1 = currently evaluated frame with indexing starting at 1, for display reasons + // currentFrame + 2 = next frame to evaluate with indexing starting at 1, for display reasons + ALICEVISION_LOG_WARNING("Invalid or missing frame " << currentFrame + 1 + << ", attempting to read frame " << currentFrame + 2 << "."); + bool success = feed.goToFrame(++currentFrame); + if (success) { + // Will throw an exception if next frame is also invalid + currentMatSharpness = readImage(feed, rescaledWidthSharpness); + // If no exception has been thrown, push dummy scores for the frame that was skipped + _sharpnessScores.push_back(-1.f); + _flowScores.push_back(-1.f); + } else + ALICEVISION_THROW_ERROR("Could not go to frame " << currentFrame + 1 + << " either. 
The feed might be corrupted."); + } } - if (rescaledWidthSharpness == rescaledWidthFlow) { + if (rescaledWidthSharpness == rescaledWidthFlow && !skipSharpnessComputation) { currentMatFlow = currentMatSharpness; } else { currentMatFlow = readImage(feed, rescaledWidthFlow); @@ -380,8 +384,10 @@ bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, c } // Compute sharpness - const double sharpness = computeSharpness(currentMatSharpness, sharpnessWindowSize); - minimalSharpness = std::min(minimalSharpness, sharpness); + if (!skipSharpnessComputation) { + const double sharpness = computeSharpness(currentMatSharpness, sharpnessWindowSize); + minimalSharpness = std::min(minimalSharpness, sharpness); + } // Compute optical flow if (currentFrame > 0) { diff --git a/src/aliceVision/keyframe/KeyframeSelector.hpp b/src/aliceVision/keyframe/KeyframeSelector.hpp index 06eeeebe24..9ce9060ba4 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.hpp +++ b/src/aliceVision/keyframe/KeyframeSelector.hpp @@ -82,10 +82,12 @@ class KeyframeSelector * @param[in] sharpnessWindowSize the size of the sliding window used to compute sharpness scores, in pixels * @param[in] flowCellSize the size of the cells within a frame that are used to compute the optical flow scores, * in pixels + * @param[in] skipSharpnessComputation if true, the sharpness score computations will not be performed and a fixed + * sharpness score will be given to all the input frames */ void processSmart(const float pxDisplacement, const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, const std::size_t sharpnessWindowSize, - const std::size_t flowCellSize); + const std::size_t flowCellSize, const bool skipSharpnessComputation = false); /** * @brief Compute the sharpness and optical flow scores for the input media paths @@ -96,10 +98,13 @@ class KeyframeSelector * @param[in] sharpnessWindowSize the size of the sliding window used to compute sharpness scores, in pixels * @param[in] flowCellSize the size of the cells within a frame that are used to compute the optical flow scores, * in pixels + * @param[in] skipSharpnessComputation if true, the sharpness score computations will not be performed and a fixed + * sharpness score will be given to all the input frames * @return true if the scores have been successfully computed for all frames, false otherwise */ bool computeScores(const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, - const std::size_t sharpnessWindowSize, const std::size_t flowCellSize); + const std::size_t sharpnessWindowSize, const std::size_t flowCellSize, + const bool skipSharpnessComputation); /** * @brief Write the selected keyframes in the output folder diff --git a/src/aliceVision/keyframe/README.md b/src/aliceVision/keyframe/README.md index 7f6dd34259..e9bc2cce8a 100644 --- a/src/aliceVision/keyframe/README.md +++ b/src/aliceVision/keyframe/README.md @@ -86,7 +86,9 @@ The weights aim at favouring the selection of keyframes that are as temporally f Debug options specific to the smart selection method are available: - Export scores to CSV: the sharpness and motion scores for all the frames are written to a CSV file; -- Visualise the optical flow: the computed motion vectors are, for each frame, visualised with HSV images that are written as PNG images. 
+- Visualise the optical flow: the computed motion vectors are, for each frame, visualised with HSV images that are written as PNG images;
+- Skip the sharpness score computations: the motion scores are computed normally, but all the sharpness score computations are skipped and replaced by a fixed value (1.0), which makes it possible to assess the impact of the sharpness score computations (relative to the motion scores) on the global processing time;
+- Skip the frame selection: the scores are computed normally (the sharpness scores can be skipped) but will not be used to perform the final selection. This is mainly useful to determine the processing time solely dedicated to the score computations or, combined with the CSV export, to evaluate the quality of the scoring without needing to go through the complete selection process.
 
 ## API
 
@@ -107,14 +109,16 @@ void processSmart(const float pxDisplacement,
                   const std::size_t rescaledWidthSharpness,
                   const std::size_t rescaledWidthFlow,
                   const std::size_t sharpnessWindowSize,
-                  const std::size_t flowCellSize);
+                  const std::size_t flowCellSize,
+                  const bool skipSharpnessComputation = false);
 ```
 - Score computation
 ```cpp
 bool computeScores(const std::size_t rescaledWidthSharpness,
                    const std::size_t rescaledWidthFlow,
                    const std::size_t sharpnessWindowSize,
-                   const std::size_t flowCellSize);
+                   const std::size_t flowCellSize,
+                   const bool skipSharpnessComputation);
 ```
 - Write selected keyframes
 ```cpp
diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp
index d84ade7804..39f1e1f37f 100644
--- a/src/software/utils/main_keyframeSelection.cpp
+++ b/src/software/utils/main_keyframeSelection.cpp
@@ -61,6 +61,7 @@ int aliceVision_main(int argc, char** argv)
     bool skipSelection = false; // only compute the scores and do not proceed with the selection
     bool exportFlowVisualisation = false; // export optical flow visualisation for all the frames
     bool flowVisualisationOnly = false; // export optical flow visualisation for all the frames but do not compute scores
+    bool skipSharpnessComputation = false; // skip sharpness score computations
 
     po::options_description inputParams("Required parameters");
     inputParams.add_options()
@@ -139,7 +140,10 @@ int aliceVision_main(int argc, char** argv)
         ("exportFlowVisualisation", po::value(&exportFlowVisualisation)->default_value(exportFlowVisualisation),
          "For all frames, export the optical flow visualisation in HSV as PNG images.")
         ("flowVisualisationOnly", po::value(&flowVisualisationOnly)->default_value(flowVisualisationOnly),
-         "Export the optical flow visualisation in HSV as PNG images for all frames but do not compute scores.");
+         "Export the optical flow visualisation in HSV as PNG images for all frames but do not compute scores.")
+        ("skipSharpnessComputation", po::value(&skipSharpnessComputation)->default_value(skipSharpnessComputation),
+         "Skip the computations for the sharpness score of each frame. 
A fixed sharpness score of 1.0 will be " + "assigned to each frame."); aliceVision::CmdLine cmdline("This program is used to extract keyframes from single camera or a camera rig.\n" "AliceVision keyframeSelection"); @@ -223,7 +227,8 @@ int aliceVision_main(int argc, char** argv) } if (skipSelection) { - selector.computeScores(rescaledWidthSharp, rescaledWidthFlow, sharpnessWindowSize, flowCellSize); + selector.computeScores(rescaledWidthSharp, rescaledWidthFlow, sharpnessWindowSize, flowCellSize, + skipSharpnessComputation); if (exportScores) selector.exportScoresToFile(csvFilename); // Frames have not been selected, ignore 'exportSelectedFrames' if (exportFlowVisualisation) @@ -234,7 +239,8 @@ int aliceVision_main(int argc, char** argv) // Process media paths with regular or smart method if (useSmartSelection) - selector.processSmart(pxDisplacement, rescaledWidthSharp, rescaledWidthFlow, sharpnessWindowSize, flowCellSize); + selector.processSmart(pxDisplacement, rescaledWidthSharp, rescaledWidthFlow, sharpnessWindowSize, flowCellSize, + skipSharpnessComputation); else selector.processRegular(); From f02e829ea5f17a055d96c58191a0f101e100958c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Mon, 13 Mar 2023 11:57:58 +0100 Subject: [PATCH 26/28] [utils] KeyframeSelection: Update "pxDisplacement" default value to 10% --- src/software/utils/main_keyframeSelection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/software/utils/main_keyframeSelection.cpp b/src/software/utils/main_keyframeSelection.cpp index 39f1e1f37f..446f0c5ae9 100644 --- a/src/software/utils/main_keyframeSelection.cpp +++ b/src/software/utils/main_keyframeSelection.cpp @@ -44,7 +44,7 @@ int aliceVision_main(int argc, char** argv) unsigned int maxFrameStep = 36; // maximum number of frames between two keyframes (regular selection) unsigned int minNbOutFrames = 10; // minimum number of selected keyframes (smart selection) unsigned int maxNbOutFrames = 2000; // maximum number of selected keyframes (both selections) - float pxDisplacement = 3.0; // percentage of pixels that have moved across frames since last keyframe (smart selection) + float pxDisplacement = 10.0; // percentage of pixels that have moved across frames since last keyframe (smart selection) std::size_t rescaledWidthSharp = 720; // width of the rescaled frames for the sharpness; 0 if no rescale is performed (smart selection) std::size_t rescaledWidthFlow = 720; // width of the rescaled frames for the flow; 0 if no rescale is performed (smart selection) std::size_t sharpnessWindowSize = 200; // sliding window's size in sharpness computation (smart selection) From 99b4e43c1c41e85c41b648479a06ae654b29308e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Candice=20Bent=C3=A9jac?= Date: Mon, 13 Mar 2023 18:59:57 +0100 Subject: [PATCH 27/28] [keyframe] Add some info logs and increase the level of some debug logs --- src/aliceVision/keyframe/KeyframeSelector.cpp | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 60bd91a961..1538cf1603 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -128,11 +128,14 @@ void KeyframeSelector::processRegular() } for (unsigned int id = 0; id < nbFrames; id += step) { - ALICEVISION_LOG_DEBUG("Selecting frame with ID " << id); + ALICEVISION_LOG_INFO("Selecting frame with ID " << id); 
         _selectedKeyframes.push_back(id);
         if (_maxOutFrames > 0 && _selectedKeyframes.size() >= _maxOutFrames)
             break;
     }
+
+    ALICEVISION_LOG_INFO("Finished selecting all the keyframes! " << _selectedKeyframes.size() << "/" <<
+                         nbFrames << " frames have been selected.");
 }
 
 void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_t rescaledWidthSharpness,
@@ -171,14 +174,17 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_
     // Step 2: check whether the min/max output frames constraints are respected
     if (!(subsequenceLimits.size() - 1 >= _minOutFrames && subsequenceLimits.size() - 1 <= _maxOutFrames)) {
-        ALICEVISION_LOG_DEBUG("Preliminary selection does not provide enough frames (" << subsequenceLimits.size() - 1
-                              << " keyframes, should be between " << _minOutFrames << " and " << _maxOutFrames << ")");
+        ALICEVISION_LOG_INFO("Preliminary selection does not provide the right number of frames ("
+                             << subsequenceLimits.size() - 1 << " keyframes, should be between " << _minOutFrames
+                             << " and " << _maxOutFrames << ").");
         std::vector newLimits = subsequenceLimits; // Prevents first 'newLimits.size() - 1' from overflowing
         const double displacementDiff = 0.5; // The displacement must be 0.5px smaller/bigger than the previous one
 
         if (subsequenceLimits.size() - 1 < _minOutFrames) { // Not enough frames, reduce the motion step
+            ALICEVISION_LOG_INFO("Not enough keyframes, the motion step will be reduced by " << displacementDiff
+                                 << "%.");
             bool sampleRegularly = false;
             while (newLimits.size() - 1 < _minOutFrames) {
                 newLimits.clear();
@@ -203,6 +209,8 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_
             if (sampleRegularly) {
                 // Sample regularly the whole sequence to get minOutFrames subsequences
+                ALICEVISION_LOG_INFO("The motion step has been reduced to 0 and cannot be used anymore. Keyframes will "
+                                     "be sampled regularly instead.");
                 newLimits.clear();
                 newLimits.push_back(0);
                 std::size_t stepSize = (sequenceSize / _minOutFrames) + 1;
@@ -213,6 +221,8 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_
             }
         } else { // Too many frames, increase the motion step
+            ALICEVISION_LOG_INFO("Too many keyframes, the motion step will be increased by " << displacementDiff
+                                 << "%.");
             while (newLimits.size() - 1 > _maxOutFrames) {
                 newLimits.clear();
                 newLimits.push_back(0);
@@ -264,12 +274,13 @@ void KeyframeSelector::processSmart(const float pxDisplacement, const std::size_
                 bestSharpness = sharpness;
             }
         }
-        ALICEVISION_LOG_DEBUG("Selecting frame with ID " << bestIndex);
+        ALICEVISION_LOG_INFO("Selecting frame with ID " << bestIndex);
         _selectedKeyframes.push_back(bestIndex);
         _selectedFrames.at(bestIndex) = '1'; // The frame has been selected, flip it to 1
     }
 
-    ALICEVISION_LOG_INFO("Finished selecting all the keyframes!");
+    ALICEVISION_LOG_INFO("Finished selecting all the keyframes! 
" << _selectedKeyframes.size() << "/" << + sequenceSize << " frames have been selected."); } bool KeyframeSelector::computeScores(const std::size_t rescaledWidthSharpness, const std::size_t rescaledWidthFlow, From 5cbbbd8bdeb1b6f22a5c9d97c216b42953727d8f Mon Sep 17 00:00:00 2001 From: Fabien Castan Date: Tue, 14 Mar 2023 14:10:11 +0100 Subject: [PATCH 28/28] [keyframe] build fix: add missing include --- src/aliceVision/keyframe/KeyframeSelector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/aliceVision/keyframe/KeyframeSelector.cpp b/src/aliceVision/keyframe/KeyframeSelector.cpp index 1538cf1603..bbe06d11bc 100644 --- a/src/aliceVision/keyframe/KeyframeSelector.cpp +++ b/src/aliceVision/keyframe/KeyframeSelector.cpp @@ -15,13 +15,14 @@ #include #include #include +#include + namespace fs = boost::filesystem; namespace aliceVision { namespace keyframe { - /** * @brief Get a random int in order to generate uid. * @warning The random doesn't use a repeatable seed to avoid conflicts between different launches on different data sets