// This file is part of the AliceVision project.
// Copyright (c) 2017 AliceVision contributors.
// This Source Code Form is subject to the terms of the Mozilla Public License,
// v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

#pragma once

#include <cstddef> // size_t (keeps this header self-contained)

// allows code sharing between NVCC and other compilers
#if defined(__NVCC__)
#define CUDA_HOST_DEVICE __host__ __device__
#define CUDA_HOST __host__
#else
#define CUDA_HOST_DEVICE
#define CUDA_HOST
#endif

namespace aliceVision {
namespace depthMap {

/**
 * @brief Non-owning accessor over a pitched 2D buffer of T.
 *
 * The wrapper only stores a base address and a row pitch; it never allocates
 * or frees memory. Rows are located with byte arithmetic (base + y * pitch),
 * so the pitch is expressed in BYTES, not in elements.
 * No bounds checking is performed by any accessor.
 *
 * @tparam T element type stored in the buffer
 */
template <class T>
class BufPtr
{
public:
    /**
     * @brief Wrap an existing buffer.
     * @param[in] ptr   address of the first element of the first row
     * @param[in] pitch distance in bytes between the start of two consecutive rows
     */
    CUDA_HOST_DEVICE BufPtr(T* ptr, size_t pitch)
        // A C-style cast is kept on purpose: unlike reinterpret_cast it still
        // compiles when T is const-qualified, as the original code did.
        : _ptr((unsigned char*)ptr)
        , _pitch(pitch)
    {}

    // Not default-constructible and not copyable. The original declared
    // `operator*=` here, which was a typo for the copy-assignment operator.
    BufPtr() = delete;
    BufPtr(const BufPtr&) = delete;
    BufPtr& operator=(const BufPtr&) = delete;

    /// @return address of the first element of the buffer
    CUDA_HOST_DEVICE inline T* ptr() { return (T*)(_ptr); }

    /// @return address of the first element of row y (base + y * pitch bytes)
    CUDA_HOST_DEVICE inline T* row(size_t y) { return (T*)(_ptr + y * _pitch); }

    /// @return reference to the element at column x of row y (unchecked)
    CUDA_HOST_DEVICE inline T& at(size_t x, size_t y) { return row(y)[x]; }

    /// @return address of the first element of the buffer (read-only)
    CUDA_HOST_DEVICE inline const T* ptr() const { return (const T*)(_ptr); }

    /// @return address of the first element of row y (read-only)
    CUDA_HOST_DEVICE inline const T* row(size_t y) const { return (const T*)(_ptr + y * _pitch); }

    /// @return reference to the element at column x of row y (read-only, unchecked)
    CUDA_HOST_DEVICE inline const T& at(size_t x, size_t y) const { return row(y)[x]; }

private:
    unsigned char* const _ptr; //< base address, kept as bytes so the pitch can be applied directly
    const size_t _pitch;       //< row stride in bytes
};

/**
 * @brief Get the address of element (x, y, z) in a pitched 3D buffer.
 *
 * The address is computed as base + z * spitch + y * pitch (in bytes),
 * then advanced by x elements. No bounds checking is performed.
 *
 * @param[in] ptr    address of the first element of the buffer
 * @param[in] spitch distance in bytes between two consecutive z slices
 * @param[in] pitch  distance in bytes between two consecutive rows
 * @param[in] x      column index (in elements)
 * @param[in] y      row index
 * @param[in] z      slice index
 * @return pointer to the requested element
 */
template <class T>
static inline T* get3DBufferAt_h(T* ptr, size_t spitch, size_t pitch, size_t x, size_t y, size_t z)
{
    return ((T*)(((char*)ptr) + z * spitch + y * pitch)) + x;
}

/**
 * @brief Read-only overload: get the address of element (x, y, z) in a pitched 3D buffer.
 *
 * Same addressing as the mutable overload: base + z * spitch + y * pitch (in bytes),
 * then advanced by x elements.
 *
 * @param[in] ptr    address of the first element of the buffer
 * @param[in] spitch distance in bytes between two consecutive z slices
 * @param[in] pitch  distance in bytes between two consecutive rows
 * @param[in] x      column index (in elements)
 * @param[in] y      row index
 * @param[in] z      slice index
 * @return pointer to the requested element
 */
template <class T>
static inline const T* get3DBufferAt_h(const T* ptr, size_t spitch, size_t pitch, size_t x, size_t y, size_t z)
{
    return ((const T*)(((const char*)ptr) + z * spitch + y * pitch)) + x;
}

} // namespace depthMap
} // namespace aliceVision
diff --git a/src/aliceVision/depthMap/CMakeLists.txt b/src/aliceVision/depthMap/CMakeLists.txt index 5d28204225..3137fcdb4f 100644 --- a/src/aliceVision/depthMap/CMakeLists.txt +++ b/src/aliceVision/depthMap/CMakeLists.txt @@ -1,12 +1,16 @@ # Headers set(depthMap_files_headers + BufPtr.hpp computeOnMultiGPUs.hpp depthMap.hpp - DepthSimMap.hpp + depthMapUtils.hpp + DepthMapParams.hpp Refine.hpp RefineParams.hpp Sgm.hpp + SgmDepthList.hpp SgmParams.hpp + Tile.hpp volumeIO.hpp ) @@ -14,62 +18,113 @@ set(depthMap_files_headers set(depthMap_files_sources computeOnMultiGPUs.cpp depthMap.cpp - DepthSimMap.cpp + depthMapUtils.cpp Refine.cpp Sgm.cpp - SgmParams.cpp + SgmDepthList.cpp volumeIO.cpp ) -# Cuda Headers -set(depthMap_cuda_files_headers - # Headers - cuda/deviceCommon/device_patch_es_glob.hpp - cuda/planeSweeping/host_utils.h - cuda/planeSweeping/plane_sweeping_cuda.hpp - # deviceCommon - cuda/deviceCommon/device_color.cu - cuda/deviceCommon/device_global.cu - cuda/deviceCommon/device_matrix.cu - cuda/deviceCommon/device_matrix.cuh - cuda/deviceCommon/device_patch_es.cu - cuda/deviceCommon/device_simStat.cu - cuda/deviceCommon/device_operators.cuh - cuda/deviceCommon/device_utils.cuh - cuda/deviceCommon/device_utils.h - # planeSweeping - cuda/planeSweeping/device_code.cu - cuda/planeSweeping/device_code_refine.cu - cuda/planeSweeping/device_code_volume.cu - cuda/planeSweeping/device_code_fuse.cu - # normalmap - cuda/normalmap/device_eig33.cuh +# Cuda Host Headers Only +set(depthMap_cuda_host_headers + cuda/host/LRUCameraCache.hpp + cuda/host/LRUCache.hpp + cuda/host/divUp.hpp + cuda/host/memory.hpp ) -set_source_files_properties(${depthMap_cuda_files_headers} +# Cuda Host Sources +set(depthMap_cuda_host_sources + cuda/host/utils.hpp + cuda/host/utils.cpp + cuda/host/DeviceStreamManager.cpp + cuda/host/DeviceStreamManager.hpp + cuda/host/DeviceCache.cpp + cuda/host/DeviceCache.hpp + cuda/host/DeviceCamera.cpp + cuda/host/DeviceCamera.hpp +) + +# device CUDA Headers 
Only +set(depthMap_cuda_device_headers + cuda/device/buffer.cuh + cuda/device/color.cuh + cuda/device/eig33.cuh + cuda/device/matrix.cuh + cuda/device/operators.cuh + cuda/device/Patch.cuh + cuda/device/SimStat.cuh +) + +# device CUDA Sources +set(depthMap_cuda_device_sources + cuda/device/DeviceCameraParams.hpp + cuda/device/DeviceCameraParams.cu +) + +# imageProcessing CUDA Sources +set(depthMap_cuda_imageProcessing_sources + cuda/imageProcessing/deviceGaussianFilter.hpp + cuda/imageProcessing/deviceGaussianFilter.cu + cuda/imageProcessing/deviceColorConversion.hpp + cuda/imageProcessing/deviceColorConversion.cu +) + +# normalMapping CUDA Headers Only +set(depthMap_cuda_normalMapping_headers + cuda/normalMapping/deviceNormalMapKernels.cuh +) + +# normalMapping CUDA Sources +set(depthMap_cuda_normalMapping_sources + cuda/normalMapping/deviceNormalMap.hpp + cuda/normalMapping/deviceNormalMap.cu + cuda/normalMapping/DeviceNormalMapper.hpp + cuda/normalMapping/DeviceNormalMapper.cpp +) + +# planeSweeping CUDA Headers Only +set(depthMap_cuda_planeSweeping_headers + cuda/planeSweeping/deviceDepthSimilarityMapKernels.cuh + cuda/planeSweeping/deviceSimilarityVolumeKernels.cuh +) + +# planeSweeping CUDA Sources +set(depthMap_cuda_planeSweeping_sources + cuda/planeSweeping/similarity.hpp + cuda/planeSweeping/deviceDepthSimilarityMap.hpp + cuda/planeSweeping/deviceDepthSimilarityMap.cu + cuda/planeSweeping/deviceSimilarityVolume.hpp + cuda/planeSweeping/deviceSimilarityVolume.cu +) + +set_source_files_properties(${depthMap_cuda_host_headers} + ${depthMap_cuda_device_headers} + ${depthMap_cuda_normalMapping_headers} + ${depthMap_cuda_planeSweeping_headers} + PROPERTIES HEADER_FILE_ONLY true ) +source_group("aliceVision_depthMap_cuda_host" FILES ${depthMap_cuda_host_headers} ${depthMap_cuda_host_sources}) +source_group("aliceVision_depthMap_cuda_device" FILES ${depthMap_cuda_device_headers} ${depthMap_cuda_device_sources}) 
+source_group("aliceVision_depthMap_cuda_imageProcessing" FILES ${depthMap_cuda_imageProcessing_sources}) +source_group("aliceVision_depthMap_cuda_normalMapping" FILES ${depthMap_cuda_normalMapping_headers} ${depthMap_cuda_normalMapping_sources}) +source_group("aliceVision_depthMap_cuda_planeSweeping" FILES ${depthMap_cuda_planeSweeping_headers} ${depthMap_cuda_planeSweeping_sources}) + # Cuda Sources set(depthMap_cuda_files_sources - cuda/commonStructures.hpp - cuda/FrameCacheMemory.cpp - cuda/FrameCacheMemory.hpp - cuda/LRUCache.hpp - cuda/OneTC.hpp - cuda/PlaneSweepingCuda.cpp - cuda/PlaneSweepingCuda.hpp - cuda/planeSweeping/plane_sweeping_cuda.hpp - cuda/planeSweeping/plane_sweeping_cuda.cu - cuda/normalmap/normal_map.hpp - cuda/normalmap/normal_map.cu - cuda/images/gauss_filter.hpp - cuda/images/gauss_filter.cu - ${depthMap_cuda_files_headers} + ${depthMap_cuda_host_headers} + ${depthMap_cuda_host_sources} + ${depthMap_cuda_device_headers} + ${depthMap_cuda_device_sources} + ${depthMap_cuda_imageProcessing_sources} + ${depthMap_cuda_normalMapping_headers} + ${depthMap_cuda_normalMapping_sources} + ${depthMap_cuda_planeSweeping_headers} + ${depthMap_cuda_planeSweeping_sources} ) -source_group("aliceVision_depthMap_cuda" FILES ${depthMap_cuda_files_sources}) - alicevision_add_library(aliceVision_depthMap USE_CUDA SOURCES @@ -81,6 +136,7 @@ alicevision_add_library(aliceVision_depthMap aliceVision_mvsUtils aliceVision_system Boost::filesystem + assimp::assimp ${CUDA_CUDADEVRT_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} #TODO shouldn't be here, but required to build on some machines PRIVATE_LINKS diff --git a/src/aliceVision/depthMap/DepthMapParams.hpp b/src/aliceVision/depthMap/DepthMapParams.hpp new file mode 100644 index 0000000000..17ccb0942d --- /dev/null +++ b/src/aliceVision/depthMap/DepthMapParams.hpp @@ -0,0 +1,37 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. 
+// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include + +namespace aliceVision { +namespace depthMap { + +/** + * @brief Depth Map Parameters + */ +struct DepthMapParams +{ + // user parameters + + mvsUtils::TileParams tileParams; //< tiling parameters + SgmParams sgmParams; //< parameters of Sgm process + RefineParams refineParams; //< parameters of Refine process + int maxTCams = 10; //< global T cameras maximum + bool chooseTCamsPerTile = true; //< choose T cameras per R tile or for the entire R image + bool exportTilePattern = false; //< export tile pattern obj + bool autoAdjustSmallImage = true; //< allow program to override parameters for the single tile case + + // constant parameters + + const bool useRefine = true; //< for debug purposes: enable or disable Refine process +}; + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/DepthSimMap.cpp b/src/aliceVision/depthMap/DepthSimMap.cpp deleted file mode 100644 index 510a7998b1..0000000000 --- a/src/aliceVision/depthMap/DepthSimMap.cpp +++ /dev/null @@ -1,483 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#include "DepthSimMap.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define ALICEVISION_DEPTHMAP_UPSCALE_NEAREST_NEIGHBOR - - -namespace aliceVision { -namespace depthMap { - -DepthSimMap::DepthSimMap(int rc, const mvsUtils::MultiViewParams& mp, int scale, int step) - : _scale(scale) - , _step(step) - , _mp(mp) - , _rc(rc) -{ - _w = _mp.getWidth(_rc) / (_scale * _step); - _h = _mp.getHeight(_rc) / (_scale * _step); - _dsm.resize_with(_w * _h, DepthSim(-1.0f, 1.0f)); -} - -DepthSimMap::~DepthSimMap() -{ -} - -DepthSim getPixelValueInterpolated(const StaticVector& depthSimMap, double x, double y, int width, int height) -{ -#ifdef ALICEVISION_DEPTHMAP_UPSCALE_NEAREST_NEIGHBOR - // Nearest neighbor, no interpolation - int xp = static_cast(x + 0.5); - int yp = static_cast(y + 0.5); - - xp = std::min(xp, width - 1); - yp = std::min(yp, height - 1); - - return depthSimMap[yp * width + xp]; -#else - // Interpolate using the distance to the pixels center - int xp = static_cast(x); - int yp = static_cast(y); - xp = std::min(xp, width - 2); - yp = std::min(yp, height - 2); - const DepthSim lu = depthSimMap[yp * width + xp ]; - const DepthSim ru = depthSimMap[yp * width + xp + 1]; - const DepthSim rd = depthSimMap[(yp + 1) * width + xp + 1]; - const DepthSim ld = depthSimMap[(yp + 1) * width + xp ]; - - if(lu.depth <= 0.0f || ru.depth <= 0.0f || - rd.depth <= 0.0f || ld.depth <= 0.0f) - { - DepthSim acc(0.0f, 0.0f); - int count = 0; - if(lu.depth > 0.0f) - { - acc = acc + lu; - ++count; - } - if(ru.depth > 0.0f) - { - acc = acc + ru; - ++count; - } - if(rd.depth > 0.0f) - { - acc = acc + rd; - ++count; - } - if(ld.depth > 0.0f) - { - acc = acc + ld; - ++count; - } - if(count != 0) - { - return acc / float(count); - } - else - { - return DepthSim(-1.0f, 1.0f); - } - } - - // bilinear interpolation - const float ui = x - static_cast(xp); - const float vi = y - static_cast(yp); - const DepthSim u = lu + (ru - lu) 
* ui; - const DepthSim d = ld + (rd - ld) * ui; - const DepthSim out = u + (d - u) * vi; - - return out; -#endif -} - -void DepthSimMap::initFromSmaller(const DepthSimMap& other) -{ - if ((_scale * _step) > (other._scale * other._step)) - { - throw std::runtime_error("Error DepthSimMap: You cannot init from a larger map."); - } - const double ratio = double(_scale * _step) / double(other._scale * other._step); - - ALICEVISION_LOG_DEBUG("DepthSimMap::initFromSmaller: ratio=" << ratio << ", otherScaleStep=" << other._scale * other._step << ", scaleStep=" << _scale * _step); - for (int y = 0; y < _h; ++y) - { - const double oy = (double(y) - 0.5) * ratio; - for (int x = 0; x < _w; ++x) - { - const double ox = (double(x) - 0.5) * ratio; - const DepthSim otherDepthSim = getPixelValueInterpolated(other._dsm, ox, oy, other._w, other._h); - _dsm[y * _w + x] = otherDepthSim; - } - } -} - -void DepthSimMap::init(const DepthSimMap& other) -{ - if ((_scale != other._scale) || (_step != other._step)) - { - throw std::runtime_error("Error DepthSimMap: You can only add to the same _scale and step map."); - } - - for (int i = 0; i < _dsm.size(); i++) - { - _dsm[i] = other._dsm[i]; - } -} - -Point2d DepthSimMap::getMaxMinDepth() const -{ - float maxDepth = -1.0f; - float minDepth = std::numeric_limits::max(); - for (int j = 0; j < _w * _h; j++) - { - if (_dsm[j].depth > -1.0f) - { - maxDepth = std::max(maxDepth, _dsm[j].depth); - minDepth = std::min(minDepth, _dsm[j].depth); - } - } - return Point2d(maxDepth, minDepth); -} - -Point2d DepthSimMap::getMaxMinSim() const -{ - float maxSim = -1.0f; - float minSim = std::numeric_limits::max(); - for (int j = 0; j < _w * _h; j++) - { - if (_dsm[j].sim > -1.0f) - { - maxSim = std::max(maxSim, _dsm[j].sim); - minSim = std::min(minSim, _dsm[j].sim); - } - } - return Point2d(maxSim, minSim); -} - -float DepthSimMap::getPercentileDepth(float perc) const -{ - int step = std::max(1, (_w * _h) / 50000); - int n = (_w * _h) / std::max(1, (step - 
1)); - StaticVector depths; - depths.reserve(n); - - for (int j = 0; j < _w * _h; j += step) - { - if (_dsm[j].depth > -1.0f) - { - depths.push_back(_dsm[j].depth); - } - } - - qsort(&depths[0], depths.size(), sizeof(float), qSortCompareFloatAsc); - - float out = depths[(float)((float)depths.size() * perc)]; - - return out; -} - -/** -* @brief Get depth map at the size of our input image (with _scale applied) -* from an internal buffer only computed for a subpart (based on the step). -*/ -void DepthSimMap::getDepthMapStep1(image::Image& out_depthMap) const -{ - // Size of our input image (with _scale applied) - const int wdm = _mp.getWidth(_rc) / _scale; - const int hdm = _mp.getHeight(_rc) / _scale; - - // Create a depth map at the size of our input image - out_depthMap.resize(wdm, hdm); - - const double ratio = 1.0 / double(_step); - - ALICEVISION_LOG_DEBUG("DepthSimMap::getDepthMapStep1: ratio=" << ratio); - for (int y = 0; y < hdm; ++y) - { - const double oy = (double(y) - 0.5) * ratio; - for (int x = 0; x < wdm; ++x) - { - const double ox = (double(x) - 0.5) * ratio; - const float depth = getPixelValueInterpolated(_dsm, ox, oy, _w, _h).depth; - out_depthMap(y, x) = depth; - } - } -} - -void DepthSimMap::getSimMapStep1(image::Image& out_simMap) const -{ - // Size of our input image (with _scale applied) - const int wdm = _mp.getWidth(_rc) / _scale; - const int hdm = _mp.getHeight(_rc) / _scale; - - // Create a depth map at the size of our input image - out_simMap.resize(wdm, hdm); - - const double ratio = 1.0 / double(_step); - - ALICEVISION_LOG_DEBUG("DepthSimMap::getDepthMapStep1: ratio=" << ratio); - for (int y = 0; y < hdm; ++y) - { - const double oy = (double(y) - 0.5) * ratio; - for (int x = 0; x < wdm; ++x) - { - const double ox = (double(x) - 0.5) * ratio; - const float sim = getPixelValueInterpolated(_dsm, ox, oy, _w, _h).sim; - out_simMap(y, x) = sim; - } - } -} - -void DepthSimMap::getDepthMapStep1XPart(StaticVector& out_depthMap, int xFrom, int 
partW) -{ - int wdm = _mp.getWidth(_rc) / _scale; - int hdm = _mp.getHeight(_rc) / _scale; - - out_depthMap.resize_with(wdm * hdm, -1.0f); - for (int yp = 0; yp < hdm; yp++) - { - for (int xp = xFrom; xp < xFrom + partW; xp++) - { - int x = xp / _step; - int y = yp / _step; - if ((x < _w) && (y < _h)) - { - float depth = _dsm[y * _w + x].depth; - out_depthMap[yp * partW + (xp - xFrom)] = depth; - } - } - } -} - -void DepthSimMap::getSimMapStep1XPart(StaticVector& out_simMap, int xFrom, int partW) -{ - int wdm = _mp.getWidth(_rc) / _scale; - int hdm = _mp.getHeight(_rc) / _scale; - - out_simMap.resize_with(wdm * hdm, -1.0f); - for (int yp = 0; yp < hdm; yp++) - { - for (int xp = xFrom; xp < xFrom + partW; xp++) - { - int x = xp / _step; - int y = yp / _step; - if ((x < _w) && (y < _h)) - { - float sim = _dsm[y * _w + x].sim; - out_simMap[yp * partW + (xp - xFrom)] = sim; - } - } - } -} - -void DepthSimMap::initJustFromDepthMap(const StaticVector& depthMap, float defaultSim) -{ - int wdm = _mp.getWidth(_rc) / _scale; - - for (int i = 0; i < _dsm.size(); i++) - { - int x = (i % _w) * _step; - int y = (i / _w) * _step; - if ((x < _w) && (y < _h)) - { - _dsm[i].depth = depthMap[y * wdm + x]; - _dsm[i].sim = defaultSim; - } - } -} - -void DepthSimMap::initJustFromDepthMap(const DepthSimMap& depthSimMap, float defaultSim) -{ - if (depthSimMap._w != _w || depthSimMap._h != _h) - throw std::runtime_error("DepthSimMap:initJustFromDepthMap: Error input depth map is not at the same size."); - - for (int y = 0; y < _h; ++y) - { - for (int x = 0; x < _w; ++x) - { - DepthSim& ds = _dsm[y * _w + x]; - ds.depth = depthSimMap._dsm[y * depthSimMap._w + x].depth; - ds.sim = defaultSim; - } - } -} - -void DepthSimMap::initFromDepthMapAndSimMap(const image::Image& depthMapT, - const image::Image& simMapT, - int depthSimMapsScale) -{ - int wdm = _mp.getWidth(_rc) / depthSimMapsScale; - int hdm = _mp.getHeight(_rc) / depthSimMapsScale; - - for (int i = 0; i < _dsm.size(); i++) - { - int x 
= (((i % _w) * _step) * _scale) / depthSimMapsScale; - int y = (((i / _w) * _step) * _scale) / depthSimMapsScale; - if ((x < wdm) && (y < hdm)) - { - int index = y * wdm + x; - _dsm[i].depth = depthMapT(index); - _dsm[i].sim = simMapT(index); - } - } -} - -void DepthSimMap::getDepthMap(image::Image& out_depthMap) const -{ - out_depthMap.resize(_w, _h); - for (int i = 0; i < _dsm.size(); i++) - { - out_depthMap(i) = _dsm[i].depth; - } -} - -void DepthSimMap::getSimMap(image::Image& out_simMap) const -{ - out_simMap.resize(_w, _h); - for (int i = 0; i < _dsm.size(); i++) - { - out_simMap(i) = _dsm[i].sim; - } -} - -void DepthSimMap::saveToImage(const std::string& filename, float simThr) const -{ - const int bufferWidth = 2 * _w; - image::Image colorBuffer(bufferWidth, _h); - - try - { - Point2d maxMinDepth; - maxMinDepth.x = getPercentileDepth(0.9) * 1.1; - maxMinDepth.y = getPercentileDepth(0.01) * 0.8; - - Point2d maxMinSim = Point2d(simThr, -1.0f); - if (simThr < -1.0f) - { - Point2d autoMaxMinSim = getMaxMinSim(); - // only use it if the default range is valid - if (std::abs(autoMaxMinSim.x - autoMaxMinSim.y) > std::numeric_limits::epsilon()) - maxMinSim = autoMaxMinSim; - - if (_mp.verbose) - ALICEVISION_LOG_DEBUG("saveToImage: max : " << maxMinSim.x << ", min: " << maxMinSim.y); - } - - for (int y = 0; y < _h; y++) - { - for (int x = 0; x < _w; x++) - { - const DepthSim& depthSim = _dsm[y * _w + x]; - float depth = (depthSim.depth - maxMinDepth.y) / (maxMinDepth.x - maxMinDepth.y); - colorBuffer(y, x) = getColorFromJetColorMap(depth); - - float sim = (depthSim.sim - maxMinSim.y) / (maxMinSim.x - maxMinSim.y); - colorBuffer(y, _w + x) = getColorFromJetColorMap(sim); - } - } - image::writeImage(filename, colorBuffer, - image::ImageWriteOptions().toColorSpace(image::EImageColorSpace::LINEAR) - .storageDataType(image::EStorageDataType::Float)); - } - catch (...) 
- { - ALICEVISION_LOG_ERROR("Failed to save '" << filename << "' (simThr: " << simThr << ")"); - } -} - -void DepthSimMap::save(const std::string& customSuffix, bool useStep1) const -{ - image::Image depthMap; - image::Image simMap; - if (useStep1) - { - getDepthMapStep1(depthMap); - getSimMapStep1(simMap); - } - else - { - getDepthMap(depthMap); - getSimMap(simMap); - } - - const int step = (useStep1 ? 1 : _step); - const int scaleStep = _scale * step; - - auto metadata = image::getMetadataFromMap(_mp.getMetadata(_rc)); - metadata.push_back(oiio::ParamValue("AliceVision:downscale", _mp.getDownscaleFactor(_rc) * scaleStep)); - - double s = scaleStep; - Point3d C = _mp.CArr[_rc]; - Matrix3x3 iP = _mp.iCamArr[_rc]; - if (s > 1.0) - { - Matrix3x4 P = _mp.camArr[_rc]; - for (int i = 0; i < 8; ++i) - P.m[i] /= s; - Matrix3x3 K, iK; - Matrix3x3 R, iR; - - P.decomposeProjectionMatrix(K, R, C); // replace C - iK = K.inverse(); - iR = R.inverse(); - iP = iR * iK; // replace iP - } - - metadata.push_back(oiio::ParamValue("AliceVision:CArr", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, oiio::TypeDesc::VEC3), 1, C.m)); - metadata.push_back(oiio::ParamValue("AliceVision:iCamArr", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, oiio::TypeDesc::MATRIX33), 1, iP.m)); - - { - const Point2d maxMinDepth = getMaxMinDepth(); - metadata.push_back(oiio::ParamValue("AliceVision:minDepth", static_cast(maxMinDepth.y))); - metadata.push_back(oiio::ParamValue("AliceVision:maxDepth", static_cast(maxMinDepth.x))); - } - - { - std::vector matrixP = _mp.getOriginalP(_rc); - metadata.push_back(oiio::ParamValue("AliceVision:P", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, oiio::TypeDesc::MATRIX44), 1, matrixP.data())); - } - - const int nbDepthValues = std::count_if(depthMap.data(), depthMap.data() + depthMap.size(), [](float v) { return v > 0.0f; }); - metadata.push_back(oiio::ParamValue("AliceVision:nbDepthValues", oiio::TypeDesc::INT32, 1, &nbDepthValues)); - - image::writeImage(getFileNameFromIndex(_mp, _rc, 
mvsUtils::EFileType::depthMap, _scale, customSuffix), - depthMap, - image::ImageWriteOptions().toColorSpace(image::EImageColorSpace::LINEAR) - .storageDataType(image::EStorageDataType::Float), metadata); - image::writeImage(getFileNameFromIndex(_mp, _rc, mvsUtils::EFileType::simMap, _scale, customSuffix), - simMap, - image::ImageWriteOptions().toColorSpace(image::EImageColorSpace::LINEAR) - .storageDataType(image::EStorageDataType::Half), metadata); -} - -void DepthSimMap::load(int fromScale) -{ - image::Image depthMap; - image::Image simMap; - - image::readImage(getFileNameFromIndex(_mp, _rc, mvsUtils::EFileType::depthMap, fromScale), - depthMap, image::EImageColorSpace::NO_CONVERSION); - image::readImage(getFileNameFromIndex(_mp, _rc, mvsUtils::EFileType::simMap, fromScale), - simMap, image::EImageColorSpace::NO_CONVERSION); - - initFromDepthMapAndSimMap(depthMap, simMap, fromScale); -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/DepthSimMap.hpp b/src/aliceVision/depthMap/DepthSimMap.hpp deleted file mode 100644 index 9a7b1e0b5b..0000000000 --- a/src/aliceVision/depthMap/DepthSimMap.hpp +++ /dev/null @@ -1,126 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace aliceVision { -namespace depthMap { - -class DepthSim -{ -public: - union { - struct - { - float depth, sim; - }; - float m[2]; - }; - - inline DepthSim() - { - depth = 0.0; - sim = 0.0; - } - - inline DepthSim(float _depth, float _sim) - { - depth = _depth; - sim = _sim; - } - - inline DepthSim& operator=(const DepthSim& v) - { - depth = v.depth; - sim = v.sim; - return *this; - } - - inline DepthSim operator+(const DepthSim& v) const - { - DepthSim out; - out.depth = depth + v.depth; - out.sim = sim + v.sim; - return out; - } - inline DepthSim operator-(const DepthSim& v) const - { - DepthSim out; - out.depth = depth - v.depth; - out.sim = sim - v.sim; - return out; - } - inline DepthSim operator*(float v) const - { - DepthSim out; - out.depth = depth * v; - out.sim = sim * v; - return out; - } - inline DepthSim operator/(float v) const - { - DepthSim out; - out.depth = depth / v; - out.sim = sim / v; - return out; - } - inline bool operator<(const DepthSim& other) const - { - if(depth == other.depth) - return sim < other.sim; - return (depth < other.depth); - } -}; - - -class DepthSimMap -{ -public: - const mvsUtils::MultiViewParams& _mp; - const int _scale; - const int _step; - int _rc, _w, _h; - StaticVector _dsm; //< depth similarity map - - DepthSimMap(int rc, const mvsUtils::MultiViewParams& mp, int scale, int step); - ~DepthSimMap(); - - void initJustFromDepthMap(const StaticVector& depthMap, float defaultSim); - void initJustFromDepthMap(const DepthSimMap& depthSimMap, float defaultSim); - void initFromDepthMapAndSimMap(const image::Image& depthMapT, - const image::Image& simMapT, - int depthSimMapsScale); - - void initFromSmaller(const DepthSimMap& depthSimMap); - void init(const DepthSimMap& depthSimMap); - - Point2d getMaxMinDepth() const; - Point2d getMaxMinSim() const; - - float getPercentileDepth(float perc) const; - void 
getDepthMapStep1(image::Image& out_depthMap) const; - void getSimMapStep1(image::Image& out_simMap) const; - void getDepthMap(image::Image& out_depthMap) const; - void getSimMap(image::Image& out_simMap) const; - - void getDepthMapStep1XPart(StaticVector& out_depthMap, int xFrom, int partW); - void getSimMapStep1XPart(StaticVector& out_depthMap, int xFrom, int partW); - - void saveToImage(const std::string& filename, float simThr) const; - void save(const std::string& customSuffix = "", bool useStep1 = false) const; - void load(int fromScale); -}; - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/Refine.cpp b/src/aliceVision/depthMap/Refine.cpp index 92c84852d7..6ef2b7b0d1 100644 --- a/src/aliceVision/depthMap/Refine.cpp +++ b/src/aliceVision/depthMap/Refine.cpp @@ -8,312 +8,294 @@ #include #include -#include -#include - -#include -#include - #include #include -#include - #include -#include - -#include +#include +#include +#include +#include +#include namespace aliceVision { namespace depthMap { -namespace bfs = boost::filesystem; - -Refine::Refine(const RefineParams& refineParams, const mvsUtils::MultiViewParams& mp, PlaneSweepingCuda& cps, int rc) - : _rc(rc) - , _mp(mp) - , _cps(cps) +Refine::Refine(const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const RefineParams& refineParams, + cudaStream_t stream) + : _mp(mp) + , _tileParams(tileParams) , _refineParams(refineParams) - , _depthSimMap(_rc, _mp, 1, 1) + , _stream(stream) { - _tCams = _mp.findNearestCamsFromLandmarks(_rc, _refineParams.maxTCams); -} + // get tile maximum dimensions + const int downscale = _refineParams.scale * _refineParams.stepXY; + const int maxTileWidth = divideRoundUp(tileParams.bufferWidth , downscale); + const int maxTileHeight = divideRoundUp(tileParams.bufferHeight, downscale); -Refine::~Refine() -{} + // compute depth/sim map maximum dimensions + const CudaSize<2> depthSimMapDim(maxTileWidth, maxTileHeight); 
-void Refine::upscaleSgmDepthSimMap(const DepthSimMap& sgmDepthSimMap, DepthSimMap& out_depthSimMapUpscaled) const -{ - const int w = _mp.getWidth(_rc); - const int h = _mp.getHeight(_rc); + // allocate depth/sim maps in device memory + _sgmDepthPixSizeMap_dmp.allocate(depthSimMapDim); + _refinedDepthSimMap_dmp.allocate(depthSimMapDim); + _optimizedDepthSimMap_dmp.allocate(depthSimMapDim); + + // allocate normal map in device memory + if(refineParams.useNormalMap) + _normalMap_dmp.allocate(depthSimMapDim); + + // compute volume maximum dimensions + const int nbDepthsToRefine = _refineParams.halfNbDepths * 2 + 1; + const CudaSize<3> volDim(maxTileWidth, maxTileHeight, nbDepthsToRefine); - out_depthSimMapUpscaled.initFromSmaller(sgmDepthSimMap); + // allocate refine volume in device memory + _volumeRefineSim_dmp.allocate(volDim); - // set sim (y) to pixsize - for(int y = 0; y < h; ++y) + // allocate depth/sim map optimization buffers + if(_refineParams.useColorOptimization) { - for(int x = 0; x < w; ++x) - { - const Point3d p = _mp.CArr[_rc] + (_mp.iCamArr[_rc] * Point2d(static_cast(x), static_cast(y))).normalize() * out_depthSimMapUpscaled._dsm[y * w + x].depth; - DepthSim& depthSim = out_depthSimMapUpscaled._dsm[y * w + x]; - - if(_refineParams.useTcOrRcPixSize) - { - depthSim.sim = _mp.getCamsMinPixelSize(p, _tCams); - } - else - { - depthSim.sim = _mp.getCamPixelSize(p, _rc); - } - } + _optImgVariance_dmp.allocate(depthSimMapDim); + _optTmpDepthMap_dmp.allocate(depthSimMapDim); } } -void Refine::filterMaskedPixels(DepthSimMap& out_depthSimMap) +double Refine::getDeviceMemoryConsumption() const { - auto img = _cps._ic.getImg_sync(_rc); + size_t bytes = 0; - const int h = _mp.getHeight(_rc); - const int w = _mp.getWidth(_rc); + bytes += _sgmDepthPixSizeMap_dmp.getBytesPadded(); + bytes += _refinedDepthSimMap_dmp.getBytesPadded(); + bytes += _optimizedDepthSimMap_dmp.getBytesPadded(); + bytes += _normalMap_dmp.getBytesPadded(); + bytes += 
_volumeRefineSim_dmp.getBytesPadded(); - for(int y = 0; y < h; ++y) + if(_refineParams.useColorOptimization) { - for(int x = 0; x < w; ++x) - { - const image::RGBAfColor& floatRGBA = (*img)(y, x); - - if (floatRGBA.a() < 0.1f) - { - DepthSim& depthSim = out_depthSimMap._dsm[y * w + x]; - - depthSim.depth = -2.0; - depthSim.sim = -1.0; - } - } + bytes += _optImgVariance_dmp.getBytesPadded(); + bytes += _optTmpDepthMap_dmp.getBytesPadded(); } + + return (double(bytes) / (1024.0 * 1024.0)); } -void Refine::refineDepthSimMapPerTc(int tc, DepthSimMap& depthSimMap) const +double Refine::getDeviceMemoryConsumptionUnpadded() const { - const system::Timer timer; - - ALICEVISION_LOG_DEBUG("Refine depth/sim map per tc (rc: " << _rc << ", tc: " << tc << ")"); + size_t bytes = 0; - const int scale = depthSimMap._scale; // for now should be 1 - const int w = _mp.getWidth(_rc) / scale; - const int h = _mp.getHeight(_rc) / scale; + bytes += _sgmDepthPixSizeMap_dmp.getBytesUnpadded(); + bytes += _refinedDepthSimMap_dmp.getBytesUnpadded(); + bytes += _optimizedDepthSimMap_dmp.getBytesUnpadded(); + bytes += _normalMap_dmp.getBytesUnpadded(); + bytes += _volumeRefineSim_dmp.getBytesUnpadded(); - // slicing in order to fit into GPU memory - const int nParts = 4; - const int wPart = w / nParts; - - for(int p = 0; p < nParts; ++p) + if(_refineParams.useColorOptimization) { - const int xFrom = p * wPart; - const int wPartAct = std::min(wPart, w - xFrom); - - StaticVector depthMap; - depthSimMap.getDepthMapStep1XPart(depthMap, xFrom, wPartAct); - - StaticVector simMap; - depthSimMap.getSimMapStep1XPart(simMap, xFrom, wPartAct); - - _cps.refineRcTcDepthMap(_rc, tc, depthMap, simMap, _refineParams, xFrom, wPartAct); - - for(int yp = 0; yp < h; ++yp) - { - for(int xp = xFrom; xp < xFrom + wPartAct; ++xp) - { - const float depth = depthMap[yp * wPartAct + (xp - xFrom)]; - const float sim = simMap[yp * wPartAct + (xp - xFrom)]; - const float oldSim = depthSimMap._dsm[(yp / depthSimMap._step) * 
depthSimMap._w + (xp / depthSimMap._step)].sim; - - if((depth > 0.0f) && (sim < oldSim)) - { - depthSimMap._dsm[(yp / depthSimMap._step) * depthSimMap._w + (xp / depthSimMap._step)] = DepthSim(depth, sim); - } - } - } + bytes += _optImgVariance_dmp.getBytesUnpadded(); + bytes += _optTmpDepthMap_dmp.getBytesUnpadded(); } - ALICEVISION_LOG_DEBUG("Refine depth/sim map per tc (rc: " << _rc << ", tc: " << tc << ") done in: " << timer.elapsedMs() << " ms."); + return (double(bytes) / (1024.0 * 1024.0)); } -void Refine::refineAndFuseDepthSimMap(const DepthSimMap& depthSimMapSgmUpscale, DepthSimMap& out_depthSimMapRefinedFused) const +void Refine::refineRc(const Tile& tile, const CudaDeviceMemoryPitched& in_sgmDepthSimMap_dmp, const CudaDeviceMemoryPitched& in_sgmNormalMap_dmp) { - const system::Timer timer; + const IndexT viewId = _mp.getViewId(tile.rc); - ALICEVISION_LOG_INFO("Refine and fuse depth/sim map (rc: " << _rc << ")"); + ALICEVISION_LOG_INFO(tile << "Refine depth/sim map of view id: " << viewId << ", rc: " << tile.rc << " (" << (tile.rc + 1) << " / " << _mp.ncams << ")."); - const int w = _mp.getWidth(_rc); - const int h = _mp.getHeight(_rc); + // compute upscaled SGM depth/pixSize map + { + // downscale the region of interest + const ROI downscaledRoi = downscaleROI(tile.roi, _refineParams.scale * _refineParams.stepXY); - StaticVector dataMaps; - dataMaps.reserve(_tCams.size() + 1); + // get R device camera from cache + DeviceCache& deviceCache = DeviceCache::getInstance(); + const DeviceCamera& rcDeviceCamera = deviceCache.requestCamera(tile.rc, _refineParams.scale, _mp); - // Put the raw upscaled SGM result first: - dataMaps.push_back(&depthSimMapSgmUpscale); // DO NOT ERASE ! 
+ // upscale SGM depth/sim map and filter masked pixels (alpha) + cuda_depthSimMapUpscaleAndFilter(_sgmDepthPixSizeMap_dmp, in_sgmDepthSimMap_dmp, rcDeviceCamera, _refineParams, downscaledRoi, _stream); - for(int c = 0; c < _tCams.size(); ++c) - { - const int tc = _tCams[c]; + // export intermediate depth/sim map (if requested by user) + if(_refineParams.exportIntermediateDepthSimMaps) + writeDepthSimMap(tile.rc, _mp, _tileParams, tile.roi, _sgmDepthPixSizeMap_dmp, _refineParams.scale, _refineParams.stepXY, "_sgmUpscaled"); - DepthSimMap* depthSimMapC = new DepthSimMap(_rc, _mp, 1, 1); - depthSimMapC->initJustFromDepthMap(depthSimMapSgmUpscale, 1.0f); + // compute pixSize to replace similarity (this is usefull for depth/sim map optimization) + cuda_depthSimMapComputePixSize(_sgmDepthPixSizeMap_dmp, rcDeviceCamera, _refineParams, downscaledRoi, _stream); - refineDepthSimMapPerTc(tc, *depthSimMapC); - - dataMaps.push_back(depthSimMapC); - - if(_refineParams.exportIntermediateResults) + if(_refineParams.useNormalMap && in_sgmNormalMap_dmp.getBuffer() != nullptr) { - depthSimMapC->save("_refine_tc_" + std::to_string(tc) + "_" + std::to_string(_mp.getViewId(tc))); + cuda_normalMapUpscale(_normalMap_dmp, in_sgmNormalMap_dmp, downscaledRoi, _stream); } } - // slicing in order to fit into GPU memory - const int nhParts = 4; - const int hPartHeightGlob = h / nhParts; - - for(int hPart = 0; hPart < nhParts; hPart++) + // refine and fuse depth/sim map + if(_refineParams.useRefineFuse) + { + // refine and fuse with volume strategy + refineAndFuseDepthSimMap(tile); + } + else { - const int hPartHeight = std::min(h, (hPart + 1) * hPartHeightGlob) - hPart * hPartHeightGlob; + ALICEVISION_LOG_INFO(tile << "Refine and fuse depth/sim map volume disabled."); + cuda_depthSimMapCopyDepthOnly(_refinedDepthSimMap_dmp, _sgmDepthPixSizeMap_dmp, 1.0f, _stream); + } - // vector of one depthSimMap tile per T cameras - StaticVector*> dataMapsHPart; - dataMapsHPart.reserve(dataMaps.size()); + 
// export intermediate depth/sim map (if requested by user) + if(_refineParams.exportIntermediateDepthSimMaps) + writeDepthSimMap(tile.rc, _mp, _tileParams, tile.roi, _refinedDepthSimMap_dmp, _refineParams.scale, _refineParams.stepXY, "_refinedFused"); - for(int i = 0; i < dataMaps.size(); ++i) // iterate over T cameras - { - StaticVector* dataMapHPart = new StaticVector(); - dataMapHPart->resize(w * hPartHeight); + // optimize depth/sim map + if(_refineParams.useColorOptimization && _refineParams.optimizationNbIterations > 0) + { + optimizeDepthSimMap(tile); + } + else + { + ALICEVISION_LOG_INFO(tile << "Color optimize depth/sim map disabled."); + _optimizedDepthSimMap_dmp.copyFrom(_refinedDepthSimMap_dmp, _stream); + } - const StaticVector& dsm = dataMaps[i]->_dsm; + ALICEVISION_LOG_INFO(tile << "Refine depth/sim map done."); +} -#pragma omp parallel for - for(int y = 0; y < hPartHeight; y++) - { - for(int x = 0; x < w; x++) - { - (*dataMapHPart)[y * w + x] = dsm[(y + hPart * hPartHeightGlob) * w + x]; - } - } +void Refine::refineAndFuseDepthSimMap(const Tile& tile) +{ + ALICEVISION_LOG_INFO(tile << "Refine and fuse depth/sim map volume."); - dataMapsHPart.push_back(dataMapHPart); - } + // downscale the region of interest + const ROI downscaledRoi = downscaleROI(tile.roi, _refineParams.scale * _refineParams.stepXY); - StaticVector depthSimMapFusedHPart; - depthSimMapFusedHPart.resize_with(w * hPartHeight, DepthSim(-1.0f, 1.0f)); + // get the depth range + const Range depthRange(0, _volumeRefineSim_dmp.getSize().z()); - _cps.fuseDepthSimMapsGaussianKernelVoting(w, hPartHeight, - depthSimMapFusedHPart, - dataMapsHPart, - _refineParams); + // initialize the similarity volume at 0 + // each tc filtered and inverted similarity value will be summed in this volume + cuda_volumeInitialize(_volumeRefineSim_dmp, TSimRefine(0.f), _stream); -#pragma omp parallel for - for(int y = 0; y < hPartHeight; ++y) - { - for(int x = 0; x < w; ++x) - { - 
out_depthSimMapRefinedFused._dsm[(y + hPart * hPartHeightGlob) * w + x] = depthSimMapFusedHPart[y * w + x]; - } - } + // get device cache instance + DeviceCache& deviceCache = DeviceCache::getInstance(); - deleteAllPointers(dataMapsHPart); - } + // get R device camera from cache + const DeviceCamera& rcDeviceCamera = deviceCache.requestCamera(tile.rc, _refineParams.scale, _mp); - dataMaps[0] = nullptr; // it is input dsmap we dont want to delete it - for(int c = 1; c < dataMaps.size(); c++) + // compute for each RcTc each similarity value for each depth to refine + // sum the inverted / filtered similarity value, best value is the HIGHEST + for(std::size_t tci = 0; tci < tile.refineTCams.size(); ++tci) { - delete dataMaps[c]; + const int tc = tile.refineTCams.at(tci); + + // get T device camera from cache + const DeviceCamera& tcDeviceCamera = deviceCache.requestCamera(tc, _refineParams.scale, _mp); + + ALICEVISION_LOG_DEBUG(tile << "Refine similarity volume:" << std::endl + << "\t- rc: " << tile.rc << std::endl + << "\t- tc: " << tc << " (" << (tci + 1) << "/" << tile.refineTCams.size() << ")" << std::endl + << "\t- rc camera device id: " << rcDeviceCamera.getDeviceCamId() << std::endl + << "\t- tc camera device id: " << tcDeviceCamera.getDeviceCamId() << std::endl + << "\t- tile range x: [" << downscaledRoi.x.begin << " - " << downscaledRoi.x.end << "]" << std::endl + << "\t- tile range y: [" << downscaledRoi.y.begin << " - " << downscaledRoi.y.end << "]" << std::endl); + + cuda_volumeRefineSimilarity(_volumeRefineSim_dmp, + _sgmDepthPixSizeMap_dmp, + (_refineParams.useNormalMap) ? 
&_normalMap_dmp : nullptr, + rcDeviceCamera, + tcDeviceCamera, + _refineParams, + depthRange, + downscaledRoi, + _stream); } - ALICEVISION_LOG_INFO("Refine and fuse depth/sim map (rc: " << _rc << ") done in: " << timer.elapsedMs() << " ms."); + // export intermediate volume information (if requested by user) + exportVolumeInformation(tile, "afterRefine"); + + // retrieve the best depth/sim in the volume + // compute sub-pixel sample using a sliding gaussian + cuda_volumeRefineBestDepth(_refinedDepthSimMap_dmp, + _sgmDepthPixSizeMap_dmp, + _volumeRefineSim_dmp, + rcDeviceCamera, + _refineParams, + downscaledRoi, + _stream); + + ALICEVISION_LOG_INFO(tile << "Refine and fuse depth/sim map volume done."); } -void Refine::optimizeDepthSimMap(const DepthSimMap& depthSimMapSgmUpscale, // upscaled SGM depth sim map - const DepthSimMap& depthSimMapRefinedFused, // refined and fused depth sim map - DepthSimMap& out_depthSimMapOptimized) const // optimized depth sim map +void Refine::optimizeDepthSimMap(const Tile& tile) { - const system::Timer timer; - - ALICEVISION_LOG_INFO("Refine Optimizing depth/sim map (rc: " << _rc << ")"); + ALICEVISION_LOG_INFO(tile << "Color optimize depth/sim map."); + + // downscale the region of interest + const ROI downscaledRoi = downscaleROI(tile.roi, _refineParams.scale * _refineParams.stepXY); + + // get R device camera from cache + DeviceCache& deviceCache = DeviceCache::getInstance(); + const DeviceCamera& rcDeviceCamera = deviceCache.requestCamera(tile.rc, _refineParams.scale, _mp); + + cuda_depthSimMapOptimizeGradientDescent(_optimizedDepthSimMap_dmp, // output depth/sim map optimized + _optImgVariance_dmp, // image variance buffer pre-allocate + _optTmpDepthMap_dmp, // temporary depth map buffer pre-allocate + _sgmDepthPixSizeMap_dmp, // input SGM upscaled depth/pixSize map + _refinedDepthSimMap_dmp, // input refined and fused depth/sim map + rcDeviceCamera, + _refineParams, + downscaledRoi, + _stream); + + ALICEVISION_LOG_INFO(tile << 
"Color optimize depth/sim map done."); +} - if(_refineParams.nIters == 0) +void Refine::exportVolumeInformation(const Tile& tile, const std::string& name) const +{ + if(!_refineParams.exportIntermediateCrossVolumes && + !_refineParams.exportIntermediateVolume9pCsv) { - out_depthSimMapOptimized.init(depthSimMapRefinedFused); + // nothing to do return; } - const int h = _mp.getHeight(_rc); + // get tile begin indexes (default no tile) + int tileBeginX = -1; + int tileBeginY = -1; - // slicing in order to fit into GPU memory - // TODO: estimate the amount of VRAM available to decide the tiling - const int nParts = 4; - const int hPart = h / nParts; - - for(int part = 0; part < nParts; ++part) + if(tile.nbTiles > 1) { - const int yFrom = part * hPart; - const int hPartAct = std::min(hPart, h - yFrom); - _cps.optimizeDepthSimMapGradientDescent(_rc, - out_depthSimMapOptimized._dsm, - depthSimMapSgmUpscale._dsm, - depthSimMapRefinedFused._dsm, - _refineParams, - yFrom, hPartAct); + tileBeginX = tile.roi.x.begin; + tileBeginY = tile.roi.y.begin; } - ALICEVISION_LOG_INFO("Refine Optimizing depth/sim map (rc: " << _rc << ") done in: " << timer.elapsedMs() << " ms."); -} - -bool Refine::refineRc(const DepthSimMap& sgmDepthSimMap) -{ - const system::Timer timer; - const IndexT viewId = _mp.getViewId(_rc); + // copy device similarity volume to host memory + CudaHostMemoryHeap volumeSim_hmh(_volumeRefineSim_dmp.getSize()); + volumeSim_hmh.copyFrom(_volumeRefineSim_dmp); - ALICEVISION_LOG_INFO("Refine depth/sim map of view id: " << viewId << ", rc: " << _rc << " (" << (_rc + 1) << " / " << _mp.ncams << ")"); + // copy device SGM upscale depth/sim map to host memory + CudaHostMemoryHeap depthPixSizeMapSgmUpscale_hmh(_sgmDepthPixSizeMap_dmp.getSize()); + depthPixSizeMapSgmUpscale_hmh.copyFrom(_sgmDepthPixSizeMap_dmp); - if(_tCams.empty()) + if(_refineParams.exportIntermediateCrossVolumes) { - return false; - } + ALICEVISION_LOG_INFO(tile << "Export similarity volume cross (" << 
name << ")."); - DepthSimMap depthSimMapSgmUpscale(_rc, _mp, 1, 1); // depthSimMapVis - upscaleSgmDepthSimMap(sgmDepthSimMap, depthSimMapSgmUpscale); - filterMaskedPixels(depthSimMapSgmUpscale); + const std::string volumeCrossPath = getFileNameFromIndex(_mp, tile.rc, mvsUtils::EFileType::volumeCross, _refineParams.scale, "_" + name, tileBeginX, tileBeginY); - if(_refineParams.exportIntermediateResults) - { - depthSimMapSgmUpscale.save("_sgmUpscaled"); - } + exportSimilarityVolumeCross(volumeSim_hmh, depthPixSizeMapSgmUpscale_hmh, _mp, tile.rc, _refineParams, volumeCrossPath, tile.roi); - DepthSimMap depthSimMapRefinedFused(_rc, _mp, 1, 1); // depthSimMapPhoto + ALICEVISION_LOG_INFO(tile << "Export similarity volume cross (" << name << ") done."); + } - if(_refineParams.doRefineFuse) + if(_refineParams.exportIntermediateVolume9pCsv) { - refineAndFuseDepthSimMap(depthSimMapSgmUpscale, depthSimMapRefinedFused); + ALICEVISION_LOG_INFO(tile << "Export similarity volume 9 points CSV (" << name << ")."); - if(_refineParams.exportIntermediateResults) - { - depthSimMapRefinedFused.save("_refinedFused"); - } - } - else - { - depthSimMapRefinedFused.initJustFromDepthMap(depthSimMapSgmUpscale, 1.0f); - } + const std::string stats9Path = getFileNameFromIndex(_mp, tile.rc, mvsUtils::EFileType::stats9p, _refineParams.scale, "_refine", tileBeginX, tileBeginY); - if(_refineParams.doRefineOpt && _refineParams.nIters != 0) - { - optimizeDepthSimMap(depthSimMapSgmUpscale, depthSimMapRefinedFused, _depthSimMap); - } - else - { - _depthSimMap.init(depthSimMapRefinedFused); - } + exportSimilaritySamplesCSV(volumeSim_hmh, tile.rc, name, stats9Path); - ALICEVISION_LOG_INFO("Refine depth/sim map (rc: " << _rc << ") done in: " << timer.elapsedMs() << " ms."); - return true; + ALICEVISION_LOG_INFO(tile << "Export similarity volume 9 points CSV (" << name << ") done."); + } } } // namespace depthMap diff --git a/src/aliceVision/depthMap/Refine.hpp b/src/aliceVision/depthMap/Refine.hpp index 
724b5375f2..f861730b22 100644 --- a/src/aliceVision/depthMap/Refine.hpp +++ b/src/aliceVision/depthMap/Refine.hpp @@ -6,75 +6,107 @@ #pragma once +#include #include -#include -#include +#include +#include +#include +#include +#include + +#include +#include namespace aliceVision { namespace depthMap { -struct RefineParams; -class PlaneSweepingCuda; - /** * @brief Depth Map Estimation Refine */ class Refine { public: - Refine(const RefineParams& refineParams, const mvsUtils::MultiViewParams& mp, PlaneSweepingCuda& cps, int rc); - ~Refine(); - bool refineRc(const DepthSimMap& sgmDepthSimMap); + /** + * @brief Refine constructor. + * @param[in] mp the multi-view parameters + * @param[in] tileParams tile workflow parameters + * @param[in] refineParams the Refine parameters + * @param[in] stream the stream for gpu execution + */ + Refine(const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const RefineParams& refineParams, + cudaStream_t stream); - const StaticVector& getTCams() const { return _tCams; } - const DepthSimMap& getDepthSimMap() const { return _depthSimMap; } + // no default constructor + Refine() = delete; -private: + // default destructor + ~Refine() = default; - const RefineParams& _refineParams; - const mvsUtils::MultiViewParams& _mp; - PlaneSweepingCuda& _cps; + // final depth/similarity map getter + inline const CudaDeviceMemoryPitched& getDeviceDepthSimMap() const { return _optimizedDepthSimMap_dmp; } - const int _rc; // refine R camera index - StaticVector _tCams; // refine T camera indexes, compute in the constructor - DepthSimMap _depthSimMap; // refined, fused and optimized depth map + /** + * @brief Get memory consumpyion in device memory. + * @return device memory consumpyion (in MB) + */ + double getDeviceMemoryConsumption() const; /** - * @brief Upscale the given SGM depth/sim map. 
- * @param[in] sgmDepthSimMap the given SGM depth/sim map - * @param[in,out] out_depthSimMapUpscaled the given output depth/sim map - * @note Dimensions of the given output depth/sim map are used to compute the scale factor. + * @brief Get unpadded memory consumpyion in device memory. + * @return unpadded device memory consumpyion (in MB) */ - void upscaleSgmDepthSimMap(const DepthSimMap& sgmDepthSimMap, DepthSimMap& out_depthSimMapUpscaled) const; + double getDeviceMemoryConsumptionUnpadded() const; /** - * @brief Filter masked pixels (alpha < 0.1) of the given depth/sim map. - * @param[in,out] out_depthSimMap the given depth/sim map + * @brief Refine for a single R camera the Semi-Global Matching depth/sim map. + * @param[in] tile The given tile for Refine computation + * @param[in] in_sgmDepthSimMap_dmp the SGM result depth/sim map in device memory + * @param[in] in_sgmNormalMap_dmp the SGM result normal map in device memory (or empty) */ - void filterMaskedPixels(DepthSimMap& out_depthSimMap); + void refineRc(const Tile& tile, const CudaDeviceMemoryPitched& in_sgmDepthSimMap_dmp, const CudaDeviceMemoryPitched& in_sgmNormalMap_dmp); + +private: + + // private methods /** - * @brief Refine the given depth/sim map with the given T camera. - * @param[in] tc the given T camera index - * @param[int,out] depthSimMap the given output refined depth/sim map + * @brief Refine and fuse the given depth/sim map using volume strategy. + * @param[in] tile The given tile for Refine computation */ - void refineDepthSimMapPerTc(int tc, DepthSimMap& depthSimMap) const; + void refineAndFuseDepthSimMap(const Tile& tile); /** - * @brief Refine and fuse the given depth/sim map. - * @param[in] depthSimMapSgmUpscale the given upscaled SGM depth sim/map - * @param[out] out_depthSimMapRefinedFused the given output refined and fused depth/sim map + * @brief Optimize the refined depth/sim maps. 
+ * @param[in] tile The given tile for Refine computation */ - void refineAndFuseDepthSimMap(const DepthSimMap& depthSimMapSgmUpscale, DepthSimMap& out_depthSimMapRefinedFused) const; + void optimizeDepthSimMap(const Tile& tile); /** - * @brief Optimize the given depth/sim maps. - * @param[in] depthSimMapSgmUpscale the given upscaled SGM depth/sim map - * @param[in] depthSimMapRefinedFused the given refined and fused depth/sim map - * @param[out] out_depthSimMapOptimized the given output optimized depth/sim map + * @brief Export volume cross alembic file and 9 points csv file. + * @param[in] tile The given tile for Refine computation + * @param[in] name the export filename */ - void optimizeDepthSimMap(const DepthSimMap& depthSimMapSgmUpscale, const DepthSimMap& depthSimMapRefinedFused, DepthSimMap& out_depthSimMapOptimized) const; + void exportVolumeInformation(const Tile& tile, const std::string& name) const; + + // private members + + const mvsUtils::MultiViewParams& _mp; //< Multi-view parameters + const mvsUtils::TileParams& _tileParams; //< tile workflow parameters + const RefineParams& _refineParams; //< Refine parameters + + // private members in device memory + + CudaDeviceMemoryPitched _sgmDepthPixSizeMap_dmp; //< rc upscaled SGM depth/pixSize map + CudaDeviceMemoryPitched _refinedDepthSimMap_dmp; //< rc refined and fused depth/sim map + CudaDeviceMemoryPitched _optimizedDepthSimMap_dmp; //< rc optimized depth/sim map + CudaDeviceMemoryPitched _normalMap_dmp; //< rc normal map + CudaDeviceMemoryPitched _volumeRefineSim_dmp; //< rc refine similarity volume + CudaDeviceMemoryPitched _optTmpDepthMap_dmp; //< for color optimization: temporary depth map buffer + CudaDeviceMemoryPitched _optImgVariance_dmp; //< for color optimization: image variance buffer + cudaStream_t _stream; //< stream for gpu execution }; } // namespace depthMap diff --git a/src/aliceVision/depthMap/RefineParams.hpp b/src/aliceVision/depthMap/RefineParams.hpp index 1ab2b48b1c..db342e4587 
100644 --- a/src/aliceVision/depthMap/RefineParams.hpp +++ b/src/aliceVision/depthMap/RefineParams.hpp @@ -16,22 +16,28 @@ struct RefineParams { // user parameters + int scale = 1; + int stepXY = 1; int wsh = 3; - int maxTCams = 6; - int nDepthsToRefine = 31; - int nSamplesHalf = 150; - int nIters = 100; + int halfNbDepths = 15; + int nbSubsamples = 10; + int maxTCamsPerTile = 4; + int optimizationNbIterations = 100; double sigma = 15.0; double gammaC = 15.5; double gammaP = 8.0; - bool useTcOrRcPixSize = false; - bool exportIntermediateResults = false; + bool useRefineFuse = true; + bool useColorOptimization = true; + + // intermediate results export parameters + + bool exportIntermediateDepthSimMaps = false; + bool exportIntermediateCrossVolumes = false; + bool exportIntermediateVolume9pCsv = false; // constant parameters - const int scale = 1; // should remain at 1 for now, some Refine functions do not support this parameter - const bool doRefineFuse = true; - const bool doRefineOpt = true; + const bool useNormalMap = false; // for experimentation purposes }; } // namespace depthMap diff --git a/src/aliceVision/depthMap/Sgm.cpp b/src/aliceVision/depthMap/Sgm.cpp index 8ee71cf7ce..6020e1f69f 100644 --- a/src/aliceVision/depthMap/Sgm.cpp +++ b/src/aliceVision/depthMap/Sgm.cpp @@ -6,26 +6,14 @@ #include "Sgm.hpp" -#include #include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - #include -#include - -#include - -#include +#include +#include +#include +#include +#include +#include #include #include @@ -33,849 +21,339 @@ namespace aliceVision { namespace depthMap { -namespace bfs = boost::filesystem; - -Sgm::Sgm(const SgmParams& sgmParams, const mvsUtils::MultiViewParams& mp, PlaneSweepingCuda& cps, int rc) - : _rc(rc) - , _mp(mp) - , _cps(cps) +Sgm::Sgm(const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const SgmParams& sgmParams, + cudaStream_t stream) + : _mp(mp) + , 
_tileParams(tileParams) , _sgmParams(sgmParams) - , _depthSimMap(_rc, _mp, _sgmParams.scale, _sgmParams.stepXY) + , _stream(stream) { - _tCams = _mp.findNearestCamsFromLandmarks(_rc, _sgmParams.maxTCams); - _depthsTcamsLimits.clear(); - - computeDepthsAndResetTCams(); -} - -Sgm::~Sgm() -{} - -bool Sgm::sgmRc() -{ - const system::Timer timer; - const IndexT viewId = _mp.getViewId(_rc); - - ALICEVISION_LOG_INFO("SGM depth/sim map of view id: " << viewId << ", rc: " << _rc << " (" << (_rc + 1) << " / " << _mp.ncams << ")"); - - if(_tCams.empty()) - { - return false; - } - - // log debug camera / depth information - logRcTcDepthInformation(); - - // compute volume dimensions - const int volDimX = _mp.getWidth(_rc) / (_sgmParams.scale * _sgmParams.stepXY); - const int volDimY = _mp.getHeight(_rc) / (_sgmParams.scale * _sgmParams.stepXY); - const int volDimZ = _depths.size(); + // get tile maximum dimensions + const int downscale = _sgmParams.scale * _sgmParams.stepXY; + const int maxTileWidth = divideRoundUp(tileParams.bufferWidth , downscale); + const int maxTileHeight = divideRoundUp(tileParams.bufferHeight, downscale); - const CudaSize<3> volDim(volDimX, volDimY, volDimZ); - - // log volumes allocation size / gpu device id - // this device need also to allocate: - // (max_img - 1) * X * Y * dims_at_a_time * sizeof(float) of device memory. 
+ // allocate depth list in device memory { - int devid; - cudaGetDevice( &devid ); - ALICEVISION_LOG_DEBUG("Allocating 2 volumes (x: " << volDim.x() << ", y: " << volDim.y() << ", z: " << volDim.z() << ") on GPU device " << devid << "."); + const CudaSize<2> depthsDim(_sgmParams.maxDepths, 1); + _depths_hmh.allocate(depthsDim); + _depths_dmp.allocate(depthsDim); } - CudaDeviceMemoryPitched volumeSecBestSim_dmp(volDim); - CudaDeviceMemoryPitched volumeBestSim_dmp(volDim); - - checkStartingAndStoppingDepth(); + // allocate depth/sim map in device memory + _depthSimMap_dmp.allocate(CudaSize<2>(maxTileWidth, maxTileHeight)); - _cps.computeDepthSimMapVolume(_rc, volumeBestSim_dmp, volumeSecBestSim_dmp, volDim, _tCams.getData(), _depthsTcamsLimits.getData(), _depths.getData(), _sgmParams); + // allocate normal map in device memory + if(_sgmParams.computeNormalMap) + _normalMap_dmp.allocate(CudaSize<2>(maxTileWidth, maxTileHeight)); - // particular case with only one tc - if(_tCams.size() < 2) + // allocate similarity volumes in device memory { - // the second best volume has no valid similarity values - volumeSecBestSim_dmp.copyFrom(volumeBestSim_dmp); - } + const CudaSize<3> volDim(maxTileWidth, maxTileHeight, _sgmParams.maxDepths); - if (_sgmParams.exportIntermediateResults) - { - CudaHostMemoryHeap volumeSecBestSim_h(volumeSecBestSim_dmp.getSize()); - volumeSecBestSim_h.copyFrom(volumeSecBestSim_dmp); - - exportSimilarityVolume(volumeSecBestSim_h, _depths, _mp, _rc, _sgmParams.scale, _sgmParams.stepXY, _mp.getDepthMapsFolder() + std::to_string(viewId) + "_vol_beforeFiltering.abc"); - exportSimilaritySamplesCSV(volumeSecBestSim_h, _depths, _rc, _sgmParams.scale, _sgmParams.stepXY, "beforeFiltering", _mp.getDepthMapsFolder() + std::to_string(viewId) + "_9p.csv"); + _volumeBestSim_dmp.allocate(volDim); + _volumeSecBestSim_dmp.allocate(volDim); } - // reuse best sim to put filtered sim volume - CudaDeviceMemoryPitched& volumeFilteredSim_dmp = volumeBestSim_dmp; - - // 
Filter on the 3D volume to weight voxels based on their neighborhood strongness. - // So it downweights local minimums that are not supported by their neighborhood. - // this is here for experimental reason ... to show how SGGC work on non - // optimized depthmaps ... it must equals to true in normal case - if(_sgmParams.doSgmOptimizeVolume) + // allocate similarity volume optimization buffers + if(sgmParams.doSgmOptimizeVolume) { - _cps.sgmOptimizeSimVolume(_rc, volumeFilteredSim_dmp, volumeSecBestSim_dmp, volDim, _sgmParams); - } - else - { - volumeFilteredSim_dmp.copyFrom(volumeSecBestSim_dmp); + const size_t maxTileSide = std::max(maxTileWidth, maxTileHeight); + _volumeSliceAccA_dmp.allocate(CudaSize<2>(maxTileSide, _sgmParams.maxDepths)); + _volumeSliceAccB_dmp.allocate(CudaSize<2>(maxTileSide, _sgmParams.maxDepths)); + _volumeAxisAcc_dmp.allocate(CudaSize<2>(maxTileSide, 1)); } +} - if(_sgmParams.exportIntermediateResults) - { - CudaHostMemoryHeap volumeSecBestSim_h(volumeFilteredSim_dmp.getSize()); - volumeSecBestSim_h.copyFrom(volumeFilteredSim_dmp); +double Sgm::getDeviceMemoryConsumption() const +{ + size_t bytes = 0; - exportSimilarityVolume(volumeSecBestSim_h, _depths, _mp, _rc, _sgmParams.scale, _sgmParams.stepXY, _mp.getDepthMapsFolder() + std::to_string(viewId) + "_vol_afterFiltering.abc"); - exportSimilaritySamplesCSV(volumeSecBestSim_h, _depths, _rc, _sgmParams.scale, _sgmParams.stepXY, "afterFiltering", _mp.getDepthMapsFolder() + std::to_string(viewId) + "_9p.csv"); - } + bytes += _depths_dmp.getBytesPadded(); + bytes += _depthSimMap_dmp.getBytesPadded(); + bytes += _volumeBestSim_dmp.getBytesPadded(); + bytes += _volumeSecBestSim_dmp.getBytesPadded(); - // Retrieve best depth per pixel - // For each pixel, choose the voxel with the minimal similarity value - _cps.sgmRetrieveBestDepth(_rc, _depthSimMap, volumeFilteredSim_dmp, volDim, _depths, _sgmParams); + if(_sgmParams.computeNormalMap) + bytes += _normalMap_dmp.getBytesPadded(); - 
if(_sgmParams.exportIntermediateResults) + if(_sgmParams.doSgmOptimizeVolume) { - // { - // // Export RAW SGM results with the depths based on the input planes without interpolation - // DepthSimMap depthSimMapRawPlanes(_rc, _mp, _scale, _step); - // _sp.cps.SgmRetrieveBestDepth(depthSimMapRawPlanes, volumeSecBestSim_d, _depths, volDimX, volDimY, volDimZ, false); // interpolate=false - // depthSimMapRawPlanes.save("_sgmPlanes"); - // } - _depthSimMap.save("_sgm"); - _depthSimMap.save("_sgmStep1", true); + bytes += _volumeSliceAccA_dmp.getBytesPadded(); + bytes += _volumeSliceAccB_dmp.getBytesPadded(); + bytes += _volumeAxisAcc_dmp.getBytesPadded(); } - ALICEVISION_LOG_INFO("SGM depth/sim map (rc: " << _rc << ") done in: " << timer.elapsedMs() << " ms."); - return true; + return (double(bytes) / (1024.0 * 1024.0)); } -void Sgm::logRcTcDepthInformation() const +double Sgm::getDeviceMemoryConsumptionUnpadded() const { - std::ostringstream ostr; - ostr << "Camera / Depth information: " << std::endl - << "\t- rc camera:" << std::endl - << "\t - id: " << _rc << std::endl - << "\t - view id: " << _mp.getViewId(_rc) << std::endl - << "\t - depth planes: " << _depths.size() << std::endl - << "\t - depths range: [" << _depths[0] << "-" << _depths[_depths.size() - 1] << "]" << std::endl - << "\t- tc cameras:" << std::endl; - - for(int c = 0; c < _tCams.size(); c++) - { - ostr << "\t - tc camera (" << (c+1) << "/" << _tCams.size() << "):" << std::endl - << "\t - id: " << _tCams[c] << std::endl - << "\t - view id: " << _mp.getViewId(_tCams[c]) << std::endl - << "\t - depth planes: " << _depthsTcamsLimits[c].y << std::endl - << "\t - depths range: [" << _depths[_depthsTcamsLimits[c].x] << "-" - << _depths[_depthsTcamsLimits[c].x + _depthsTcamsLimits[c].y - 1] << "]" << std::endl - << "\t - depth indexes range: [" << _depthsTcamsLimits[c].x << "-" - << _depthsTcamsLimits[c].x + _depthsTcamsLimits[c].y << "]" << std::endl; - } + size_t bytes = 0; - 
ALICEVISION_LOG_DEBUG(ostr.str()); -} + bytes += _depths_dmp.getBytesUnpadded(); + bytes += _depthSimMap_dmp.getBytesUnpadded(); + bytes += _volumeBestSim_dmp.getBytesUnpadded(); + bytes += _volumeSecBestSim_dmp.getBytesUnpadded(); -void Sgm::checkStartingAndStoppingDepth() const -{ - struct MinOffX - { - bool operator()(const Pixel& l, const Pixel& r) const { return (l.x < r.x); } - }; + if(_sgmParams.computeNormalMap) + bytes += _normalMap_dmp.getBytesUnpadded(); - struct MinOffXplusY + if(_sgmParams.doSgmOptimizeVolume) { - bool operator()(const Pixel& l, const Pixel& r) const { return (l.x + l.y < r.x + r.y); } - }; - - { - const std::vector& depthTcamsLimitsVec = _depthsTcamsLimits.getData(); - const int startingDepth = - std::min_element(depthTcamsLimitsVec.begin(), depthTcamsLimitsVec.end(), MinOffX())->x; - const auto depth_it = std::max_element(depthTcamsLimitsVec.begin(), depthTcamsLimitsVec.end(), MinOffXplusY()); - const int stoppingDepth = depth_it->x + depth_it->y; - - // The overall starting depth index should always be zero. - assert(startingDepth == 0); - - // Usually stoppingDepth should be equal to the total number of depths. - // But due to sgmMaxDepths and sgmMaxDepthPerTc, we can have more depths - // than we finally use in all TC cameras. 
- assert(_depths.size() >= stoppingDepth); + bytes += _volumeSliceAccA_dmp.getBytesUnpadded(); + bytes += _volumeSliceAccB_dmp.getBytesUnpadded(); + bytes += _volumeAxisAcc_dmp.getBytesUnpadded(); } + + return (double(bytes) / (1024.0 * 1024.0)); } -void Sgm::computeDepthsAndResetTCams() +void Sgm::sgmRc(const Tile& tile, const SgmDepthList& tileDepthList) { - ALICEVISION_LOG_DEBUG("Compute depths and reset TCams"); + const IndexT viewId = _mp.getViewId(tile.rc); - std::size_t nbObsDepths; - float minObsDepth, maxObsDepth, midObsDepth; - _mp.getMinMaxMidNbDepth(_rc, minObsDepth, maxObsDepth, midObsDepth, nbObsDepths, _sgmParams.seedsRangePercentile); + ALICEVISION_LOG_INFO(tile << "SGM depth/sim map of view id: " << viewId << ", rc: " << tile.rc << " (" << (tile.rc + 1) << " / " << _mp.ncams << ")."); - StaticVector*>* alldepths; + // check SGM depth list and T cameras + if(tile.sgmTCams.empty() || tileDepthList.getDepths().empty()) + ALICEVISION_THROW_ERROR(tile << "Cannot compute Semi-Global Matching, no depths or no T cameras (viewId: " << viewId << ")."); + + // copy rc depth data in page-locked host memory + for(int i = 0; i < tileDepthList.getDepths().size(); ++i) + _depths_hmh(i, 0) = tileDepthList.getDepths()[i]; - // all depths from the principal ray provided by target cameras - if(nbObsDepths < 20) - alldepths = computeAllDepthsAndResetTCams(-1); - else - alldepths = computeAllDepthsAndResetTCams(midObsDepth); + // copy rc depth data in device memory + _depths_dmp.copyFrom(_depths_hmh, _stream); - float minDepthAll = std::numeric_limits::max(); - float maxDepthAll = 0.0f; - for(int i = 0; i < alldepths->size(); i++) - { - for(int j = 0; j < (*alldepths)[i]->size(); j++) - { - float depth = (*(*alldepths)[i])[j]; - minDepthAll = std::min(minDepthAll, depth); - maxDepthAll = std::max(maxDepthAll, depth); - } - } + // compute best sim and second best sim volumes + computeSimilarityVolumes(tile, tileDepthList); + + // export intermediate volume information 
(if requested by user) + exportVolumeInformation(tile, tileDepthList, _volumeSecBestSim_dmp, "beforeFiltering"); - if(!_sgmParams.useSfmSeeds || _mp.getInputSfMData().getLandmarks().empty()) + // this is here for experimental purposes + // to show how SGGC work on non optimized depthmaps + // it must equals to true in normal case + if(_sgmParams.doSgmOptimizeVolume) { - ALICEVISION_LOG_DEBUG("Select depth candidates without seeds. Nb observations: " << nbObsDepths); - - computeDepths(minDepthAll, maxDepthAll, (_sgmParams.stepZ > 0.0f ? _sgmParams.stepZ : 1.0f), alldepths); - - if(_sgmParams.maxDepths > 0 && _depths.size() > _sgmParams.maxDepths) - { - const float scaleFactor = float(_depths.size()) / float(_sgmParams.maxDepths); - ALICEVISION_LOG_DEBUG("nbDepths: " << _depths.size() << ", maxDepths: " << _sgmParams.maxDepths - << ", scaleFactor: " << scaleFactor); - computeDepths(minDepthAll, maxDepthAll, scaleFactor, alldepths); - } - if(_sgmParams.saveDepthsToSweepTxtFile) - { - const std::string fn = _mp.getDepthMapsFolder() + std::to_string(_mp.getViewId(_rc)) + "depthsAll.txt"; - FILE* f = fopen(fn.c_str(), "w"); - for(int j = 0; j < _depths.size(); j++) - { - fprintf(f, "%f\n", _depths[j]); - } - fclose(f); - } + optimizeSimilarityVolume(tile, tileDepthList); } else { - ALICEVISION_LOG_DEBUG("Select depth candidates from seeds. 
Nb observations: " << nbObsDepths); - ALICEVISION_LOG_DEBUG("Depth all: [" << minDepthAll << "-" << maxDepthAll << "]"); - float minDepth = minDepthAll; - float maxDepth = maxDepthAll; - - // if we get enough information from seeds, adjust min/maxDepth - if(nbObsDepths > 100) - { - minDepth = minObsDepth * (1.0f - _sgmParams.seedsRangeInflate); - maxDepth = maxObsDepth * (1.0f + _sgmParams.seedsRangeInflate); - - if(maxDepthAll < minDepth || minDepthAll > maxDepth) - { - // no intersection between min/maxDepth and min/maxDepthAll - // keep min/maxDepth value as is - } - else - { - // min/maxDepth intersection with min/maxDepthAll - minDepth = std::max(minDepthAll, minDepth); - maxDepth = std::min(maxDepthAll, maxDepth); - } - } - - // build the list of "best" depths for rc, from all tc cameras depths - computeDepths(minDepth, maxDepth, (_sgmParams.stepZ > 0.0f ? _sgmParams.stepZ : 1.0f), alldepths); - - // filter out depths if computeDepths gave too many values - if(_sgmParams.maxDepths > 0 && _depths.size() > _sgmParams.maxDepths) - { - const float scaleFactor = float(_depths.size()) / float(_sgmParams.maxDepths); - ALICEVISION_LOG_DEBUG("nbDepths: " << _depths.size() << ", maxDepths: " << _sgmParams.maxDepths - << ", scaleFactor: " << scaleFactor); - computeDepths(minDepth, maxDepth, scaleFactor, alldepths); - } - ALICEVISION_LOG_DEBUG("Selected depth range: [" << minDepth << "-" << maxDepth - << "], nb selected depths: " << _depths.size()); - - if(_sgmParams.saveDepthsToSweepTxtFile) - { - const std::string fn = _mp.getDepthMapsFolder() + std::to_string(_mp.getViewId(_rc)) + "depthsAll.txt"; - FILE* f = fopen(fn.c_str(), "w"); - for(int j = 0; j < _depths.size(); j++) - { - fprintf(f, "%f\n", _depths[j]); - } - fclose(f); - } + // best sim volume is normally reuse to put optimized similarity + _volumeBestSim_dmp.copyFrom(_volumeSecBestSim_dmp, _stream); } - // fill depthsTcamsLimits member variable with index range of depths to sweep - 
computeDepthsTcamsLimits(alldepths); - - if(_sgmParams.saveDepthsToSweepTxtFile) - { - const std::string fn = _mp.getDepthMapsFolder() + std::to_string(_mp.getViewId(_rc)) + "depthsTcamsLimits.txt"; - FILE* f = fopen(fn.c_str(), "w"); - for(int j = 0; j < _depthsTcamsLimits.size(); j++) - { - Pixel l = _depthsTcamsLimits[j]; - // fprintf(f,"%f %f\n",(*depths)[l.x],(*depths)[l.x+l.y-1]); - fprintf(f, "%i %i\n", l.x, l.y); - } - fclose(f); - } + // export intermediate volume information (if requested by user) + exportVolumeInformation(tile, tileDepthList, _volumeBestSim_dmp, "afterFiltering"); - if(_sgmParams.saveDepthsToSweepTxtFile) - { - const std::string fn = _mp.getDepthMapsFolder() + std::to_string(_mp.getViewId(_rc)) + "depths.txt"; - FILE* f = fopen(fn.c_str(), "w"); - for(int j = 0; j < _depths.size(); j++) - { - fprintf(f, "%f\n", _depths[j]); - } - fclose(f); - } + // retrieve best depth + retrieveBestDepth(tile, tileDepthList); - if(_sgmParams.saveDepthsToSweepTxtFile) + // export intermediate depth/sim map (if requested by user) + if(_sgmParams.exportIntermediateDepthSimMaps) { - for(int i = 0; i < alldepths->size(); i++) - { - const std::string fn = _mp.getDepthMapsFolder() + std::to_string(_mp.getViewId(_rc)) + "depths" + - mvsUtils::num2str(i) + ".txt"; - FILE* f = fopen(fn.c_str(), "w"); - for(int j = 0; j < (*alldepths)[i]->size(); j++) - { - const float depth = (*(*alldepths)[i])[j]; - fprintf(f, "%f\n", depth); - } - fclose(f); - } + writeDepthSimMap(tile.rc, _mp, _tileParams, tile.roi, _depthSimMap_dmp, _sgmParams.scale, _sgmParams.stepXY, "_sgm"); } - if(_sgmParams.saveDepthsToSweepTxtFile) + // compute normal map from depth/sim map if needed + if(_sgmParams.computeNormalMap) { - OrientedPoint rcplane; - rcplane.p = _mp.CArr[_rc]; - rcplane.n = _mp.iRArr[_rc] * Point3d(0.0, 0.0, 1.0); - rcplane.n = rcplane.n.normalize(); - - const std::string fn = _mp.getDepthMapsFolder() + std::to_string(_mp.getViewId(_rc)) + "rcDepths.txt"; - FILE* f = 
fopen(fn.c_str(), "w"); - float depth = minDepthAll; - while(depth < maxDepthAll) - { - fprintf(f, "%f\n", depth); - const Point3d p = rcplane.p + rcplane.n * depth; - depth = depth + _mp.getCamPixelSize(p, _rc); - } - fclose(f); - } + // downscale the region of interest + const ROI downscaledRoi = downscaleROI(tile.roi, _sgmParams.scale * _sgmParams.stepXY); - deleteArrayOfArrays(&alldepths); + // get R device camera from cache + DeviceCache& deviceCache = DeviceCache::getInstance(); + const DeviceCamera& rcDeviceCamera = deviceCache.requestCamera(tile.rc, _sgmParams.scale, _mp); - ALICEVISION_LOG_DEBUG("Compute depths and reset TCams done, rc depths: " << _depths.size()); -} + ALICEVISION_LOG_INFO(tile << "SGM normal map of view id: " << viewId << ", rc: " << tile.rc << " (" << (tile.rc + 1) << " / " << _mp.ncams << ")."); + cuda_depthSimMapComputeNormal(_normalMap_dmp, _depthSimMap_dmp, rcDeviceCamera, _sgmParams, downscaledRoi, _stream); -StaticVector*>* Sgm::computeAllDepthsAndResetTCams(float midDepth) -{ - StaticVector tCamsNew; - StaticVector*>* alldepths = new StaticVector*>(); - alldepths->reserve(_tCams.size()); - - for(int c = 0; c < _tCams.size(); c++) - { - // depths of all meaningful points on the principal ray of the reference camera regarding the target camera tc - StaticVector* tcdepths = getDepthsTc(_tCams[c], midDepth); - if(sizeOfStaticVector(tcdepths) < 50) - { - // fallback if we don't have enough valid samples over the epipolar line - if(tcdepths != nullptr) - { - delete tcdepths; - tcdepths = nullptr; - } - float avMinDist, avMidDist, avMaxDist; - getMinMaxDepths(avMinDist, avMidDist, avMaxDist); - tcdepths = getDepthsByPixelSize(avMinDist, avMidDist, avMaxDist); - - if(sizeOfStaticVector(tcdepths) < 50) - { - if(tcdepths != nullptr) - { - delete tcdepths; - tcdepths = nullptr; - } - } - } - - if(tcdepths != nullptr) - { - alldepths->push_back(tcdepths); - tCamsNew.push_back(_tCams[c]); - } + writeDeviceImage(_normalMap_dmp, 
getFileNameFromIndex(_mp, tile.rc, mvsUtils::EFileType::depthMap, _sgmParams.scale, "Normal", tile.roi.x.begin, tile.roi.y.begin)); } - _tCams = tCamsNew; - - return alldepths; + ALICEVISION_LOG_INFO(tile << "SGM depth/sim map done."); } -void Sgm::computeDepthsTcamsLimits(StaticVector*>* alldepths) +void Sgm::computeSimilarityVolumes(const Tile& tile, const SgmDepthList& tileDepthList) { - _depthsTcamsLimits.resize(_tCams.size()); - - for(int c = 0; c < _tCams.size(); c++) - { - const float d1 = (*(*alldepths)[c])[0]; - const float d2 = (*(*alldepths)[c])[(*alldepths)[c]->size() - 1]; - - int id1 = _depths.indexOfNearestSorted(d1); - int id2 = _depths.indexOfNearestSorted(d2); + ALICEVISION_LOG_INFO(tile << "SGM Compute similarity volume."); - if(id1 == -1) - id1 = 0; + // downscale the region of interest + const ROI downscaledRoi = downscaleROI(tile.roi, _sgmParams.scale * _sgmParams.stepXY); - if(id2 == -1) - id2 = _depths.size() - 1; + // initialize the two similarity volumes at 255 + cuda_volumeInitialize(_volumeBestSim_dmp, 255.f, _stream); + cuda_volumeInitialize(_volumeSecBestSim_dmp, 255.f, _stream); + + // get device cache instance + DeviceCache& deviceCache = DeviceCache::getInstance(); - // clamp to keep only the closest depths if we have too much inputs (> _sgmParams.maxDepthsPerTc) - id2 = std::min(id1 + _sgmParams.maxDepthsPerTc - 1, id2); - _depthsTcamsLimits[c] = Pixel(id1, id2 - id1 + 1); - } -} - -void Sgm::computeDepths(float minDepth, float maxDepth, float scaleFactor, - const StaticVector*>* alldepths) -{ - _depths.clear(); + // get R device camera from cache + const DeviceCamera& rcDeviceCamera = deviceCache.requestCamera(tile.rc, _sgmParams.scale, _mp); - float depth = minDepth; - - while(depth < maxDepth) + // compute similarity volume per Rc Tc + for(std::size_t tci = 0; tci < tile.sgmTCams.size(); ++tci) { - _depths.push_back(depth); + const int tc = tile.sgmTCams.at(tci); - // get min tc step at depth - float minTcStep = maxDepth - 
minDepth; + const int firstDepth = tileDepthList.getDepthsTcLimits()[tci].x; + const int lastDepth = firstDepth + tileDepthList.getDepthsTcLimits()[tci].y; - // for each tc camera - for(int i = 0; i < alldepths->size(); i++) - { - // list of valid depths for the tc - StaticVector* tcDepths = (*alldepths)[i]; + const Range tcDepthRange(firstDepth, lastDepth); - // get the tc depth closest to the current depth - const int id = tcDepths->indexOfNearestSorted(depth); + // get T device camera from cache + const DeviceCamera& tcDeviceCamera = deviceCache.requestCamera(tc, _sgmParams.scale, _mp); - // continue on no result or last element (we need id + 1) - if(id < 0 || id >= tcDepths->size() - 1) - continue; + ALICEVISION_LOG_DEBUG(tile << "Compute similarity volume:" << std::endl + << "\t- rc: " << tile.rc << std::endl + << "\t- tc: " << tc << " (" << (tci + 1) << "/" << tile.sgmTCams.size() << ")" << std::endl + << "\t- rc camera device id: " << rcDeviceCamera.getDeviceCamId() << std::endl + << "\t- tc camera device id: " << tcDeviceCamera.getDeviceCamId() << std::endl + << "\t- tc first depth: " << firstDepth << std::endl + << "\t- tc last depth: " << lastDepth << std::endl + << "\t- tile range x: [" << downscaledRoi.x.begin << " - " << downscaledRoi.x.end << "]" << std::endl + << "\t- tile range y: [" << downscaledRoi.y.begin << " - " << downscaledRoi.y.end << "]" << std::endl); - // consider the enclosing depth range - const float did = (*tcDepths)[id]; // closest depth - const float nid = (*tcDepths)[id + 1]; // next depth - const float tcStep = fabs(did - nid); // [closest; next] depths distance + cuda_volumeComputeSimilarity(_volumeBestSim_dmp, + _volumeSecBestSim_dmp, + _depths_dmp, + rcDeviceCamera, + tcDeviceCamera, + _sgmParams, + tcDepthRange, + downscaledRoi, + _stream); + } - // keep this value if smallest step so far - minTcStep = std::min(minTcStep, tcStep); - } + // update second best uninitialized similarity volume values with first best similarity 
volume values + // - allows to avoid the particular case with a single tc (second best volume has no valid similarity values) + // - usefull if a tc alone contributes to the calculation of a subpart of the similarity volume + if(_sgmParams.updateUninitializedSim) // should always be true, false for debug purposes + { + ALICEVISION_LOG_DEBUG(tile << "SGM Update uninitialized similarity volume values from best similarity volume."); - depth += minTcStep * scaleFactor; + cuda_volumeUpdateUninitializedSimilarity(_volumeBestSim_dmp, _volumeSecBestSim_dmp, _stream); } + + ALICEVISION_LOG_INFO(tile << "SGM Compute similarity volume done."); } -void Sgm::getMinMaxDepths(float& minDepth, float& midDepth, float& maxDepth) +void Sgm::optimizeSimilarityVolume(const Tile& tile, const SgmDepthList& tileDepthList) { - if(_sgmParams.prematchinMinMaxDepthDontUseSeeds) - { - minDepth = 0.0f; - maxDepth = 0.0f; - for(int c = 0; c < _tCams.size(); ++c) - { - const int tc = _tCams[c]; - minDepth += (_mp.CArr[_rc] - _mp.CArr[tc]).size() * _sgmParams.prematchingMinCamDist; - maxDepth += (_mp.CArr[_rc] - _mp.CArr[tc]).size() * _sgmParams.prematchingMaxCamDist; - } - minDepth /= static_cast(_tCams.size()); - maxDepth /= static_cast(_tCams.size()); - midDepth = (minDepth + maxDepth) / 2.0f; - } - else - { - std::size_t nbDepths; - _mp.getMinMaxMidNbDepth(_rc, minDepth, maxDepth, midDepth, nbDepths, _sgmParams.seedsRangePercentile); - maxDepth = maxDepth * _sgmParams.prematchingMaxDepthScale; - } + ALICEVISION_LOG_INFO(tile << "SGM Optimizing volume (filtering axes: " << _sgmParams.filteringAxes << ")."); + + // downscale the region of interest + const ROI downscaledRoi = downscaleROI(tile.roi, _sgmParams.scale * _sgmParams.stepXY); + + // get R device camera from cache + DeviceCache& deviceCache = DeviceCache::getInstance(); + const DeviceCamera& rcDeviceCamera = deviceCache.requestCamera(tile.rc, _sgmParams.scale, _mp); + + cuda_volumeOptimize(_volumeBestSim_dmp, // output volume (reuse 
best sim to put optimized similarity) + _volumeSliceAccA_dmp, // slice A accumulation buffer pre-allocate + _volumeSliceAccB_dmp, // slice B accumulation buffer pre-allocate + _volumeAxisAcc_dmp, // axis accumulation buffer pre-allocate + _volumeSecBestSim_dmp, // input volume + rcDeviceCamera, + _sgmParams, + tileDepthList.getDepths().size(), + downscaledRoi, + _stream); + + ALICEVISION_LOG_INFO(tile << "SGM Optimizing volume done."); } -StaticVector* Sgm::getDepthsByPixelSize(float minDepth, float midDepth, float maxDepth) +void Sgm::retrieveBestDepth(const Tile& tile, const SgmDepthList& tileDepthList) { - const int maxDepthsHalf = 1024; - - const float d = float(_sgmParams.scale) * float(_sgmParams.rcDepthsCompStep); - - OrientedPoint rcplane; - rcplane.p = _mp.CArr[_rc]; - rcplane.n = _mp.iRArr[_rc] * Point3d(0.0, 0.0, 1.0); - rcplane.n = rcplane.n.normalize(); - - int ndepthsMidMax = 0; - float maxdepth = midDepth; - while((maxdepth < maxDepth) && (ndepthsMidMax < maxDepthsHalf)) - { - Point3d p = rcplane.p + rcplane.n * maxdepth; - float pixSize = _mp.getCamPixelSize(p, _rc, d); - maxdepth += pixSize; - ndepthsMidMax++; - } + ALICEVISION_LOG_INFO(tile << "SGM Retrieve best depth in volume."); - int ndepthsMidMin = 0; - float mindepth = midDepth; - while((mindepth > minDepth) && (ndepthsMidMin < maxDepthsHalf * 2 - ndepthsMidMax)) - { - Point3d p = rcplane.p + rcplane.n * mindepth; - float pixSize = _mp.getCamPixelSize(p, _rc, d); - mindepth -= pixSize; - ndepthsMidMin++; - } + // downscale the region of interest + const ROI downscaledRoi = downscaleROI(tile.roi, _sgmParams.scale * _sgmParams.stepXY); - // getNumberOfDepths - float depth = mindepth; - int ndepths = 0; - float pixSize = 1.0f; - while((depth < maxdepth) && (pixSize > 0.0f) && (ndepths < 2 * maxDepthsHalf)) - { - Point3d p = rcplane.p + rcplane.n * depth; - pixSize = _mp.getCamPixelSize(p, _rc, d); - depth += pixSize; - ndepths++; - } + // get depth range + const Range depthRange(0, 
tileDepthList.getDepths().size()); - StaticVector* out = new StaticVector(); - out->reserve(ndepths); + // get R device camera from cache + DeviceCache& deviceCache = DeviceCache::getInstance(); + const DeviceCamera& rcDeviceCamera = deviceCache.requestCamera(tile.rc, 1, _mp); - // fill - depth = mindepth; - pixSize = 1.0f; - ndepths = 0; - while((depth < maxdepth) && (pixSize > 0.0f) && (ndepths < 2 * maxDepthsHalf)) - { - out->push_back(depth); - Point3d p = rcplane.p + rcplane.n * depth; - pixSize = _mp.getCamPixelSize(p, _rc, d); - depth += pixSize; - ndepths++; - } + cuda_volumeRetrieveBestDepth(_depthSimMap_dmp, // output depth/sim map + _depths_dmp, // rc depth + _volumeBestSim_dmp, // second best sim volume optimized in best sim volume + rcDeviceCamera, + _sgmParams, + depthRange, + downscaledRoi, + _stream); - // check if it is asc - for(int i = 0; i < out->size() - 1; i++) - { - if((*out)[i] >= (*out)[i + 1]) - { - for(int j = 0; j <= i + 1; j++) - { - ALICEVISION_LOG_TRACE("getDepthsByPixelSize: check if it is asc: " << (*out)[j]); - } - throw std::runtime_error("getDepthsByPixelSize not asc."); - } - } - return out; + ALICEVISION_LOG_INFO(tile << "SGM Retrieve best depth in volume done."); } -StaticVector* Sgm::getDepthsTc(int tc, float midDepth) +void Sgm::exportVolumeInformation(const Tile& tile, + const SgmDepthList& tileDepthList, + const CudaDeviceMemoryPitched& in_volume_dmp, + const std::string& name) const { - OrientedPoint rcplane; - rcplane.p = _mp.CArr[_rc]; - rcplane.n = _mp.iRArr[_rc] * Point3d(0.0, 0.0, 1.0); - rcplane.n = rcplane.n.normalize(); - - const Point2d rmid = Point2d((float)_mp.getWidth(_rc) / 2.0f, (float)_mp.getHeight(_rc) / 2.0f); - Point2d pFromTar, pToTar; // segment of epipolar line of the principal point of the rc camera to the tc camera - getTarEpipolarDirectedLine(&pFromTar, &pToTar, rmid, _rc, tc, _mp); - - int allDepths = static_cast((pToTar - pFromTar).size()); - ALICEVISION_LOG_DEBUG("allDepths: " << allDepths); - - 
const Point2d pixelVect = ((pToTar - pFromTar).normalize()) * std::max(1.0f, (float)_sgmParams.scale); - // printf("%f %f %i %i\n",pixelVect.size(),((float)(scale*step)/3.0f),scale,step); - - Point2d cg = Point2d(0.0f, 0.0f); - Point3d cg3 = Point3d(0.0f, 0.0f, 0.0f); - int ncg = 0; - // navigate through all pixels of the epilolar segment - // Compute the middle of the valid pixels of the epipolar segment (in rc camera) of the principal point (of the rc - // camera) - for(int i = 0; i < allDepths; i++) - { - Point2d tpix = pFromTar + pixelVect * (float)i; - Point3d p; - if(triangulateMatch(p, rmid, tpix, _rc, tc, _mp)) // triangulate principal point from rc with tpix - { - float depth = orientedPointPlaneDistance( - p, rcplane.p, - rcplane.n); // todo: can compute the distance to the camera (as it's the principal point it's the same) - if(_mp.isPixelInImage(tpix, tc) && (depth > 0.0f) && - checkPair(p, _rc, tc, _mp, _mp.getMinViewAngle(), _mp.getMaxViewAngle())) - { - cg = cg + tpix; - cg3 = cg3 + p; - ncg++; - } - } - } - if(ncg == 0) - { - return new StaticVector(); - } - cg = cg / (float)ncg; - cg3 = cg3 / (float)ncg; - allDepths = ncg; - - ALICEVISION_LOG_DEBUG("All correct depths: " << allDepths); - - Point2d midpoint = cg; - if(midDepth > 0.0f) - { - Point3d midPt = rcplane.p + rcplane.n * midDepth; - _mp.getPixelFor3DPoint(&midpoint, midPt, tc); - } - - // compute the direction - float direction = 1.0f; + if(!_sgmParams.exportIntermediateVolumes && + !_sgmParams.exportIntermediateCrossVolumes && + !_sgmParams.exportIntermediateVolume9pCsv) { - Point3d p; - if(!triangulateMatch(p, rmid, midpoint, _rc, tc, _mp)) - { - StaticVector* out = new StaticVector(); - return out; - } - - float depth = orientedPointPlaneDistance(p, rcplane.p, rcplane.n); - - if(!triangulateMatch(p, rmid, midpoint + pixelVect, _rc, tc, _mp)) - { - StaticVector* out = new StaticVector(); - return out; - } - - float depthP1 = orientedPointPlaneDistance(p, rcplane.p, rcplane.n); - if(depth 
> depthP1) - { - direction = -1.0f; - } + // nothing to do + return; } - StaticVector* out1 = new StaticVector(); - out1->reserve(2 * _sgmParams.rcTcDepthsHalfLimit); + // get file tile begin indexes (default is single tile) + int tileBeginX = -1; + int tileBeginY = -1; - Point2d tpix = midpoint; - float depthOld = -1.0f; - int istep = 0; - bool ok = true; - - // compute depths for all pixels from the middle point to on one side of the epipolar line - while((out1->size() < _sgmParams.rcTcDepthsHalfLimit) && (_mp.isPixelInImage(tpix, tc) == true) && (ok == true)) + if(tile.nbTiles > 1) { - tpix = tpix + pixelVect * direction; - - Point3d refvect = _mp.iCamArr[_rc] * rmid; - Point3d tarvect = _mp.iCamArr[tc] * tpix; - float rptpang = angleBetwV1andV2(refvect, tarvect); - - Point3d p; - ok = triangulateMatch(p, rmid, tpix, _rc, tc, _mp); - - float depth = orientedPointPlaneDistance(p, rcplane.p, rcplane.n); - if(_mp.isPixelInImage(tpix, tc) && (depth > 0.0f) && (depth > depthOld) && - checkPair(p, _rc, tc, _mp, _mp.getMinViewAngle(), _mp.getMaxViewAngle()) && - (rptpang > - _mp.getMinViewAngle()) // WARNING if vects are near parallel thaen this results to strange angles ... - && - (rptpang < - _mp.getMaxViewAngle())) // this is the propper angle ... 
beacause is does not depend on the triangluated p - { - out1->push_back(depth); - // if ((tpix.x!=tpixold.x)||(tpix.y!=tpixold.y)||(depthOld>=depth)) - //{ - // printf("after %f %f %f %f %i %f %f\n",tpix.x,tpix.y,depth,depthOld,istep,ang,kk); - //}; - } - else - { - ok = false; - } - depthOld = depth; - istep++; + tileBeginX = tile.roi.x.begin; + tileBeginY = tile.roi.y.begin; } - StaticVector* out2 = new StaticVector(); - out2->reserve(2 * _sgmParams.rcTcDepthsHalfLimit); - tpix = midpoint; - istep = 0; - ok = true; + // copy device similarity volume to host memory + CudaHostMemoryHeap volumeSim_hmh(in_volume_dmp.getSize()); + volumeSim_hmh.copyFrom(in_volume_dmp); - // compute depths for all pixels from the middle point to the other side of the epipolar line - while((out2->size() < _sgmParams.rcTcDepthsHalfLimit) && (_mp.isPixelInImage(tpix, tc) == true) && (ok == true)) + if(_sgmParams.exportIntermediateVolumes) { - const Point3d refvect = _mp.iCamArr[_rc] * rmid; - const Point3d tarvect = _mp.iCamArr[tc] * tpix; - const float rptpang = angleBetwV1andV2(refvect, tarvect); - - Point3d p; - ok = triangulateMatch(p, rmid, tpix, _rc, tc, _mp); - - float depth = orientedPointPlaneDistance(p, rcplane.p, rcplane.n); - if(_mp.isPixelInImage(tpix, tc) && (depth > 0.0f) && (depth < depthOld) && - checkPair(p, _rc, tc, _mp, _mp.getMinViewAngle(), _mp.getMaxViewAngle()) && - (rptpang > - _mp.getMinViewAngle()) // WARNING if vects are near parallel thaen this results to strange angles ... - && - (rptpang < - _mp.getMaxViewAngle())) // this is the propper angle ... 
beacause is does not depend on the triangluated p - { - out2->push_back(depth); - // printf("%f %f\n",tpix.x,tpix.y); - } - else - { - ok = false; - } - - depthOld = depth; - tpix = tpix - pixelVect * direction; - } + ALICEVISION_LOG_INFO(tile << "Export similarity volume (" << name << ")."); - // printf("out2\n"); - StaticVector* out = new StaticVector(); - out->reserve(2 * _sgmParams.rcTcDepthsHalfLimit); - for(int i = out2->size() - 1; i >= 0; i--) - { - out->push_back((*out2)[i]); - // printf("%f\n",(*out2)[i]); - } - // printf("out1\n"); - for(int i = 0; i < out1->size(); i++) - { - out->push_back((*out1)[i]); - // printf("%f\n",(*out1)[i]); - } + const std::string volumePath = getFileNameFromIndex(_mp, tile.rc, mvsUtils::EFileType::volume, _sgmParams.scale, "_" + name, tileBeginX, tileBeginY); + + exportSimilarityVolume(volumeSim_hmh, tileDepthList.getDepths(), _mp, tile.rc, _sgmParams, volumePath, tile.roi); - delete out2; - delete out1; - - // we want to have it in ascending order - if(out->size() > 0 && (*out)[0] > (*out)[out->size() - 1]) - { - StaticVector* outTmp = new StaticVector(); - outTmp->reserve(out->size()); - for(int i = out->size() - 1; i >= 0; i--) - { - outTmp->push_back((*out)[i]); - } - delete out; - out = outTmp; - } - - // check if it is asc - for(int i = 0; i < out->size() - 1; i++) - { - if((*out)[i] > (*out)[i + 1]) - { - - for(int j = 0; j <= i + 1; j++) - { - ALICEVISION_LOG_TRACE("getDepthsRcTc: check if it is asc: " << (*out)[j]); - } - ALICEVISION_LOG_WARNING("getDepthsRcTc: not asc"); - - if(out->size() > 1) - { - qsort(&(*out)[0], out->size(), sizeof(float), qSortCompareFloatAsc); - } - } + ALICEVISION_LOG_INFO(tile << "Export similarity volume (" << name << ") done."); } - ALICEVISION_LOG_DEBUG("used depths: " << out->size()); - return out; -} - -bool Sgm::selectBestDepthsRange(int nDepthsThr, StaticVector* rcSeedsDistsAsc) -{ - if(_depths.size() <= nDepthsThr) - return true; - - StaticVector votes; - 
votes.reserve(_depths.size() - nDepthsThr); - for(int i = 0; i < _depths.size() - nDepthsThr; i++) + if(_sgmParams.exportIntermediateCrossVolumes) { - const float d1 = _depths[i]; - const float d2 = _depths[i + nDepthsThr - 1]; + ALICEVISION_LOG_INFO(tile << "Export similarity volume cross (" << name << ")."); - int id1 = rcSeedsDistsAsc->indexOfNearestSorted(d1); - int id2 = rcSeedsDistsAsc->indexOfNearestSorted(d2); + const std::string volumeCrossPath = getFileNameFromIndex(_mp, tile.rc, mvsUtils::EFileType::volumeCross, _sgmParams.scale, "_" + name, tileBeginX, tileBeginY); - if(d1 < (*rcSeedsDistsAsc)[0]) - id1 = 0; + exportSimilarityVolumeCross(volumeSim_hmh, tileDepthList.getDepths(), _mp, tile.rc, _sgmParams, volumeCrossPath, tile.roi); - if(d2 > (*rcSeedsDistsAsc)[rcSeedsDistsAsc->size() - 1]) - id2 = rcSeedsDistsAsc->size() - 1; - - if((id1 > -1) && (id2 > -1)) - votes.push_back(abs(id2 - id1)); - else - votes.push_back(0); + ALICEVISION_LOG_INFO(tile << "Export similarity volume cross (" << name << ") done."); } - StaticVector depthsNew; - depthsNew.reserve(nDepthsThr); - - const int id1 = votes.maxValId(); - const int id2 = id1 + nDepthsThr - 1; - for(int i = id1; i <= id2; i++) - depthsNew.push_back(_depths[i]); - - std::swap(_depths, depthsNew); - return true; -} - -bool Sgm::selectBestDepthsRange(int nDepthsThr, StaticVector*>* alldepths) -{ - if(nDepthsThr <= 0 || _depths.size() <= nDepthsThr) - return true; - - StaticVector votes; - votes.reserve(_depths.size() - nDepthsThr); - - for(int i = 0; i < _depths.size() - nDepthsThr; i++) + if(_sgmParams.exportIntermediateVolume9pCsv) { - const float d1 = _depths[i]; - const float d2 = _depths[i + nDepthsThr - 1]; - float overlap = 0.0f; - - for(int c = 0; c < alldepths->size(); c++) - { - const StaticVector* tcDepths = (*alldepths)[c]; - const float dd1 = std::max(d1, (*tcDepths)[0]); - const float dd2 = std::min(d2, (*tcDepths)[tcDepths->size() - 1]); - if(dd1 < dd2) - overlap += dd2 - dd1; - } - 
votes.push_back(overlap); - } - - StaticVector depthsNew; - depthsNew.reserve(nDepthsThr); + ALICEVISION_LOG_INFO(tile << "Export similarity volume 9 points CSV (" << name << ")."); - const int id1 = votes.maxValId(); - const int id2 = id1 + nDepthsThr - 1; + const std::string stats9Path = getFileNameFromIndex(_mp, tile.rc, mvsUtils::EFileType::stats9p, _sgmParams.scale, "_sgm", tileBeginX, tileBeginY); - for(int i = id1; i <= id2; i++) - depthsNew.push_back(_depths[i]); + exportSimilaritySamplesCSV(volumeSim_hmh, tileDepthList.getDepths(), tile.rc, name, stats9Path); - std::swap(_depths, depthsNew); - return true; + ALICEVISION_LOG_INFO(tile << "Export similarity volume 9 points CSV (" << name << ") done."); + } } } // namespace depthMap diff --git a/src/aliceVision/depthMap/Sgm.hpp b/src/aliceVision/depthMap/Sgm.hpp index d09b31f2ca..6fbcad92e9 100644 --- a/src/aliceVision/depthMap/Sgm.hpp +++ b/src/aliceVision/depthMap/Sgm.hpp @@ -6,72 +6,130 @@ #pragma once +#include #include -#include -#include -#include +#include +#include +#include +#include +#include +#include + +#include +#include namespace aliceVision { namespace depthMap { -struct SgmParams; -class PlaneSweepingCuda; - /** * @brief Depth Map Estimation Semi-Global Matching */ class Sgm { public: - Sgm(const SgmParams& sgmParams, const mvsUtils::MultiViewParams& mp, PlaneSweepingCuda& cps, int rc); - ~Sgm(); - bool sgmRc(); + /** + * @brief Sgm constructor. 
+ * @param[in] mp the multi-view parameters + * @param[in] tileParams tile workflow parameters + * @param[in] sgmParams the Semi Global Matching parameters + * @param[in] stream the stream for gpu execution + */ + Sgm(const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const SgmParams& sgmParams, + cudaStream_t stream); - const StaticVector& getTCams() const { return _tCams; } - const StaticVector& getDepths() const { return _depths; } - const DepthSimMap& getDepthSimMap() const { return _depthSimMap; } + // no default constructor + Sgm() = delete; -private: + // default destructor + ~Sgm() = default; - void logRcTcDepthInformation() const; - void checkStartingAndStoppingDepth() const; + // final depth/similarity map getter + inline const CudaDeviceMemoryPitched& getDeviceDepthSimMap() const { return _depthSimMap_dmp; } - void computeDepthsAndResetTCams(); + // final normal map getter + inline const CudaDeviceMemoryPitched& getDeviceNormalMap() const { return _normalMap_dmp; } /** - * @brief Compute depths of the principal ray of reference camera rc visible by a pixel in a target camera tc - * providing meaningful 3d information. + * @brief Get memory consumpyion in device memory. + * @return device memory consumpyion (in MB) */ - StaticVector*>* computeAllDepthsAndResetTCams(float midDepth); + double getDeviceMemoryConsumption() const; /** - * @brief Fill depthsTcamsLimits member variable with index range of depths to sweep + * @brief Get unpadded memory consumpyion in device memory. + * @return unpadded device memory consumpyion (in MB) */ - void computeDepthsTcamsLimits(StaticVector*>* alldepths); + double getDeviceMemoryConsumptionUnpadded() const; /** - * @brief Fill the list of "best" depths (_depths) for rc, from all tc cameras depths + * @brief Compute for a single R camera the Semi-Global Matching depth/sim map. 
+ * @param[in] tile The given tile for SGM computation + * @param[in] tileDepthList the tile SGM depth list */ - void computeDepths(float minDepth, float maxDepth, float scaleFactor, const StaticVector*>* alldepths); + void sgmRc(const Tile& tile, const SgmDepthList& tileDepthList); - void getMinMaxDepths(float& minDepth, float& midDepth, float& maxDepth); +private: - StaticVector* getDepthsByPixelSize(float minDepth, float midDepth, float maxDepth); - StaticVector* getDepthsTc(int tc, float midDepth); + // private methods - bool selectBestDepthsRange(int nDepthsThr, StaticVector* rcSeedsDistsAsc); - bool selectBestDepthsRange(int nDepthsThr, StaticVector*>* alldepths); + /** + * @brief Compute for each RcTc the best / second best similarity volumes. + * @param[in] tile The given tile for SGM computation + * @param[in] tileDepthList the tile SGM depth list + */ + void computeSimilarityVolumes(const Tile& tile, const SgmDepthList& tileDepthList); - const SgmParams& _sgmParams; - const mvsUtils::MultiViewParams& _mp; - PlaneSweepingCuda& _cps; - const int _rc; + /** + * @brief Optimize the given similarity volume. + * @note Filter on the 3D volume to weight voxels based on their neighborhood strongness. + * So it downweights local minimums that are not supported by their neighborhood. + * @param[in] tile The given tile for SGM computation + * @param[in] tileDepthList the tile SGM depth list + */ + void optimizeSimilarityVolume(const Tile& tile, const SgmDepthList& tileDepthList); - StaticVector _tCams; - StaticVector _depths; - StaticVector _depthsTcamsLimits; - DepthSimMap _depthSimMap; + /** + * @brief Retrieve the best depths in the given similarity volume. + * @note For each pixel, choose the voxel with the minimal similarity value. 
+ * @param[in] tile The given tile for SGM computation + * @param[in] tileDepthList the tile SGM depth list + */ + void retrieveBestDepth(const Tile& tile, const SgmDepthList& tileDepthList); + + /** + * @brief Export volume alembic files and 9 points csv file. + * @param[in] tile The given tile for SGM computation + * @param[in] tileDepthList the tile SGM depth list + * @param[in] in_volume_dmp the input volume + * @param[in] name the export filename + */ + void exportVolumeInformation(const Tile& tile, + const SgmDepthList& tileDepthList, + const CudaDeviceMemoryPitched& in_volume_dmp, + const std::string& name) const; + + + // private members + + const mvsUtils::MultiViewParams& _mp; //< Multi-view parameters + const mvsUtils::TileParams& _tileParams; //< tile workflow parameters + const SgmParams& _sgmParams; //< Semi Global Matching parameters + + // private members in device memory + + CudaHostMemoryHeap _depths_hmh; //< rc depth data host memory + CudaDeviceMemoryPitched _depths_dmp; //< rc depth data device memory + CudaDeviceMemoryPitched _depthSimMap_dmp; //< rc result depth/sim map + CudaDeviceMemoryPitched _normalMap_dmp; //< rc normal map + CudaDeviceMemoryPitched _volumeBestSim_dmp; //< rc best similarity volume + CudaDeviceMemoryPitched _volumeSecBestSim_dmp; //< rc second best similarity volume + CudaDeviceMemoryPitched _volumeSliceAccA_dmp; //< for optimization: volume accumulation slice A + CudaDeviceMemoryPitched _volumeSliceAccB_dmp; //< for optimization: volume accumulation slice B + CudaDeviceMemoryPitched _volumeAxisAcc_dmp; //< for optimization: volume accumulation axis + cudaStream_t _stream; //< stream for gpu execution }; } // namespace depthMap diff --git a/src/aliceVision/depthMap/SgmDepthList.cpp b/src/aliceVision/depthMap/SgmDepthList.cpp new file mode 100644 index 0000000000..34677ac59b --- /dev/null +++ b/src/aliceVision/depthMap/SgmDepthList.cpp @@ -0,0 +1,711 @@ +// This file is part of the AliceVision project. 
+// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include "SgmDepthList.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace aliceVision { +namespace depthMap { + +int indexOfNearestSorted(const std::vector& in_vector, const float value) +{ + // retrieve the first element >= value in _data + auto it = std::lower_bound(in_vector.begin(), in_vector.end(), value); + + if(it == in_vector.end()) + return -1; + + if(it != in_vector.begin()) + { + // select the index of the closest value between it (>= value) and prevIt (< value) + const auto prevIt = std::prev(it); + it = (value - *prevIt) < (*it - value) ? prevIt : it; + } + return std::distance(in_vector.begin(), it); +} + +SgmDepthList::SgmDepthList(const mvsUtils::MultiViewParams& mp, const SgmParams& sgmParams, const Tile& tile) + : _mp(mp) + , _sgmParams(sgmParams) + , _tile(tile) +{} + +void SgmDepthList::computeListRc() +{ + ALICEVISION_LOG_DEBUG(_tile << "Compute SGM depths list."); + + // reset member variables + _depths.clear(); + _depthsTcLimits.clear(); + + // compute min/max/mid/nb depth from SfM + std::size_t nbObsDepths; + float minObsDepth, maxObsDepth, midObsDepth; + getMinMaxMidNbDepthFromSfM(minObsDepth, maxObsDepth, midObsDepth, nbObsDepths); + + if(nbObsDepths < 2) + { + ALICEVISION_LOG_INFO(_tile << "Cannot get min/max/middle depth from SfM."); + return; // nothing to do + } + + // compute depth list for each T cameras + std::vector> depthsPerTc(_tile.sgmTCams.size()); + + for(std::size_t c = 0; c < _tile.sgmTCams.size(); ++c) + { + std::vector& tcDepths = depthsPerTc.at(c); + + // compute depths of all meaningful points on the principal ray of the R camera regarding each T cameras + 
computeRcTcDepths(_tile.sgmTCams.at(c), (nbObsDepths < 10) ? -1 : midObsDepth, tcDepths); + + if(tcDepths.size() < 10) // fallback if we don't have enough valid samples over the epipolar line + { + ALICEVISION_LOG_DEBUG(_tile << "Not enough valid samples over the epipolar line. Compute depth list from R camera pixel size."); + + tcDepths.clear(); + + computePixelSizeDepths(minObsDepth, midObsDepth, maxObsDepth * _sgmParams.prematchingMaxDepthScale, tcDepths); + } + } + + // compute min/max for all Rc/Tc depth list + float minDepthAll = std::numeric_limits::max(); + float maxDepthAll = std::numeric_limits::min(); + + for(const std::vector& tcDepths : depthsPerTc) + { + for(const float depth : tcDepths) + { + minDepthAll = std::min(minDepthAll, depth); + maxDepthAll = std::max(maxDepthAll, depth); + } + } + + // no depths found + if(minDepthAll > maxDepthAll) + { + ALICEVISION_LOG_INFO(_tile << "No depths found."); + return; // nothing to do + } + + ALICEVISION_LOG_DEBUG(_tile << "Depth candidates from seeds for R camera:" << std::endl + << "\t- nb observations: " << nbObsDepths << std::endl + << "\t- all depth range: [" << minDepthAll << "-" << maxDepthAll << "]" << std::endl + << "\t- sfm depth range: [" << minObsDepth << "-" << maxObsDepth << "]"); + + float firstDepth = minDepthAll; + float lastDepth = maxDepthAll; + + // if we want to use SfM seeds anf if we get enough information from these seeds, adjust min/maxDepth + if(_sgmParams.useSfmSeeds && !_mp.getInputSfMData().getLandmarks().empty() && nbObsDepths > 10) + { + const float margin = _sgmParams.seedsRangeInflate * (maxObsDepth-minObsDepth); + firstDepth = std::max(0.f, minObsDepth - margin); + lastDepth = maxObsDepth + margin; + + if(maxDepthAll < firstDepth || minDepthAll > lastDepth) + { + // no intersection between min/maxDepth and min/maxDepthSample + // keep min/maxDepth value as is + } + else + { + // min/maxDepth intersection with min/maxDepthAll + firstDepth = std::max(minDepthAll, firstDepth); + 
lastDepth = std::min(maxDepthAll, lastDepth ); + } + ALICEVISION_LOG_DEBUG(_tile << "Final depth range (intersection: frustums / landmarks with margin): [" << firstDepth << "-" << lastDepth << "]"); + } + + // build the list of "best" depths for rc, from all tc cameras depths + computeRcDepthList(firstDepth, lastDepth, (_sgmParams.stepZ > 0.0f ? _sgmParams.stepZ : 1.0f), depthsPerTc); + + // filter out depths if computeDepths gave too many values + if(_sgmParams.maxDepths > 0 && _depths.size() > _sgmParams.maxDepths) + { + const float scaleFactor = float(_depths.size()) / float(_sgmParams.maxDepths); + + ALICEVISION_LOG_DEBUG(_tile << "Too many values in R camera depth list, filter out with scale factor:" << std::endl + << "\t- nb depths: " << _depths.size() << std::endl + << "\t- max depths: " << _sgmParams.maxDepths << std::endl + << "\t- scale factor to apply: " << scaleFactor); + + computeRcDepthList(firstDepth, lastDepth, scaleFactor, depthsPerTc); + + // ensure depth list size is not greater than maxDepths + if(_depths.size() > _sgmParams.maxDepths) + _depths.resize(_sgmParams.maxDepths); // reduce to depth list first maxDepths elements + } + + + ALICEVISION_LOG_DEBUG(_tile << "Final depth range for R camera:" << std::endl + << "\t- nb selected depths: " << _depths.size() << std::endl + << "\t- selected depth range: [" << firstDepth << "-" << lastDepth << "]"); + + + // update depth tc limits + _depthsTcLimits.resize(_tile.sgmTCams.size()); + + // fill depthsTcamsLimits member variable with index range of depths to sweep + for(std::size_t c = 0; c < _tile.sgmTCams.size(); ++c) + { + if(depthsPerTc.empty()) + { + _depthsTcLimits[c] = Pixel(-1, -1); + continue; + } + + const float d1 = depthsPerTc.at(c).front(); + const float d2 = depthsPerTc.at(c).back(); + + int id1 = indexOfNearestSorted(_depths, d1); + int id2 = indexOfNearestSorted(_depths, d2); + + if(id1 == -1) + id1 = 0; + + if(id2 == -1) + id2 = _depths.size() - 1; + + _depthsTcLimits[c] = Pixel(id1, 
id2 - id1 + 1); + } + + if(_sgmParams.exportDepthsTxtFiles) + exportTxtFiles(depthsPerTc); + + ALICEVISION_LOG_DEBUG(_tile << "Compute SGM depths list done."); +} + +void SgmDepthList::removeTcWithNoDepth(Tile& tile) +{ + assert(tile.rc == _tile.rc); + assert(tile.sgmTCams.size() == _tile.sgmTCams.size()); + + std::vector out_tCams; + std::vector out_depthsTcLimits; + + for(size_t c = 0; c < tile.sgmTCams.size(); ++c) + { + const Pixel& tcLimits = _depthsTcLimits.at(c); + const int tc = tile.sgmTCams.at(c); + + if(tcLimits.x != -1 && tcLimits.y != -1) + { + out_tCams.push_back(tc); + out_depthsTcLimits.push_back(tcLimits); + } + else + { + ALICEVISION_LOG_INFO(_tile << "Remove T camera (tc: " << tc << ", view id: " << _mp.getViewId(tc) << ") no depth found."); + } + } + + std::swap(tile.sgmTCams, out_tCams); + std::swap(_depthsTcLimits, out_depthsTcLimits); +} + +void SgmDepthList::logRcTcDepthInformation() const +{ + std::ostringstream ostr; + ostr << "Camera / Depth information: " << std::endl + << "\t- R camera:" << std::endl + << "\t - id: " << _tile.rc << std::endl + << "\t - view id: " << _mp.getViewId(_tile.rc) << std::endl + << "\t - depth planes: " << _depths.size() << std::endl + << "\t - depths range: [" << _depths[0] << "-" << _depths[_depths.size() - 1] << "]" << std::endl + << "\t- T cameras:" << std::endl; + + for(std::size_t c = 0; c < _tile.sgmTCams.size(); ++c) + { + ostr << "\t - T camera (" << (c + 1) << "/" << _tile.sgmTCams.size() << "):" << std::endl + << "\t - id: " << _tile.sgmTCams.at(c) << std::endl + << "\t - view id: " << _mp.getViewId(_tile.sgmTCams.at(c)) << std::endl + << "\t - depth planes: " << _depthsTcLimits[c].y << std::endl + << "\t - depths range: [" << _depths[_depthsTcLimits[c].x] << "-" << _depths[_depthsTcLimits[c].x + _depthsTcLimits[c].y - 1] << "]" << std::endl + << "\t - depth indexes range: [" << _depthsTcLimits[c].x << "-" << _depthsTcLimits[c].x + _depthsTcLimits[c].y << "]" << std::endl; + } + + 
ALICEVISION_LOG_INFO(_tile << ostr.str()); +} + +void SgmDepthList::checkStartingAndStoppingDepth() const +{ + struct MinOffX + { + bool operator()(const Pixel& l, const Pixel& r) const { return (l.x < r.x); } + }; + + struct MinOffXplusY + { + bool operator()(const Pixel& l, const Pixel& r) const { return (l.x + l.y < r.x + r.y); } + }; + + const int startingDepth = std::min_element(_depthsTcLimits.begin(), _depthsTcLimits.end(), MinOffX())->x; + const auto depth_it = std::max_element(_depthsTcLimits.begin(), _depthsTcLimits.end(), MinOffXplusY()); + const int stoppingDepth = depth_it->x + depth_it->y; + + // The overall starting depth index should always be zero. + assert(startingDepth == 0); + + // Usually stoppingDepth should be equal to the total number of depths. + // But due to sgmMaxDepths and sgmMaxDepthPerTc, we can have more depths + // than we finally use in all TC cameras. + assert(_depths.size() >= stoppingDepth); +} + +void SgmDepthList::getMinMaxMidNbDepthFromSfM(float& out_min, + float& out_max, + float& out_mid, + std::size_t& out_nbDepths) const +{ + using namespace boost::accumulators; + + const std::size_t cacheSize = 1000; + accumulator_set>> accDistanceMin(tag::tail::cache_size = cacheSize); + accumulator_set>> accDistanceMax(tag::tail::cache_size = cacheSize); + + const IndexT viewId = _mp.getViewId(_tile.rc); + + const ROI fullsizeRoi = upscaleROI(_tile.roi, _mp.getProcessDownscale()); // landmark observations are in the full-size image coordinate system + //const ROI selectionRoi = inflateROI(fullsizeRoi, 1.4f); // we can inflate the image full-size roi to be more permissive for common landmark selection + + OrientedPoint cameraPlane; + cameraPlane.p = _mp.CArr[_tile.rc]; + cameraPlane.n = _mp.iRArr[_tile.rc] * Point3d(0.0, 0.0, 1.0); + cameraPlane.n = cameraPlane.n.normalize(); + + Point3d midDepthPoint; + out_nbDepths = 0; + + // for each landmark + for(const auto& landmarkPair : _mp.getInputSfMData().getLandmarks()) + { + const 
sfmData::Landmark& landmark = landmarkPair.second; + const Point3d point(landmark.X(0), landmark.X(1), landmark.X(2)); + + // find rc observation + const auto it = landmark.observations.find(viewId); + + // no rc observation + if(it == landmark.observations.end()) + continue; + + // get rc 2d observation + const Vec2& obs2d = it->second.x; + + // if we compute depth list per tile keep only observation located inside the inflated image full-size ROI + if(!_sgmParams.depthListPerTile || fullsizeRoi.contains(obs2d.x(), obs2d.y())) + { + const float distance = static_cast(pointPlaneDistance(point, cameraPlane.p, cameraPlane.n)); + accDistanceMin(distance); + accDistanceMax(distance); + midDepthPoint = midDepthPoint + point; + ++out_nbDepths; + } + } + + if(out_nbDepths > 0) + { + out_min = quantile(accDistanceMin, quantile_probability = 1.0 - _sgmParams.seedsRangePercentile); + out_max = quantile(accDistanceMax, quantile_probability = _sgmParams.seedsRangePercentile); + midDepthPoint = midDepthPoint / static_cast(out_nbDepths); + out_mid = pointPlaneDistance(midDepthPoint, cameraPlane.p, cameraPlane.n); + } + else + { + out_min = 0.f; + out_max = 0.f; + out_mid = 0.f; + } + + ALICEVISION_LOG_DEBUG(_tile << "Compute min/max/mid/nb observation depth from SfM for R camera:" << std::endl + << "\t- view id: " << viewId << std::endl + << "\t- min depth: " << out_min << std::endl + << "\t- max depth: " << out_max << std::endl + << "\t- mid depth: " << out_mid << std::endl + << "\t- nb depth: " << out_nbDepths << std::endl + << "\t- percentile: " << _sgmParams.seedsRangePercentile); +} + +void SgmDepthList::getRcTcDepthRangeFromSfM(int tc, + double& out_zmin, + double& out_zmax) const +{ + // get Rc/Tc view ids + const IndexT rcViewId = _mp.getViewId(_tile.rc); + const IndexT tcViewId = _mp.getViewId(tc); + + // get R region-of-interest + // landmark observations are in the full-size image coordinate system, we need to upcscale the tile ROI + const ROI fullsizeRoi = 
upscaleROI(_tile.roi, _mp.getProcessDownscale()); + //const ROI selectionRoi = inflateROI(fullsizeRoi, 1.4f); // we can inflate the image full-size roi to be more permissive for common landmark selection + + // build R camera plane + OrientedPoint cameraPlane; + cameraPlane.p = _mp.CArr[_tile.rc]; + cameraPlane.n = _mp.iRArr[_tile.rc] * Point3d(0.0, 0.0, 1.0); + cameraPlane.n = cameraPlane.n.normalize(); + + // initialize output min/max depth + out_zmin = std::numeric_limits::max(); + out_zmax = std::numeric_limits::min(); + + // for each landmark + for(const auto& landmarkPair : _mp.getInputSfMData().getLandmarks()) + { + const sfmData::Landmark& landmark = landmarkPair.second; + const Point3d point(landmark.X(0), landmark.X(1), landmark.X(2)); + + // no tc observation + if(landmark.observations.find(tcViewId) == landmark.observations.end()) + continue; + + // find rc observation + const auto it = landmark.observations.find(rcViewId); + + // no rc observation + if(it == landmark.observations.end()) + continue; + + // get rc 2d observation + const Vec2& obs2d = it->second.x; + + // observation located inside the inflated image full-size ROI + if(!_sgmParams.depthListPerTile || fullsizeRoi.contains(obs2d.x(), obs2d.y())) + { + // compute related depth + const double depth = pointPlaneDistance(point, cameraPlane.p, cameraPlane.n); + + // update min/max depth + out_zmin = std::min(out_zmin, depth); + out_zmax = std::max(out_zmax, depth); + } + } + + // no common observations found + if(out_zmin > out_zmax) + { + ALICEVISION_THROW_ERROR(_tile << "Cannot compute min/max depth from common Rc/Tc SfM observations." 
<< std::endl + << "No common observations found (tc view id: " << tcViewId << ")."); + } + + ALICEVISION_LOG_DEBUG(_tile << "Compute min/max depth from common Rc/Tc SfM observations:" << std::endl + << "\t- rc: " << _tile.rc << " (view id: " << rcViewId << ")" << std::endl + << "\t- tc: " << tc << " (view id: " << tcViewId << ")" << std::endl + << "\t- min depth: " << out_zmin << std::endl + << "\t- max depth: " << out_zmax); +} + +void SgmDepthList::computeRcTcDepths(int tc, + float midDepth, + std::vector& out_depths) const +{ + assert(out_depths.empty()); + + OrientedPoint rcplane; + rcplane.p = _mp.CArr[_tile.rc]; + rcplane.n = _mp.iRArr[_tile.rc] * Point3d(0.0, 0.0, 1.0); + rcplane.n = rcplane.n.normalize(); + + // ROI center + const Point2d roiCenter((_tile.roi.x.begin + (_tile.roi.width() * 0.5)), _tile.roi.y.begin + (_tile.roi.height() * 0.5)); + + // principal point of the rc camera + const Point2d principalPoint(_mp.getWidth(_tile.rc) * 0.5, _mp.getHeight(_tile.rc) * 0.5); + + // reference point for the epipolar line + const Point2d referencePoint = (!_sgmParams.depthListPerTile) ? 
principalPoint : roiCenter; + + // input middle depth related point + Point2d tcMidDepthPoint; + + // segment of epipolar line + Point2d tcFromPoint, tcToPoint; + + { + const Matrix3x4& rP = _mp.camArr[_tile.rc]; + const Matrix3x4& tP = _mp.camArr[tc]; + + Point3d rC; + Matrix3x3 rR; + Matrix3x3 riR; + Matrix3x3 rK; + Matrix3x3 riK; + Matrix3x3 riP; + _mp.decomposeProjectionMatrix(rC, rR, riR, rK, riK, riP, rP); + + _mp.getPixelFor3DPoint(&tcMidDepthPoint, ((riP * referencePoint) * midDepth) + rC, tP); + + double zmin; + double zmax; + + getRcTcDepthRangeFromSfM(tc, zmin, zmax); + + Point2d tarpix1; + Point2d tarpix2; + + _mp.getPixelFor3DPoint(&tarpix1, ((riP * referencePoint) * zmin) + rC, tP); + _mp.getPixelFor3DPoint(&tarpix2, ((riP * referencePoint) * zmax) + rC, tP); + + get2dLineImageIntersection(&tcFromPoint, &tcToPoint, tarpix1, tarpix2, _mp, tc); + } + + const int nbSegmentPoints = static_cast((tcToPoint - tcFromPoint).size()); + const int nbSegmentPointsAtSgmScale = nbSegmentPoints / _sgmParams.scale; + const Point2d pixelVect = (tcToPoint - tcFromPoint).normalize() * std::max(1.0, double(_sgmParams.scale)); + + // compute the epilolar segment depth direction + int depthDirection = 1; + { + Point3d p; + + // triangulate middle depth point + if(!triangulateMatch(p, referencePoint, tcMidDepthPoint, _tile.rc, tc, _mp)) + return; + + const float depth = orientedPointPlaneDistance(p, rcplane.p, rcplane.n); + + // triangulate middle depth point + 1 pixelVect + if(!triangulateMatch(p, referencePoint, tcMidDepthPoint + pixelVect, _tile.rc, tc, _mp)) + return; + + const float depthP1 = orientedPointPlaneDistance(p, rcplane.p, rcplane.n); + + if(depth > depthP1) + depthDirection = -1; + } + + out_depths.reserve(nbSegmentPointsAtSgmScale); + + const Point3d refVect = _mp.iCamArr[_tile.rc] * referencePoint; + float previousDepth = -1.0f; + + // compute depths for all pixels from one side of the epipolar segment to the other + for(int i = 0; i < 
nbSegmentPointsAtSgmScale; ++i) + { + const Point2d tcPoint = ((depthDirection > 0) ? tcFromPoint : tcToPoint) + (pixelVect * double(i) * double(depthDirection)); + + // check if the epipolar segment point is in T camera + // note: get2dLineImageIntersection can give points slightly out of the picture + if(!_mp.isPixelInImage(tcPoint, tc)) + continue; + + const Point3d tarVect = _mp.iCamArr[tc] * tcPoint; + const float refTarVectAngle = angleBetwV1andV2(refVect, tarVect); + + // if vects are near parallel then this results to strange angles + // this is the proper angle because it does not depend on the triangulated p + if(refTarVectAngle < _mp.getMinViewAngle() || refTarVectAngle > _mp.getMaxViewAngle()) + continue; + + // epipolar segment point related 3d point + Point3d p; + + // triangulate principal point from rc with tcPoint + if(!triangulateMatch(p, referencePoint, tcPoint, _tile.rc, tc, _mp)) + continue; + + // check the difference in pixel size between R and T and the angle size of p + // note: disabled for now, this test is too strict and rejects too many points. 
+ //if(!checkPair(p, _tile.rc, tc, _mp, _mp.getMinViewAngle(), _mp.getMaxViewAngle())) + // continue; + + // compute related 3d point depth + const float depth = float(orientedPointPlaneDistance(p, rcplane.p, rcplane.n)); + + if((depth > 0.0f) && (depth > previousDepth)) + { + out_depths.push_back(depth); + previousDepth = depth + std::numeric_limits::epsilon(); + } + } + + out_depths.shrink_to_fit(); + + ALICEVISION_LOG_DEBUG(_tile << "Find depths over the epipolar line segment between R and T cameras:" << std::endl + << "\t- rc: " << _tile.rc << "(view id: " << _mp.getViewId(_tile.rc) << ")" << std::endl + << "\t- tc: " << tc << "(view id: " << _mp.getViewId(tc) << ")" << std::endl + << "\t- # points of the epipolar segment: " << nbSegmentPoints << std::endl + << "\t- # points of the epipolar segment at SGM scale: " << nbSegmentPointsAtSgmScale << std::endl + << "\t- # depths to use: " << out_depths.size()); + + if(!out_depths.empty()) + ALICEVISION_LOG_DEBUG(_tile << "Depth to use range [" << out_depths.front() << "-" << out_depths.back() << "]" << std::endl); +} + +void SgmDepthList::computePixelSizeDepths(float minObsDepth, + float midObsDepth, + float maxObsDepth, + std::vector& out_depths) const +{ + assert(out_depths.empty()); + + const int rcDepthsCompStep = 6; + const int maxDepthsHalf = 1024; + + const float d = float(_sgmParams.scale) * float(rcDepthsCompStep); + + OrientedPoint rcplane; + rcplane.p = _mp.CArr[_tile.rc]; + rcplane.n = _mp.iRArr[_tile.rc] * Point3d(0.0, 0.0, 1.0); + rcplane.n = rcplane.n.normalize(); + + int ndepthsMidMax = 0; + float maxdepth = midObsDepth; + while((maxdepth < maxObsDepth) && (ndepthsMidMax < maxDepthsHalf)) + { + Point3d p = rcplane.p + rcplane.n * maxdepth; + float pixSize = _mp.getCamPixelSize(p, _tile.rc, d); + maxdepth += pixSize; + ndepthsMidMax++; + } + + int ndepthsMidMin = 0; + float mindepth = midObsDepth; + while((mindepth > minObsDepth) && (ndepthsMidMin < maxDepthsHalf * 2 - ndepthsMidMax)) + { + Point3d p 
= rcplane.p + rcplane.n * mindepth; + float pixSize = _mp.getCamPixelSize(p, _tile.rc, d); + mindepth -= pixSize; + ndepthsMidMin++; + } + + // get number of depths + float depth = mindepth; + int ndepths = 0; + float pixSize = 1.0f; + while((depth < maxdepth) && (pixSize > 0.0f) && (ndepths < 2 * maxDepthsHalf)) + { + Point3d p = rcplane.p + rcplane.n * depth; + pixSize = _mp.getCamPixelSize(p, _tile.rc, d); + depth += pixSize; + ndepths++; + } + + out_depths.reserve(ndepths); + + // fill + depth = mindepth; + pixSize = 1.0f; + ndepths = 0; + while((depth < maxdepth) && (pixSize > 0.0f) && (ndepths < 2 * maxDepthsHalf)) + { + out_depths.push_back(depth); + Point3d p = rcplane.p + rcplane.n * depth; + pixSize = _mp.getCamPixelSize(p, _tile.rc, d); + depth += pixSize; + ndepths++; + } + + // check if it is asc + for(int i = 0; i < out_depths.size() - 1; i++) + { + if(out_depths[i] >= out_depths[i + 1]) + { + for(int j = 0; j <= i + 1; j++) + { + ALICEVISION_LOG_TRACE(_tile << "getDepthsByPixelSize: check if it is asc: " << out_depths[j]); + } + throw std::runtime_error("getDepthsByPixelSize not asc."); + } + } +} + +void SgmDepthList::computeRcDepthList(float firstDepth, + float lastDepth, + float scaleFactor, + const std::vector>& dephtsPerTc) +{ + _depths.clear(); + + float depth = firstDepth; + + while(depth < lastDepth) + { + _depths.push_back(depth); + + // get min tc step at depth + float minTcStep = lastDepth - firstDepth; + + // for each tc camera + for(const std::vector& tcDepths : dephtsPerTc) + { + // get the tc depth closest to the current depth + const int id = indexOfNearestSorted(tcDepths, depth); + + // continue on no result or last element (we need id + 1) + if(id < 0 || id >= tcDepths.size() - 1) + continue; + + // enclosing depth range + const float tcStep = fabs(tcDepths.at(id) - tcDepths.at(id + 1)); // (closest - next) depths distance + + // keep this value if smallest step so far + minTcStep = std::min(minTcStep, tcStep); + } + + depth += 
minTcStep * scaleFactor; + } +} + +void SgmDepthList::exportTxtFiles(const std::vector>& dephtsPerTc) const +{ + const std::string prefix(_mp.getDepthMapsFolder() + std::to_string(_mp.getViewId(_tile.rc)) + std::string("_")); + const std::string suffix("_" + std::to_string(_tile.roi.x.begin) + "_" + std::to_string(_tile.roi.y.begin) + ".txt"); + + // export depthsTcLimits txt file + { + const std::string fn = prefix + "depthsTcLimits" + suffix; + FILE* f = fopen(fn.c_str(), "w"); + for(int j = 0; j < _depthsTcLimits.size(); j++) + { + Pixel l = _depthsTcLimits[j]; + fprintf(f, "%i %i\n", l.x, l.y); + } + fclose(f); + } + + // export rc depth txt file + { + const std::string fn = prefix + "depths" + suffix; + FILE* f = fopen(fn.c_str(), "w"); + for(int j = 0; j < _depths.size(); j++) + { + fprintf(f, "%f\n", _depths[j]); + } + fclose(f); + } + + // export all depths per tc txt files + { + for(int c = 0; c < dephtsPerTc.size(); ++c) + { + const std::string fn = prefix + "depths_tc_" + mvsUtils::num2str(_mp.getViewId(_tile.sgmTCams.at(c))) + suffix; + FILE* f = fopen(fn.c_str(), "w"); + for(const float depth : dephtsPerTc.at(c)) + { + fprintf(f, "%f\n", depth); + } + fclose(f); + } + } +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/SgmDepthList.hpp b/src/aliceVision/depthMap/SgmDepthList.hpp new file mode 100644 index 0000000000..3360c0b3a2 --- /dev/null +++ b/src/aliceVision/depthMap/SgmDepthList.hpp @@ -0,0 +1,145 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#pragma once + +#include +#include +#include +#include + +namespace aliceVision { +namespace depthMap { + +/** + * @brief Semi-Global Matching Depth List + */ +class SgmDepthList +{ +public: + + /** + * @brief SgmDepthList constructor. + * @param[in] mp the multi-view parameters + * @param[in] sgmParams the Semi Global Matching parameters + * @param[in] tile The given tile for depth list computation + */ + SgmDepthList(const mvsUtils::MultiViewParams& mp, const SgmParams& sgmParams, const Tile& tile); + + // default destructor + ~SgmDepthList() = default; + + // final R camera depth list getter + inline const std::vector& getDepths() const { return _depths; } + + // final T camera depth limits getter + inline const std::vector& getDepthsTcLimits() const { return _depthsTcLimits; } + + // final R camera first/last depth getter + inline const std::pair getMinMaxDepths() const { return {_depths.front(), _depths.back()}; } + + /** + * @brief Compute R camera depth list / depth limits from T cameras + * @param[in,out] tile The given tile for depth list computation + */ + void computeListRc(); + + /** + * @brief Remove tile tcs with no depth + * @note also remove depthsTcLimits with no depth + */ + void removeTcWithNoDepth(Tile& tile); + + /** + * @brief Log depth information + */ + void logRcTcDepthInformation() const; + + /** + * @brief check the starting and stopping depth + */ + void checkStartingAndStoppingDepth() const; + +private: + + // private methods + + /** + * @brief Compute min/max/mid/nb depth observation for R camera from SfM. + * @param[out] out_min The minimum depth observation + * @param[out] out_max The maximum depth observation + * @param[out] out_mid The middle depth observation + * @param[out] out_nbDepths The number of depth observation + */ + void getMinMaxMidNbDepthFromSfM(float& out_min, + float& out_max, + float& out_mid, + std::size_t& out_nbDepths) const; + + /** + * @brief Compute min/max depth from common Rc/Tc SfM observations. 
+ * @param[in] tc The T camera index + * @param[out] out_zmin The minimum depth + * @param[out] out_zmax The maximum depth + */ + void getRcTcDepthRangeFromSfM(int tc, + double& out_zmin, + double& out_zmax) const; + + /** + * @brief Compute depths of the principal ray of reference camera rc visible by a pixel in a target camera tc + * providing meaningful 3d information. + * @param[in] tc the T camera index + * @param[in] midDepth The middle depth observation + * @param[out] out_depths the output depth list + */ + void computeRcTcDepths(int tc, + float midObsDepth, + std::vector& out_depths) const; + + /** + * @brief Compute a depth list from R camera pixel size. + * @param[in] minObsDepth The min depth observation + * @param[in] midObsDepth The middle depth observation + * @param[in] maxObsDepth The max depth observation + * @param[out] out_depths the output depth list + */ + void computePixelSizeDepths(float minObsDepth, + float midObsDepth, + float maxObsDepth, + std::vector& out_depths) const; + + /** + * @brief Fill the list of "best" depths (_depths) for rc, from all tc cameras depths. + * @param[in] firstDepth The first depth + * @param[in] lastDepth The last depth + * @param[in] scaleFactor The scale factor to apply between each depth + * @param[in] dephtsPerTc The depth list per T camera + */ + void computeRcDepthList(float firstDepth, + float lastDepth, + float scaleFactor, + const std::vector>& dephtsPerTc); + + + /** + * @brief Export multiple intermediate depth list txt files. 
+ * @param[in] dephtsPerTc The depth list per T camera + */ + void exportTxtFiles(const std::vector>& dephtsPerTc) const; + + // private members + + const mvsUtils::MultiViewParams& _mp; //< Multi-view parameters + const SgmParams& _sgmParams; //< Semi Global Matching parameters + const Tile& _tile; //< Tile for depth list computation + + std::vector _depths; //< R camera depth list + std::vector _depthsTcLimits; //< T camera depth limits +}; + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/SgmParams.cpp b/src/aliceVision/depthMap/SgmParams.cpp deleted file mode 100644 index f0218ae2fd..0000000000 --- a/src/aliceVision/depthMap/SgmParams.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2021 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#include "SgmParams.hpp" - -#include -#include - -namespace aliceVision { -namespace depthMap { - -int computeDownscale(const mvsUtils::MultiViewParams& mp, int scale, int maxWidth, int maxHeight) -{ - const int maxImageWidth = mp.getMaxImageWidth() / scale; - const int maxImageHeight = mp.getMaxImageHeight() / scale; - - int downscale = 1; - int downscaleWidth = mp.getMaxImageWidth() / scale; - int downscaleHeight = mp.getMaxImageHeight() / scale; - - while((downscaleWidth > maxWidth) || (downscaleHeight > maxHeight)) - { - downscale++; - downscaleWidth = maxImageWidth / downscale; - downscaleHeight = maxImageHeight / downscale; - } - - return downscale; -} - -void computeScaleStepSgmParams(const mvsUtils::MultiViewParams& mp, SgmParams& sgmParams) -{ - const int fileScale = 1; // input images scale (should be one) - const int maxSideXY = sgmParams.maxSideXY / mp.getProcessDownscale(); - const int maxImageW = mp.getMaxImageWidth(); - const int maxImageH = mp.getMaxImageHeight(); - - int maxW = maxSideXY; - int maxH = maxSideXY * 0.8; - - if(maxImageW < maxImageH) - std::swap(maxW, maxH); - - if(sgmParams.scale == -1) - { - // compute the number of scales that will be used in the plane sweeping. - // the highest scale should have a resolution close to 700x550 (or less). 
- const int scaleTmp = computeDownscale(mp, fileScale, maxW, maxH); - sgmParams.scale = std::min(2, scaleTmp); - } - if(sgmParams.stepXY == -1) - { - sgmParams.stepXY = computeDownscale(mp, fileScale * sgmParams.scale, maxW, maxH); - } - - ALICEVISION_LOG_INFO("Computed SGM scale: " << sgmParams.scale << ", stepXY: " << sgmParams.stepXY); -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/SgmParams.hpp b/src/aliceVision/depthMap/SgmParams.hpp index 74886e8f64..7de82478fa 100644 --- a/src/aliceVision/depthMap/SgmParams.hpp +++ b/src/aliceVision/depthMap/SgmParams.hpp @@ -9,10 +9,6 @@ #include namespace aliceVision { - -// MultiViewParams forward declaration -namespace mvsUtils { class MultiViewParams; } - namespace depthMap { /** @@ -22,39 +18,38 @@ struct SgmParams { // user parameters - int scale = -1; - int stepXY = -1; + int scale = 2; + int stepXY = 2; int stepZ = -1; int wsh = 4; - int maxTCams = 10; - int maxDepths = 3000; - int maxDepthsPerTc = 1500; - int maxSideXY = 700; + int maxDepths = 1500; + int maxTCamsPerTile = 4; + double seedsRangeInflate = 0.2; double gammaC = 5.5; double gammaP = 8.0; double p1 = 10; double p2Weighting = 100.0; std::string filteringAxes = "YX"; bool useSfmSeeds = true; - bool exportIntermediateResults = false; + bool depthListPerTile = false; + + // intermediate results export parameters + + bool exportIntermediateDepthSimMaps = false; + bool exportIntermediateVolumes = false; + bool exportIntermediateCrossVolumes = false; + bool exportIntermediateVolume9pCsv = false; + const bool exportDepthsTxtFiles = false; // constant parameters - - const bool prematchinMinMaxDepthDontUseSeeds = false; - const float prematchingMaxDepthScale = 1.5f; - const float prematchingMinCamDist = 0.0f; - const float prematchingMaxCamDist = 15.0f; - const int rcTcDepthsHalfLimit = 2048; - const int rcDepthsCompStep = 6; - const double seedsRangeInflate = 0.2; + const bool updateUninitializedSim = true; // 
should always be true, false for debug purposes + const bool computeNormalMap = false; // for experimentation purposes + const float prematchingMaxDepthScale = 1.5f; const double seedsRangePercentile = 0.999; const bool doSgmOptimizeVolume = true; - const bool interpolateRetrieveBestDepth = false; - const bool saveDepthsToSweepTxtFile = false; + }; -void computeScaleStepSgmParams(const mvsUtils::MultiViewParams& mp, SgmParams& sgmParams); - } // namespace depthMap } // namespace aliceVision diff --git a/src/aliceVision/depthMap/Tile.hpp b/src/aliceVision/depthMap/Tile.hpp new file mode 100644 index 0000000000..de3c8d1bf3 --- /dev/null +++ b/src/aliceVision/depthMap/Tile.hpp @@ -0,0 +1,37 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#pragma once + +#include + +#include +#include + +namespace aliceVision { +namespace depthMap { + +/** + * @brief Depth Map Tile Structure + */ +struct Tile +{ + int id; //< tile index + int nbTiles; //< number of tiles per image + int rc; //< related R camera index + std::vector sgmTCams; //< SGM T camera index list + std::vector refineTCams; //< Refine T camera index list + ROI roi; //< 2d region of interest of the R image +}; + +inline std::ostream& operator<<(std::ostream& os, const Tile& tile) +{ + os << "(rc: " << tile.rc << ", tile: " << (tile.id + 1) << "/" << tile.nbTiles << ") "; + return os; +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/computeOnMultiGPUs.cpp b/src/aliceVision/depthMap/computeOnMultiGPUs.cpp index 4c38c57f3c..d0b4562751 100644 --- a/src/aliceVision/depthMap/computeOnMultiGPUs.cpp +++ b/src/aliceVision/depthMap/computeOnMultiGPUs.cpp @@ -5,15 +5,16 @@ // You can obtain one at https://mozilla.org/MPL/2.0/. #include "computeOnMultiGPUs.hpp" -#include // useful for listCUDADevices + #include +#include namespace aliceVision { namespace depthMap { void computeOnMultiGPUs(mvsUtils::MultiViewParams& mp, const std::vector& cams, GPUJob gpujob, int nbGPUsToUse) { - const int nbGPUDevices = listCUDADevices(true); + const int nbGPUDevices = listCudaDevices(); const int nbCPUThreads = omp_get_max_threads(); ALICEVISION_LOG_INFO("Number of GPU devices: " << nbGPUDevices << ", number of CPU threads: " << nbCPUThreads); @@ -41,14 +42,14 @@ void computeOnMultiGPUs(mvsUtils::MultiViewParams& mp, const std::vector& c #pragma omp parallel { const int cpuThreadId = omp_get_thread_num(); - const int cudaDeviceIndex = cpuThreadId % nbThreads; + const int cudaDeviceId = cpuThreadId % nbThreads; - ALICEVISION_LOG_INFO("CPU thread " << cpuThreadId << " (of " << nbThreads << ") uses CUDA device: " << cudaDeviceIndex); + ALICEVISION_LOG_INFO("CPU thread " << cpuThreadId << " (of " << nbThreads << ") uses CUDA 
device: " << cudaDeviceId); const int nbCamsPerThread = (cams.size() / nbThreads); - const int rcFrom = cudaDeviceIndex * nbCamsPerThread; - int rcTo = (cudaDeviceIndex + 1) * nbCamsPerThread; - if (cudaDeviceIndex == nbThreads - 1) + const int rcFrom = cudaDeviceId * nbCamsPerThread; + int rcTo = (cudaDeviceId + 1) * nbCamsPerThread; + if(cudaDeviceId == nbThreads - 1) { rcTo = cams.size(); } @@ -61,7 +62,7 @@ void computeOnMultiGPUs(mvsUtils::MultiViewParams& mp, const std::vector& c subcams.push_back(cams[rc]); } - gpujob(cudaDeviceIndex, mp, subcams); + gpujob(cudaDeviceId, mp, subcams); } omp_set_num_threads(previous_count_threads); } diff --git a/src/aliceVision/depthMap/computeOnMultiGPUs.hpp b/src/aliceVision/depthMap/computeOnMultiGPUs.hpp index 4b34624241..4d3ae81403 100644 --- a/src/aliceVision/depthMap/computeOnMultiGPUs.hpp +++ b/src/aliceVision/depthMap/computeOnMultiGPUs.hpp @@ -6,7 +6,6 @@ #pragma once -#include #include namespace aliceVision { diff --git a/src/aliceVision/depthMap/cuda/FrameCacheMemory.cpp b/src/aliceVision/depthMap/cuda/FrameCacheMemory.cpp deleted file mode 100644 index 7da5b63858..0000000000 --- a/src/aliceVision/depthMap/cuda/FrameCacheMemory.cpp +++ /dev/null @@ -1,164 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2021 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#include "FrameCacheMemory.hpp" - -#include -#include - -#include -#include - -namespace aliceVision { -namespace depthMap { - -/********************************************************************************* - * FrameCacheEntry - *********************************************************************************/ - -FrameCacheEntry::FrameCacheEntry(int cache_frame_id, int w, int h, int s) - : _cache_frame_id(cache_frame_id) - , _cache_cam_id(-1) - , _global_cam_id(-1) - , _width(w) - , _height(h) - , _scales(s) - , _memBytes(0) -{ - CudaSize<2> sz(w, h); - _host_frame = new CudaHostMemoryHeap(sz); - _memBytes = ps_deviceAllocate(_pyramid, w, h, s); -} - -FrameCacheEntry::~FrameCacheEntry() -{ - ps_deviceDeallocate(_pyramid, _scales); - delete _host_frame; -} - -Pyramid& FrameCacheEntry::getPyramid() -{ - return _pyramid; -} - -Pyramid* FrameCacheEntry::getPyramidPtr() -{ - return &_pyramid; -} - -int FrameCacheEntry::getPyramidMem() const -{ - return _memBytes; -} - -void FrameCacheEntry::fillFrame(int global_cam_id, - mvsUtils::ImagesCache>& imageCache, - mvsUtils::MultiViewParams& mp, cudaStream_t stream) -{ - ALICEVISION_LOG_TRACE(__FUNCTION__ << ": camera:" << global_cam_id << " " << mp.getWidth(global_cam_id) << "x" - << mp.getHeight(global_cam_id)); - - /* Copy data for cached image "global_cam_id" into the host-side data buffer managed - * by data structure "cam". */ - fillHostFrameFromImageCache(imageCache, _host_frame, global_cam_id, mp); - - /* Copy data from host-sided cache in "cam" onto the GPU and create - * downscaled and Gauss-filtered versions on the GPU. 
*/ - ps_device_fillPyramidFromHostFrame(_pyramid, _host_frame, _scales, mp.getWidth(global_cam_id), - mp.getHeight(global_cam_id), stream); -} - -void FrameCacheEntry::fillHostFrameFromImageCache(mvsUtils::ImagesCache>& ic, - CudaHostMemoryHeap* hostFrame, int c, - mvsUtils::MultiViewParams& mp) -{ - system::Timer timer; - - auto img = ic.getImg_sync(c); - ALICEVISION_LOG_TRACE(__FUNCTION__ << ": " << c << " -a- Retrieve from ImagesCache elapsed time: " << timer.elapsedMs() << " ms."); - timer.reset(); - - const int h = mp.getHeight(c); - const int w = mp.getWidth(c); - for(int y = 0; y < h; ++y) - { - for(int x = 0; x < w; ++x) - { - const image::RGBAfColor& floatRGBA = (*img)(y, x); - CudaRGBA& pix_rgba = (*hostFrame)(x, y); - pix_rgba.x = floatRGBA.r() * 255.0f; - pix_rgba.y = floatRGBA.g() * 255.0f; - pix_rgba.z = floatRGBA.b() * 255.0f; - pix_rgba.w = floatRGBA.a() * 255.0f; - } - } - ALICEVISION_LOG_DEBUG(__FUNCTION__ << ": " << c << " -b- Copy to HMH elapsed time: " << timer.elapsedMs() << " ms."); -} - -void FrameCacheEntry::setLocalCamId(int cache_cam_id) -{ - _cache_cam_id = cache_cam_id; -} - -int FrameCacheEntry::getLocalCamId() const -{ - return _cache_cam_id; -} - -/********************************************************************************* - * FrameCacheMemory - *********************************************************************************/ - -FrameCacheMemory::FrameCacheMemory(int ImgsInGPUAtTime, int maxWidth, int maxHeight, int scales, int CUDAdeviceNo) -{ - int allBytes = 0; - - /* If not done before, initialize Gaussian filters in GPU constant mem. 
*/ - ps_create_gaussian_arr(CUDAdeviceNo, scales); - - pr_printfDeviceMemoryInfo(); - - _v.resize(ImgsInGPUAtTime); - - for(int i = 0; i < ImgsInGPUAtTime; i++) - { - _v[i] = new FrameCacheEntry(i, maxWidth, maxHeight, scales); - allBytes += _v[i]->getPyramidMem(); - } - - ALICEVISION_LOG_INFO("FrameCache for GPU " << CUDAdeviceNo << ", " << scales << " scales, allocated " << allBytes << " on GPU"); - - pr_printfDeviceMemoryInfo(); -} - -FrameCacheMemory::~FrameCacheMemory() -{ - for(auto ptr : _v) - { - delete ptr; - } -} - -void FrameCacheMemory::fillFrame(int cache_frame_id, int global_cam_id, - mvsUtils::ImagesCache>& imageCache, - mvsUtils::MultiViewParams& mp, - cudaStream_t stream) -{ - _v[cache_frame_id]->fillFrame(global_cam_id, imageCache, mp, stream); -} - -void FrameCacheMemory::setLocalCamId(int cache_frame_id, int cache_cam_id) -{ - _v[cache_frame_id]->setLocalCamId(cache_cam_id); -} - -int FrameCacheMemory::getLocalCamId(int cache_frame_id) const -{ - return _v[cache_frame_id]->getLocalCamId(); -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/FrameCacheMemory.hpp b/src/aliceVision/depthMap/cuda/FrameCacheMemory.hpp deleted file mode 100644 index 655b675f41..0000000000 --- a/src/aliceVision/depthMap/cuda/FrameCacheMemory.hpp +++ /dev/null @@ -1,98 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2021 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#pragma once - -#include -#include - -#include - -namespace aliceVision { -namespace depthMap { - -/********************************************************************************* - * FrameCacheEntry - * Support class to maintain CUDA memory and textures for an image frame in - * the GPU Cache. 
- * _cache_cam_id contains the own position in the memory array. - * _global_cam_id should contain the global frame that is currently stored in - * this cache slot. - *********************************************************************************/ - -class FrameCacheEntry -{ - // cache slot for image, identical to index in FrameCacheMemory vector - const int _cache_frame_id; - - // cache slot for camera parameters - int _cache_cam_id; - - // cache slot in the global host-sided image cache - int _global_cam_id; - - Pyramid _pyramid; - CudaHostMemoryHeap* _host_frame; - int _width; - int _height; - int _scales; - int _memBytes; - -public: - FrameCacheEntry(int cache_frame_id, int w, int h, int s); - - ~FrameCacheEntry(); - - Pyramid& getPyramid(); - Pyramid* getPyramidPtr(); - - int getPyramidMem() const; - - void fillFrame(int global_cam_id, - mvsUtils::ImagesCache>& imageCache, - mvsUtils::MultiViewParams& mp, - cudaStream_t stream); - - void setLocalCamId(int cache_cam_id); - - int getLocalCamId() const; - -private: - static void fillHostFrameFromImageCache(mvsUtils::ImagesCache>& ic, - CudaHostMemoryHeap* hostFrame, int c, - mvsUtils::MultiViewParams& mp); -}; - -/********************************************************************************* - * FrameCacheMemory - * Support class that maintains the memory for the GPU memory used for caching - * currently loaded images. 
- *********************************************************************************/ - -class FrameCacheMemory -{ - std::vector _v; - -public: - FrameCacheMemory(int ImgsInGPUAtTime, int maxWidth, int maxHeight, int scales, int CUDADeviceNO); - - ~FrameCacheMemory(); - - inline Pyramid& getPyramid(int camera) { return _v[camera]->getPyramid(); } - inline Pyramid* getPyramidPtr(int camera) { return _v[camera]->getPyramidPtr(); } - - void fillFrame(int cache_id, int global_cam_id, - mvsUtils::ImagesCache>& imageCache, - mvsUtils::MultiViewParams& mp, - cudaStream_t stream); - - void setLocalCamId(int cache_id, int cache_cam_id); - - int getLocalCamId(int cache_id) const; -}; - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/OneTC.hpp b/src/aliceVision/depthMap/cuda/OneTC.hpp deleted file mode 100644 index 17a447c321..0000000000 --- a/src/aliceVision/depthMap/cuda/OneTC.hpp +++ /dev/null @@ -1,60 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#pragma once - -// #include - -namespace aliceVision { -namespace depthMap { - -struct OneTC -{ -private: - /* tcidx is the local index of this TC for the computation of the current RC */ - const int _tcidx; - - const int _depth_to_start; - - const int _depths_to_search; - -public: - OneTC(int tc, int start, int search) - : _tcidx( tc ) - , _depth_to_start( start ) - , _depths_to_search( search ) - { } - - OneTC( const OneTC& orig ) - : _tcidx( orig._tcidx ) - , _depth_to_start( orig._depth_to_start ) - , _depths_to_search( orig._depths_to_search ) - { } - - inline int getTCIndex() const - { - return _tcidx; - } - - inline int getDepthToStart() const - { - return _depth_to_start; - } - - inline int getDepthsToSearch() const - { - return _depths_to_search; - } - - inline int getDepthToStop() const - { - return _depth_to_start + _depths_to_search; - } -}; - -} // namespace depthMap -} // namespace aliceVision - diff --git a/src/aliceVision/depthMap/cuda/PlaneSweepingCuda.cpp b/src/aliceVision/depthMap/cuda/PlaneSweepingCuda.cpp deleted file mode 100644 index 6f0e0a5c1d..0000000000 --- a/src/aliceVision/depthMap/cuda/PlaneSweepingCuda.cpp +++ /dev/null @@ -1,765 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#include "PlaneSweepingCuda.hpp" -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace aliceVision { -namespace depthMap { - -static void cps_host_fillCamera(CameraStructBase& base, int c, mvsUtils::MultiViewParams& mp, int scale ) -{ - - Matrix3x3 scaleM; - scaleM.m11 = 1.0 / (float)scale; - scaleM.m12 = 0.0; - scaleM.m13 = 0.0; - scaleM.m21 = 0.0; - scaleM.m22 = 1.0 / (float)scale; - scaleM.m23 = 0.0; - scaleM.m31 = 0.0; - scaleM.m32 = 0.0; - scaleM.m33 = 1.0; - Matrix3x3 K = scaleM * mp.KArr[c]; - - Matrix3x3 iK = K.inverse(); - Matrix3x4 P = K * (mp.RArr[c] | (Point3d(0.0, 0.0, 0.0) - mp.RArr[c] * mp.CArr[c])); - Matrix3x3 iP = mp.iRArr[c] * iK; - - base.C.x = mp.CArr[c].x; - base.C.y = mp.CArr[c].y; - base.C.z = mp.CArr[c].z; - - base.P[0] = P.m11; - base.P[1] = P.m21; - base.P[2] = P.m31; - base.P[3] = P.m12; - base.P[4] = P.m22; - base.P[5] = P.m32; - base.P[6] = P.m13; - base.P[7] = P.m23; - base.P[8] = P.m33; - base.P[9] = P.m14; - base.P[10] = P.m24; - base.P[11] = P.m34; - - base.iP[0] = iP.m11; - base.iP[1] = iP.m21; - base.iP[2] = iP.m31; - base.iP[3] = iP.m12; - base.iP[4] = iP.m22; - base.iP[5] = iP.m32; - base.iP[6] = iP.m13; - base.iP[7] = iP.m23; - base.iP[8] = iP.m33; - - base.R[0] = mp.RArr[c].m11; - base.R[1] = mp.RArr[c].m21; - base.R[2] = mp.RArr[c].m31; - base.R[3] = mp.RArr[c].m12; - base.R[4] = mp.RArr[c].m22; - base.R[5] = mp.RArr[c].m32; - base.R[6] = mp.RArr[c].m13; - base.R[7] = mp.RArr[c].m23; - base.R[8] = mp.RArr[c].m33; - - base.iR[0] = mp.iRArr[c].m11; - base.iR[1] = mp.iRArr[c].m21; - base.iR[2] = mp.iRArr[c].m31; - base.iR[3] = mp.iRArr[c].m12; - base.iR[4] = mp.iRArr[c].m22; - base.iR[5] = mp.iRArr[c].m32; - base.iR[6] = mp.iRArr[c].m13; - base.iR[7] = mp.iRArr[c].m23; - base.iR[8] = mp.iRArr[c].m33; - - base.K[0] = K.m11; - base.K[1] = K.m21; - base.K[2] = K.m31; - base.K[3] = K.m12; - base.K[4] = 
K.m22; - base.K[5] = K.m32; - base.K[6] = K.m13; - base.K[7] = K.m23; - base.K[8] = K.m33; - - base.iK[0] = iK.m11; - base.iK[1] = iK.m21; - base.iK[2] = iK.m31; - base.iK[3] = iK.m12; - base.iK[4] = iK.m22; - base.iK[5] = iK.m32; - base.iK[6] = iK.m13; - base.iK[7] = iK.m23; - base.iK[8] = iK.m33; - - ps_initCameraMatrix( base ); -} - - -void copy(CudaHostMemoryHeap& outHmh, const StaticVector& inDepthSimMap, int yFrom) -{ - const int w = outHmh.getSize()[0]; - const int h = outHmh.getSize()[1]; - for (int y = 0; y < h; ++y) - { - for (int x = 0; x < w; ++x) - { - int jO = (y + yFrom) * w + x; - float2& h_data = outHmh(x, y); - const DepthSim& data = inDepthSimMap[jO]; - h_data.x = data.depth; - h_data.y = data.sim; - } - } -} - -void copy(StaticVector& outDepthSimMap, const CudaHostMemoryHeap& inHmh, int yFrom) -{ - const int w = inHmh.getSize()[0]; - const int h = inHmh.getSize()[1]; - for (int y = 0; y < h; ++y) - { - for (int x = 0; x < w; ++x) - { - int jO = (y + yFrom) * w + x; - DepthSim& oDepthSim = outDepthSimMap[jO]; - const float2& h_depthSim = inHmh(x, y); - - oDepthSim.depth = h_depthSim.x; - oDepthSim.sim = h_depthSim.y; - } - } -} - -int listCUDADevices(bool verbose) -{ - return ps_listCUDADevices(verbose); -} - -/********************************************************************************* - * CamSelection - *********************************************************************************/ - -bool operator==(const CamSelection& l, const CamSelection& r) -{ - return (l.first == r.first && l.second == r.second); -} - -bool operator<(const CamSelection& l, const CamSelection& r) -{ - return (l.first < r.first || (l.first == r.first && l.second < r.second)); -} - -/********************************************************************************* - * PlaneSweepingCuda - *********************************************************************************/ - -PlaneSweepingCuda::PlaneSweepingCuda(int CUDADeviceNo, - mvsUtils::ImagesCache>& ic, - 
mvsUtils::MultiViewParams& mp, - int scales ) - : _scales( scales ) - , _CUDADeviceNo( CUDADeviceNo ) - , _ic( ic ) - , _mp(mp) - , _cameraParamCache( MAX_CONSTANT_CAMERA_PARAM_SETS ) -{ - /* The caller knows all camera that will become rc cameras, but it does not - * pass that information to this function. - * It knows the nearest cameras for each of those rc cameras, but it doesn't - * pass that information, either. - * So, the only task of this function is to allocate an amount of memory that - * will hold CUDA memory for camera structs and bitmaps. - */ - - ps_testCUDAdeviceNo( _CUDADeviceNo ); - - _nImgsInGPUAtTime = imagesInGPUAtTime( mp, scales ); - - // allocate global on the device - _hidden.reset(new FrameCacheMemory( _nImgsInGPUAtTime, - mp.getMaxImageWidth(), - mp.getMaxImageHeight(), - scales, - _CUDADeviceNo)); - - - ALICEVISION_LOG_INFO("PlaneSweepingCuda:" << std::endl - << "\t- _nImgsInGPUAtTime: " << _nImgsInGPUAtTime << std::endl - << "\t- scales: " << _scales); - - cudaError_t err; - - err = cudaMallocHost(&_camsBasesHst, MAX_CONSTANT_CAMERA_PARAM_SETS * sizeof(CameraStructBase)); - THROW_ON_CUDA_ERROR( err, "Could not allocate set of camera structs in pinned host memory in " << __FILE__ << ":" << __LINE__ << ", " << cudaGetErrorString(err) ); - - _cams .resize(_nImgsInGPUAtTime); - _camsHost.resize(_nImgsInGPUAtTime); - - for( int rc = 0; rc < _nImgsInGPUAtTime; ++rc ) - { - _cams[rc].camId = -1; - _cams[rc].param_dev.i = rc; - _cams[rc].pyramid = _hidden->getPyramidPtr(rc); // &_hidden_pyramids[rc]; - - err = cudaStreamCreate( &_cams[rc].stream ); - if( err != cudaSuccess ) - { - ALICEVISION_LOG_WARNING("Failed to create a CUDA stream object for async sweeping"); - _cams[rc].stream = 0; - } - } -} - -PlaneSweepingCuda::~PlaneSweepingCuda() -{ - ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// - // deallocate global on the device - - cudaFreeHost( _camsBasesHst ); - - for(int c = 
0; c < _cams.size(); c++) - { - cudaStreamDestroy( _cams[c].stream ); - } -} - -/* static private function called by the constructor */ -int PlaneSweepingCuda::imagesInGPUAtTime( mvsUtils::MultiViewParams& mp, int scales ) -{ - int value; - - const int maxImageWidth = mp.getMaxImageWidth(); - const int maxImageHeight = mp.getMaxImageHeight(); - - float oneimagemb = 4.0f * sizeof(float) * (((float)(maxImageWidth * maxImageHeight) / 1024.0f) / 1024.0f); - for(int scale = 2; scale <= scales; ++scale) - { - oneimagemb += 4.0 * sizeof(float) * (((float)((maxImageWidth / scale) * (maxImageHeight / scale)) / 1024.0) / 1024.0); - } - float maxmbGPU = 400.0f; // TODO FACA - - value = (int)(maxmbGPU / oneimagemb); - value = std::max(2, std::min(mp.ncams, value)); - - if( value > MAX_CONSTANT_CAMERA_PARAM_SETS ) - { - ALICEVISION_LOG_WARNING( "DepthMap has been compiled with a hard limit of " - << MAX_CONSTANT_CAMERA_PARAM_SETS - << " concurrent images. "<< std::endl - << "Recompilation required for larger values." << std::endl - << "Change define MAX_CONSTANT_CAMERA_PARAM_SETS " - << " but consider hardware limits for CUDA constant memory." 
); - value = MAX_CONSTANT_CAMERA_PARAM_SETS; - } - - return value; -} - -CamCacheIdx PlaneSweepingCuda::loadCameraParam( int global_cam_id, int scale, cudaStream_t stream ) -{ - CamSelection newP( global_cam_id, scale ); - CamCacheIdx newPIndex; - - bool newCamParam = _cameraParamCache.insert( newP, &newPIndex.i ); - if( newCamParam ) - { - cps_host_fillCamera(_camsBasesHst[newPIndex.i], global_cam_id, _mp, scale); - ps_loadCameraStructs( _camsBasesHst, newPIndex, stream ); - } - - return newPIndex; -} - -int PlaneSweepingCuda::addCam( int global_cam_id, int scale, cudaStream_t stream ) -{ - // first is oldest - int local_frame_id; - bool newInsertion = _camsHost.insert( global_cam_id, &local_frame_id ); - - CameraStruct& cam = _cams[local_frame_id]; - - if( newInsertion ) - { - cam.camId = local_frame_id; - - long t1 = clock(); - - /* Fill slot id in the GPU-sided frame cache from the global image cache */ - _hidden->fillFrame( local_frame_id, global_cam_id, _ic, _mp, stream ); - - mvsUtils::printfElapsedTime(t1, "Copy image (camera id="+std::to_string(global_cam_id)+") from CPU to GPU"); - } - - /* Fetch slot in constant memory that contains the camera parameters, - * and fill it needed. */ - cam.param_dev = loadCameraParam( global_cam_id, scale, stream ); - - _hidden->setLocalCamId( local_frame_id, cam.param_dev.i ); - - if( _cams[local_frame_id].camId != local_frame_id ) - { - std::cerr << "BUG in " << __FILE__ << ":" << __LINE__ << " ?" - << " The camId member should be initialized with the return value of addCam()." 
- << std::endl; - exit( -1 ); - } - - return local_frame_id; -} - -bool PlaneSweepingCuda::refineRcTcDepthMap(int rc, int tc, - StaticVector& inout_depthMap, - StaticVector& out_simMap, - const RefineParams& refineParams, - int xFrom, int wPart) -{ - const int rcWidth = _mp.getWidth(rc) / refineParams.scale; - const int rcHeight = _mp.getHeight(rc) / refineParams.scale; - - const int tcWidth = _mp.getWidth(tc) / refineParams.scale; - const int tcHeight = _mp.getHeight(tc) / refineParams.scale; - - const int rcFrameCacheId = addCam(rc, refineParams.scale); - const int tcFrameCacheId = addCam(tc, refineParams.scale); - - const CameraStruct& rcam = _cams[rcFrameCacheId]; - const CameraStruct& tcam = _cams[tcFrameCacheId]; - - ps_refineRcDepthMap(rcam, tcam, - inout_depthMap.getDataWritable().data(), - out_simMap.getDataWritable().data(), - rcWidth, rcHeight, - tcWidth, tcHeight, - refineParams, - xFrom, wPart, _CUDADeviceNo); - return true; -} - -/* Be very careful with volume indexes: - * volume is indexed with the same index as tc. The values of tc can be quite different. 
- * depths is indexed with the index_set elements - */ -void PlaneSweepingCuda::computeDepthSimMapVolume(int rc, - CudaDeviceMemoryPitched& volBestSim_dmp, - CudaDeviceMemoryPitched& volSecBestSim_dmp, - const CudaSize<3>& volDim, - const std::vector& tCams, - const std::vector& rcDepthsTcamsLimits, - const std::vector& rcDepths, - const SgmParams& sgmParams) -{ - const system::Timer timer; - - ALICEVISION_LOG_INFO("SGM Compute similarity volume (x: " << volDim.x() << ", y: " << volDim.y() << ", z: " << volDim.z() << ")"); - - std::vector tcs; - tcs.reserve(rcDepthsTcamsLimits.size()); - - for(std::size_t i = 0; i < rcDepthsTcamsLimits.size(); ++i) - { - tcs.emplace_back(tCams[i], rcDepthsTcamsLimits[i].x, rcDepthsTcamsLimits[i].y); - } - - nvtxPush("preload host cache "); - _ic.getImg_sync(rc); - for( const auto& tc : tcs) _ic.getImg_sync( tc.getTCIndex() ); - nvtxPop("preload host cache "); - - ps::SimilarityVolume vol(volDim, sgmParams.stepXY, sgmParams.scale, rcDepths); - - vol.initOutputVolumes(volBestSim_dmp, volSecBestSim_dmp, 0); - vol.WaitSweepStream(0); - - ALICEVISION_LOG_DEBUG("Initialize output volumes: " << std::endl - << "\t- volBestSim_dmp : " << volBestSim_dmp.getUnitsInDim(0) << ", " << volBestSim_dmp.getUnitsInDim(1) << ", " << volBestSim_dmp.getUnitsInDim(2) << std::endl - << "\t- volSecBestSim_dmp : " << volSecBestSim_dmp.getUnitsInDim(0) << ", " << volSecBestSim_dmp.getUnitsInDim(1) << ", " << volSecBestSim_dmp.getUnitsInDim(2) << std::endl - << "\t- scale: " << vol.scale() << std::endl - << "\t- volStepXY: " << vol.stepXY() << std::endl); - - for(int tci = 0; tci < tcs.size(); ++tci) - { - vol.WaitSweepStream(tci); - cudaStream_t stream = vol.SweepStream(tci); - - const system::Timer timerPerTc; - - const int tc = tcs[tci].getTCIndex(); - - const int rcWidth = _mp.getWidth(rc); - const int rcHeight = _mp.getHeight(rc); - - const int tcWidth = _mp.getWidth(tc); - const int tcHeight = _mp.getHeight(tc); - - const int rcFrameCacheId = addCam(rc, 
vol.scale(), stream); - const int tcFrameCacheId = addCam(tc, vol.scale(), stream); - - const CameraStruct& rcam = _cams[rcFrameCacheId]; - const CameraStruct& tcam = _cams[tcFrameCacheId]; - - const auto deviceMemoryInfo = getDeviceMemoryInfo(); - - ALICEVISION_LOG_DEBUG("Compute similarity volume:" << std::endl - << "\t- rc: " << rc << std::endl - << "\t- tc: " << tc << " (" << tci << "/" << tcs.size() << ")" << std::endl - << "\t- rc frame cache id: " << rcFrameCacheId << std::endl - << "\t- tc frame cache id: " << tcFrameCacheId << std::endl - << "\t- tc depth to start: " << tcs[tci].getDepthToStart() << std::endl - << "\t- tc depths to search: " << tcs[tci].getDepthsToSearch() << std::endl - << "\t- device similarity volume size: " << volBestSim_dmp.getBytesPadded() / (1024.0 * 1024.0) << " MB" << std::endl - << "\t- device unpadded similarity volume size: " << volBestSim_dmp.getBytesUnpadded() / (1024.0 * 1024.0) << " MB" << std::endl - << "\t- device memory available: " << deviceMemoryInfo.x << "MB, total: " << deviceMemoryInfo.y << " MB" << std::endl); - - // last synchronous step - // cudaDeviceSynchronize(); - vol.compute( - volBestSim_dmp, - volSecBestSim_dmp, - rcam, rcWidth, rcHeight, - tcam, tcWidth, tcHeight, - tcs[tci], - sgmParams, - tci); - - ALICEVISION_LOG_DEBUG("Compute similarity volume (with tc: " << tc << ") done in: " << timerPerTc.elapsedMs() << " ms."); - } - ALICEVISION_LOG_INFO("SGM Compute similarity volume done in: " << timer.elapsedMs() << " ms."); -} - - -/** - * @param[inout] volume input similarity volume - */ -bool PlaneSweepingCuda::sgmOptimizeSimVolume(int rc, - CudaDeviceMemoryPitched& volSimFiltered_dmp, - const CudaDeviceMemoryPitched& volSim_dmp, - const CudaSize<3>& volDim, - const SgmParams& sgmParams) -{ - const system::Timer timer; - - ALICEVISION_LOG_INFO("SGM Optimizing volume:" << std::endl - << "\t- filtering axes: " << sgmParams.filteringAxes << std::endl - << "\t- volume dimensions: (x: " << volDim.x() << ", y: " 
<< volDim.y() << ", z: " << volDim.z() << ")" << std::endl - << "\t- device similarity volume size: " << (double(volSim_dmp.getBytesPadded()) / (1024.0 * 1024.0)) << " MB" << std::endl); - - const int rcFrameCacheId = addCam(rc, sgmParams.scale); - - // update aggregation volume - int npaths = 0; - const Pyramid& rcPyramid = *(_cams[rcFrameCacheId].pyramid); - const size_t rcPyramidScaleIndex = size_t(sgmParams.scale) - 1; - cudaTextureObject_t rc_tex = rcPyramid[rcPyramidScaleIndex].tex; - - const auto updateAggrVolume = [&](const CudaSize<3>& axisT, bool invX) - { - ALICEVISION_LOG_DEBUG("Update aggregate volume (npaths: " << npaths << ", invX: " << invX << ")"); - - ps_aggregatePathVolume(volSimFiltered_dmp, - volSim_dmp, - volDim, - axisT, rc_tex, - sgmParams, - invX, npaths); - npaths++; - - ALICEVISION_LOG_DEBUG("Update aggregate volume done."); - }; - - // filtering is done on the last axis - const std::map> mapAxes = { - {'X', {1, 0, 2}}, // XYZ -> YXZ - {'Y', {0, 1, 2}}, // XYZ - }; - - for(char axis : sgmParams.filteringAxes) - { - const CudaSize<3>& axisT = mapAxes.at(axis); - updateAggrVolume(axisT, false); // without transpose - updateAggrVolume(axisT, true); // with transpose of the last axis - } - - ALICEVISION_LOG_INFO("SGM Optimizing volume done in: " << timer.elapsedMs() << " ms."); - return true; -} - -void PlaneSweepingCuda::sgmRetrieveBestDepth(int rc, - DepthSimMap& bestDepth, - const CudaDeviceMemoryPitched& volSim_dmp, - const CudaSize<3>& volDim, - const StaticVector& rcDepths, - const SgmParams& sgmParams) -{ - const system::Timer timer; - - ALICEVISION_LOG_INFO("SGM Retrieve best depth in volume (x: " << volDim.x() << ", y: " << volDim.y() << ", z: " << volDim.z() << ")"); - - const int rcFrameCacheId = addCam(rc, 1); - const int rcamCacheId = _hidden->getLocalCamId(rcFrameCacheId); - const CudaSize<2> depthSimDim(volDim.x(), volDim.y()); - - CudaDeviceMemory depths_d(rcDepths.getData().data(), rcDepths.size()); - CudaDeviceMemoryPitched 
bestDepth_dmp(depthSimDim); - CudaDeviceMemoryPitched bestSim_dmp(depthSimDim); - - const int scaleStep = sgmParams.scale * sgmParams.stepXY; - - ps_SGMretrieveBestDepth( - rcamCacheId, - bestDepth_dmp, - bestSim_dmp, - volSim_dmp, - volDim, - depths_d, - scaleStep, - sgmParams.interpolateRetrieveBestDepth); - - /* - { - CudaTexture bestDepth_tex(bestDepth_dmp); - ps_medianFilter3(bestDepth_tex.textureObj, bestDepth_dmp); - } - */ - - CudaHostMemoryHeap bestDepth_hmh(depthSimDim); - bestDepth_hmh.copyFrom(bestDepth_dmp); - bestDepth_dmp.deallocate(); - - CudaHostMemoryHeap bestSim_hmh(depthSimDim); - bestSim_hmh.copyFrom(bestSim_dmp); - bestSim_dmp.deallocate(); - - for(int y = 0; y < depthSimDim.y(); ++y) - { - for(int x = 0; x < depthSimDim.x(); ++x) - { - DepthSim& out = bestDepth._dsm[y * depthSimDim.x() + x]; - out.depth = bestDepth_hmh(x, y); - out.sim = bestSim_hmh(x, y); - } - } - - ALICEVISION_LOG_INFO("SGM Retrieve best depth in volume done in: " << timer.elapsedMs() << " ms."); -} - -// make_float3(avail,total,used) -Point3d PlaneSweepingCuda::getDeviceMemoryInfo() -{ - size_t iavail; - size_t itotal; - - cudaMemGetInfo(&iavail, &itotal); - - const double avail = double(iavail) / (1024.0 * 1024.0); - const double total = double(itotal) / (1024.0 * 1024.0); - const double used = double(itotal - iavail) / (1024.0 * 1024.0); - - return Point3d(avail, total, used); -} - -bool PlaneSweepingCuda::fuseDepthSimMapsGaussianKernelVoting(int wPart, int hPart, - StaticVector& out_depthSimMap, - const StaticVector*>& dataMaps, - const RefineParams& refineParams) -{ - const system::Timer timer; - const CudaSize<2> depthSimMapPartDim(wPart, hPart); - - std::vector*> dataMaps_hmh(dataMaps.size()); - for(int i = 0; i < dataMaps.size(); i++) - { - dataMaps_hmh[i] = new CudaHostMemoryHeap(depthSimMapPartDim); - for(int y = 0; y < hPart; ++y) - { - for(int x = 0; x < wPart; ++x) - { - float2& data_hmh = (*dataMaps_hmh[i])(x, y); - const DepthSim& data = (*dataMaps[i])[y * 
wPart + x]; - data_hmh.x = data.depth; - data_hmh.y = data.sim; - } - } - } - - CudaHostMemoryHeap depthSimMap_hmh(depthSimMapPartDim); - - ps_fuseDepthSimMapsGaussianKernelVoting(wPart, hPart, - &depthSimMap_hmh, - dataMaps_hmh, dataMaps.size(), - refineParams); - for(int y = 0; y < hPart; ++y) - { - for(int x = 0; x < wPart; ++x) - { - const float2& depthSim_hmh = depthSimMap_hmh(x, y); - DepthSim& out_depthSim = out_depthSimMap[y * wPart + x]; - out_depthSim.depth = depthSim_hmh.x; - out_depthSim.sim = depthSim_hmh.y; - } - } - - for(int i = 0; i < dataMaps.size(); ++i) - { - delete dataMaps_hmh[i]; - } - - ALICEVISION_LOG_DEBUG("Fuse depth/sim maps gaussian kernel voting done in: " << timer.elapsedMs() << " ms."); - - return true; -} - -bool PlaneSweepingCuda::optimizeDepthSimMapGradientDescent(int rc, - StaticVector& out_depthSimMapOptimized, - const StaticVector& depthSimMapSgmUpscale, - const StaticVector& depthSimMapRefinedFused, - const RefineParams& refineParams, - int yFrom, int hPart) -{ - const system::Timer timer; - - const CudaSize<2> depthSimMapPartDim(size_t(_mp.getWidth(rc) / refineParams.scale), size_t(hPart)); - - const int rcFrameCacheId = addCam(rc, refineParams.scale); - const CameraStruct& rcam = _cams[rcFrameCacheId]; - - CudaHostMemoryHeap sgmDepthPixSizeMap_hmh(depthSimMapPartDim); - CudaHostMemoryHeap refinedDepthSimMap_hmh(depthSimMapPartDim); - - copy(sgmDepthPixSizeMap_hmh, depthSimMapSgmUpscale, yFrom); - copy(refinedDepthSimMap_hmh, depthSimMapRefinedFused, yFrom); - - CudaHostMemoryHeap optimizedDepthSimMap_hmh(depthSimMapPartDim); - - ps_optimizeDepthSimMapGradientDescent(rcam, - optimizedDepthSimMap_hmh, - sgmDepthPixSizeMap_hmh, - refinedDepthSimMap_hmh, - depthSimMapPartDim, - refineParams, - _CUDADeviceNo, _nImgsInGPUAtTime, yFrom); - - copy(out_depthSimMapOptimized, optimizedDepthSimMap_hmh, yFrom); - - ALICEVISION_LOG_DEBUG("Optimize depth/sim map gradient descent done in: " << timer.elapsedMs() << " ms."); - - return true; 
-} - -NormalMapping* PlaneSweepingCuda::createNormalMapping() -{ - return new NormalMapping; -} - -void PlaneSweepingCuda::deleteNormalMapping( NormalMapping* m ) -{ - delete m; -} - -bool PlaneSweepingCuda::computeNormalMap( - NormalMapping* mapping, - const image::Image& depthMap, - image::Image& normalMap, - int rc, int scale, - float igammaC, float igammaP, int wsh) -{ - const int w = _mp.getWidth(rc) / scale; - const int h = _mp.getHeight(rc) / scale; - - const long t1 = clock(); - - ALICEVISION_LOG_DEBUG("computeNormalMap rc: " << rc); - - // Fill Camera Struct - - cps_host_fillCamera( *mapping->camsBasesHst, rc, _mp, scale ); - mapping->loadCameraParameters(); - mapping->allocHostMaps( w, h ); - mapping->copyDepthMap(depthMap.data(), depthMap.size()); - - ps_computeNormalMap( mapping, - w, h, scale - 1, - _nImgsInGPUAtTime, - _scales, wsh, _mp.verbose, igammaC, igammaP); - - float3* normalMapPtr = mapping->getNormalMapHst(); - - constexpr bool q = ( sizeof(image::RGBfColor[2]) == sizeof(float3[2]) ); - if( q == true ) - { - memcpy( normalMap.data(), mapping->getNormalMapHst(), w*h*sizeof(float3) ); - } - else - { - for (int i = 0; i < w * h; i++) - { - normalMap(i).r() = normalMapPtr[i].x; - normalMap(i).g() = normalMapPtr[i].y; - normalMap(i).b() = normalMapPtr[i].z; - } - } - - if (_mp.verbose) - mvsUtils::printfElapsedTime(t1); - - return true; -} - -bool PlaneSweepingCuda::getSilhoueteMap(StaticVectorBool* oMap, int scale, int step, const rgb maskColor, int rc) -{ - ALICEVISION_LOG_DEBUG("getSilhoueteeMap: rc: " << rc); - - int w = _mp.getWidth(rc) / scale; - int h = _mp.getHeight(rc) / scale; - - long t1 = clock(); - - int camId = addCam(rc, scale ); - CameraStruct& cam = _cams[camId]; - - uchar4 maskColorRgb; - maskColorRgb.x = maskColor.r; - maskColorRgb.y = maskColor.g; - maskColorRgb.z = maskColor.b; - maskColorRgb.w = 1.0f; - - CudaHostMemoryHeap omap_hmh(CudaSize<2>(w / step, h / step)); - - ps_getSilhoueteMap( &omap_hmh, w, h, scale - 1, - step, 
- cam, - maskColorRgb, _mp.verbose ); - - for(int i = 0; i < (w / step) * (h / step); i++) - { - (*oMap)[i] = omap_hmh.getBuffer()[i]; - } - - mvsUtils::printfElapsedTime(t1); - - return true; -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/PlaneSweepingCuda.hpp b/src/aliceVision/depthMap/cuda/PlaneSweepingCuda.hpp deleted file mode 100644 index a1e8d2684a..0000000000 --- a/src/aliceVision/depthMap/cuda/PlaneSweepingCuda.hpp +++ /dev/null @@ -1,169 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace aliceVision { -namespace depthMap { - -#ifdef TSIM_USE_FLOAT - using TSim = float; -#else - using TSim = unsigned char; -#endif - -/********************************************************************************* - * CamSelection - * Support class for operating an LRU cache of the currently selection cameras - *********************************************************************************/ - -struct CamSelection : public std::pair -{ - CamSelection( ) - : std::pair( 0, 0 ) - { } - - CamSelection( int i ) - : std::pair( i, i ) - { } - - CamSelection( int i, int j ) - : std::pair( i, j ) - { } - - CamSelection& operator=( int i ) - { - this->first = this->second = i; - return *this; - } -}; - -bool operator==( const CamSelection& l, const CamSelection& r ); -bool operator<( const CamSelection& l, const CamSelection& r ); - -/********************************************************************************* - * PlaneSweepingCuda - * Class for performing 
plane sweeping for some images on a selected GPU. - * There may be several instances of these class that are operating on the same - * GPU. It must therefore switch GPUs by ID. - *********************************************************************************/ -class PlaneSweepingCuda -{ -private: - std::unique_ptr _hidden; - -public: - - CameraStructBase* _camsBasesHst; - std::vector _cams; - LRUCache _camsHost; - LRUCache _cameraParamCache; - mvsUtils::MultiViewParams& _mp; - const int _scales; - const int _CUDADeviceNo = 0; - int _nImgsInGPUAtTime = 2; - mvsUtils::ImagesCache>& _ic; - - inline int maxImagesInGPU() const { return _nImgsInGPUAtTime; } - - PlaneSweepingCuda(int CUDADeviceNo, mvsUtils::ImagesCache>& _ic, - mvsUtils::MultiViewParams& _mp, int scales); - ~PlaneSweepingCuda(); - - int addCam( int rc, int scale, cudaStream_t stream = 0 ); - - void computeDepthSimMapVolume(int rc, - CudaDeviceMemoryPitched& volBestSim_dmp, - CudaDeviceMemoryPitched& volSecBestSim_dmp, - const CudaSize<3>& volDim, - const std::vector& tCams, - const std::vector& rcDepthsTcamsLimits, - const std::vector& rcDepths, - const SgmParams& sgmParams); - - bool sgmOptimizeSimVolume(int rc, - CudaDeviceMemoryPitched& volSimFiltered_dmp, - const CudaDeviceMemoryPitched& volSim_dmp, - const CudaSize<3>& volDim, - const SgmParams& sgmParams); - - void sgmRetrieveBestDepth(int rc, - DepthSimMap& bestDepth, - const CudaDeviceMemoryPitched& volSim_dmp, - const CudaSize<3>& volDim, - const StaticVector& rcDepths, - const SgmParams& sgmParams); - - Point3d getDeviceMemoryInfo(); - - bool refineRcTcDepthMap(int rc, int tc, - StaticVector& inout_depthMap, - StaticVector& out_simMap, - const RefineParams& refineParams, - int xFrom, int wPart); - - bool fuseDepthSimMapsGaussianKernelVoting(int wPart, int hPart, - StaticVector& out_depthSimMap, - const StaticVector*>& dataMaps, - const RefineParams& refineParams); - - bool optimizeDepthSimMapGradientDescent(int rc, - StaticVector& 
out_depthSimMapOptimized, - const StaticVector& depthSimMapSgmUpscale, - const StaticVector& depthSimMapRefinedFused, - const RefineParams& refineParams, - int yFrom, int hPart); - - /* create object to store intermediate data for repeated use */ - NormalMapping* createNormalMapping(); - - /* delete object to store intermediate data for repeated use */ - void deleteNormalMapping( NormalMapping* m ); - - bool computeNormalMap( NormalMapping* mapping, - const image::Image& depthMap, - image::Image& normalMap, - int rc, int scale, - float igammaC, float igammaP, int wsh); - - bool getSilhoueteMap(StaticVectorBool* oMap, int scale, int step, const rgb maskColor, int rc); - -private: - /* Support function for addCam that loads cameraStructs into the GPU constant - * memory if necessary. - * Returns the index in the constant cache. */ - CamCacheIdx loadCameraParam( int global_cam_id, int scale, cudaStream_t stream ); - - /* Compute the number of images that can be stored in the current GPU. Called only by - * the constructor. */ - static int imagesInGPUAtTime( mvsUtils::MultiViewParams& mp, int scales ); - -}; - -int listCUDADevices(bool verbose); - - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/device/DeviceCameraParams.cu b/src/aliceVision/depthMap/cuda/device/DeviceCameraParams.cu new file mode 100644 index 0000000000..b9bb1a59f7 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/device/DeviceCameraParams.cu @@ -0,0 +1,16 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "DeviceCameraParams.hpp" + +namespace aliceVision { +namespace depthMap { + +__constant__ DeviceCameraParams constantCameraParametersArray_d[ALICEVISION_DEVICE_MAX_CONSTANT_CAMERA_PARAM_SETS]; + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/cuda/device/DeviceCameraParams.hpp b/src/aliceVision/depthMap/cuda/device/DeviceCameraParams.hpp new file mode 100644 index 0000000000..0bb766b3ee --- /dev/null +++ b/src/aliceVision/depthMap/cuda/device/DeviceCameraParams.hpp @@ -0,0 +1,37 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +namespace aliceVision { +namespace depthMap { + +/* + * @struct DeviceCameraParams + * @brief Support class to maintain useful camera parameters in GPU memory. + */ +struct DeviceCameraParams +{ + float P[12]; + float iP[9]; + float R[9]; + float iR[9]; + float K[9]; + float iK[9]; + float3 C; + float3 XVect; + float3 YVect; + float3 ZVect; +}; + +// global / constant data structures + +#define ALICEVISION_DEVICE_MAX_CONSTANT_CAMERA_PARAM_SETS 100 // CUDA constant memory is limited to 64 KB + +extern __constant__ DeviceCameraParams constantCameraParametersArray_d[ALICEVISION_DEVICE_MAX_CONSTANT_CAMERA_PARAM_SETS]; + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/device/Patch.cuh b/src/aliceVision/depthMap/cuda/device/Patch.cuh new file mode 100644 index 0000000000..65e1257dfa --- /dev/null +++ b/src/aliceVision/depthMap/cuda/device/Patch.cuh @@ -0,0 +1,479 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2017 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. 
If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace aliceVision { +namespace depthMap { + +struct Patch +{ + float3 p; //< 3d point + float3 n; //< normal + float3 x; //< x axis + float3 y; //< y axis + float d; //< pixel size +}; + +__device__ static void rotPointAroundVect(float3& out, float3& X, float3& vect, int angle) +{ + double ux, uy, uz, vx, vy, vz, wx, wy, wz, sa, ca, x, y, z, u, v, w; + + double sizeX = sqrt(dot(X, X)); + x = X.x / sizeX; + y = X.y / sizeX; + z = X.z / sizeX; + u = vect.x; + v = vect.y; + w = vect.z; + + /*Rotate the point (x,y,z) around the vector (u,v,w)*/ + ux = u * x; + uy = u * y; + uz = u * z; + vx = v * x; + vy = v * y; + vz = v * z; + wx = w * x; + wy = w * y; + wz = w * z; + sa = sin((double)angle * (M_PI / 180.0f)); + ca = cos((double)angle * (M_PI / 180.0f)); + x = u * (ux + vy + wz) + (x * (v * v + w * w) - u * (vy + wz)) * ca + (-wy + vz) * sa; + y = v * (ux + vy + wz) + (y * (u * u + w * w) - v * (ux + wz)) * ca + (wx - uz) * sa; + z = w * (ux + vy + wz) + (z * (u * u + v * v) - w * (ux + vy)) * ca + (-vx + uy) * sa; + + u = sqrt(x * x + y * y + z * z); + x /= u; + y /= u; + z /= u; + + out.x = x * sizeX; + out.y = y * sizeX; + out.z = z * sizeX; +} + +__device__ static void rotatePatch(Patch& ptch, int rx, int ry) +{ + float3 n, y, x; + + // rotate patch around x axis by angle rx + rotPointAroundVect(n, ptch.n, ptch.x, rx); + rotPointAroundVect(y, ptch.y, ptch.x, rx); + ptch.n = n; + ptch.y = y; + + // rotate new patch around y axis by angle ry + rotPointAroundVect(n, ptch.n, ptch.y, ry); + rotPointAroundVect(x, ptch.x, ptch.y, ry); + ptch.n = n; + ptch.x = x; +} + +__device__ static void movePatch(Patch& ptch, int pt) +{ + // float3 v = ptch.p-rC; + // normalize(v); + float3 v = ptch.n; + + float d = ptch.d * (float)pt; + float3 p = ptch.p + v * d; + ptch.p = p; +} + 
+__device__ static void computeRotCS(float3& xax, float3& yax, float3& n) +{ + xax.x = -n.y + n.z; // any vector orthogonal to n: cross(n, (1, -1, -1)) + xax.y = +n.x + n.z; + xax.z = -n.x - n.y; + if(fabs(xax.x) < 0.0000001f && fabs(xax.y) < 0.0000001f && fabs(xax.z) < 0.0000001f) + { + xax.x = -n.y - n.z; // degenerate case (n parallel to (1, -1, -1)): use cross(n, (-1, 1, -1)) instead + xax.y = +n.x - n.z; + xax.z = +n.x + n.y; + }; + normalize(xax); + yax = cross(n, xax); +} + +__device__ static void computeRotCSEpip(int rcDeviceCamId, int tcDeviceCamId, Patch& ptch) +{ + // Vector from the 3d point to the reference camera center + float3 v1 = constantCameraParametersArray_d[rcDeviceCamId].C - ptch.p; + // Vector from the 3d point to the target camera center + float3 v2 = constantCameraParametersArray_d[tcDeviceCamId].C - ptch.p; + normalize(v1); + normalize(v2); + + // y has to be orthogonal to the epipolar plane + // n has to be on the epipolar plane + // x has to be on the epipolar plane + + ptch.y = cross(v1, v2); + normalize(ptch.y); + + ptch.n = (v1 + v2) / 2.0f; // IMPORTANT !!! + normalize(ptch.n); + // ptch.n = sg_s_r.ZVect; //IMPORTANT !!! 
+ + ptch.x = cross(ptch.y, ptch.n); + normalize(ptch.x); +} + +__device__ static inline int angleBetwUnitV1andUnitV2(float3& V1, float3& V2) +{ + return (int)fabs(acos(V1.x * V2.x + V1.y * V2.y + V1.z * V2.z) / (CUDART_PI_F / 180.0f)); +} + +/* +__device__ static float getRefCamPixSize(Patch &ptch) +{ + float2 rp = project3DPoint(sg_s_r.P,ptch.p); + + float minstep=10000000.0f; + for (int i=0;i<4;i++) { + float2 pix = rp; + if (i==0) {pix.x += 1.0f;}; + if (i==1) {pix.x -= 1.0f;}; + if (i==2) {pix.y += 1.0f;}; + if (i==3) {pix.y -= 1.0f;}; + float3 vect = M3x3mulV2(sg_s_r.iP,pix); + float3 lpi = linePlaneIntersect(sg_s_r.C, vect, ptch.p, ptch.n); + float step = dist(lpi,ptch.p); + minstep = fminf(minstep,step); + }; + + return minstep; +} + +__device__ static float getTarCamPixSize(Patch &ptch) +{ + float2 tp = project3DPoint(sg_s_t.P,ptch.p); + + float minstep=10000000.0f; + for (int i=0;i<4;i++) { + float2 pix = tp; + if (i==0) {pix.x += 1.0f;}; + if (i==1) {pix.x -= 1.0f;}; + if (i==2) {pix.y += 1.0f;}; + if (i==3) {pix.y -= 1.0f;}; + float3 vect = M3x3mulV2(sg_s_t.iP,pix); + float3 lpi = linePlaneIntersect(sg_s_t.C, vect, ptch.p, ptch.n); + float step = dist(lpi,ptch.p); + minstep = fminf(minstep,step); + }; + + return minstep; +} + +__device__ static float getPatchPixSize(Patch &ptch) +{ + return fmaxf(getTarCamPixSize(ptch),getRefCamPixSize(ptch)); +} +*/ + +__device__ static void computeHomography(int rcDeviceCamId, int tcDeviceCamId, float* _H, const float3& _p, + const float3& _n) +{ + const DeviceCameraParams& rcDeviceCamParams = constantCameraParametersArray_d[rcDeviceCamId]; + const DeviceCameraParams& tcDeviceCamParams = constantCameraParametersArray_d[tcDeviceCamId]; + + // hartley zisserman second edition p.327 (13.2) + float3 _tl = make_float3(0.0, 0.0, 0.0) - M3x3mulV3(rcDeviceCamParams.R, rcDeviceCamParams.C); + float3 _tr = make_float3(0.0, 0.0, 0.0) - M3x3mulV3(tcDeviceCamParams.R, tcDeviceCamParams.C); + + float3 p = 
M3x3mulV3(rcDeviceCamParams.R, (_p - rcDeviceCamParams.C)); + float3 n = M3x3mulV3(rcDeviceCamParams.R, _n); + normalize(n); + float d = -dot(n, p); + + float RrT[9]; + M3x3transpose(RrT, rcDeviceCamParams.R); + + float tmpRr[9]; + M3x3mulM3x3(tmpRr, tcDeviceCamParams.R, RrT); + float3 tr = _tr - M3x3mulV3(tmpRr, _tl); + + float tmp[9]; + float tmp1[9]; + outerMultiply(tmp, tr, n / d); + M3x3minusM3x3(tmp, tmpRr, tmp); + M3x3mulM3x3(tmp1, tcDeviceCamParams.K, tmp); + M3x3mulM3x3(tmp, tmp1, rcDeviceCamParams.iK); + + for(int i = 0; i < 9; i++) + { + _H[i] = tmp[i]; + } +} + +/* +__device__ static float compNCCbyH(const DeviceCameraParams& rc_cam, const DeviceCameraParams& tc_cam, const Patch& ptch, int +wsh) +{ + float2 rpix = project3DPoint(sg_s_r.P, ptch.p); + float2 tpix = project3DPoint(sg_s_t.P, ptch.p); + + float H[9]; + computeHomography(rc_cam, tc_cam, H, ptch.p, ptch.n); + + simStat sst = simStat(); + for(int xp = -wsh; xp <= wsh; xp++) + { + for(int yp = -wsh; yp <= wsh; yp++) + { + float2 rp; + float2 tp; + rp.x = rpix.x + (float)xp; + rp.y = rpix.y + (float)yp; + tp = V2M3x3mulV2(H, rp); + + float2 g; + g.x = 255.0f * tex2D(rtex, rp.x + 0.5f, rp.y + 0.5f); + g.y = 255.0f * tex2D(ttex, tp.x + 0.5f, tp.y + 0.5f); + sst.update(g); + } + } + sst.computeSim(); + + return sst.sim; +} +*/ + +/** + * @brief Compute Normalized Cross-Correlation + * + * @param[inout] ptch + * @param[in] wsh half-width of the similarity homography matrix (width = wsh*2+1) + * @param[in] width image width + * @param[in] height image height + * @param[in] _gammaC + * @param[in] _gammaP + * + * @return similarity value + * or invalid similarity (CUDART_INF_F) if uninitialized or masked + */ +__device__ static float compNCCby3DptsYK(cudaTextureObject_t rcTex, + cudaTextureObject_t tcTex, + int rcDeviceCamId, + int tcDeviceCamId, + const Patch& ptch, + int rcWidth, int rcHeight, + int tcWidth, int tcHeight, + int wsh, + float _gammaC, + float _gammaP) +{ + const DeviceCameraParams& 
rcDeviceCamParams = constantCameraParametersArray_d[rcDeviceCamId]; + const DeviceCameraParams& tcDeviceCamParams = constantCameraParametersArray_d[tcDeviceCamId]; + + float3 p = ptch.p; + const float2 rp = project3DPoint(rcDeviceCamParams.P, p); + const float2 tp = project3DPoint(tcDeviceCamParams.P, p); + + const float dd = wsh + 2.0f; // TODO FACA + if((rp.x < dd) || (rp.x > float(rcWidth - 1) - dd) || (rp.y < dd) || (rp.y > float(rcHeight - 1) - dd) || + (tp.x < dd) || (tp.x > float(tcWidth - 1) - dd) || (tp.y < dd) || (tp.y > float(tcHeight - 1) - dd)) + { + return CUDART_INF_F; // uninitialized + } + + // see CUDA_C_Programming_Guide.pdf ... E.2 pp132-133 ... adding 0.5 causes tex2D to return for point i,j exactly + // the value of I(i,j) ... it is what we want + const float4 gcr = tex2D_float4(rcTex, rp.x + 0.5f, rp.y + 0.5f); + const float4 gct = tex2D_float4(tcTex, tp.x + 0.5f, tp.y + 0.5f); + + // check the alpha values of the patch pixel center of R and T cameras + // for the R camera, alpha should be at least 0.9f (computation area) + // for the T camera, alpha should be at least 0.4f (masking) + if(gcr.w < 0.9f || gct.w < 0.4f) + { + return CUDART_INF_F; // uninitialized + } + + const float gammaC = _gammaC; + const float gammaP = _gammaP; + // float gammaC = ((gcr.w>0)||(gct.w>0))?sigmoid(_gammaC,25.5f,20.0f,10.0f,fmaxf(gcr.w,gct.w)):_gammaC; + // float gammaP = ((gcr.w>0)||(gct.w>0))?sigmoid(1.5,(float)(wsh+3),30.0f,20.0f,fmaxf(gcr.w,gct.w)):_gammaP; + + simStat sst; + for(int yp = -wsh; yp <= wsh; yp++) + { + for(int xp = -wsh; xp <= wsh; xp++) + { + p = ptch.p + ptch.x * (float)(ptch.d * (float)xp) + ptch.y * (float)(ptch.d * (float)yp); + const float2 rp1 = project3DPoint(rcDeviceCamParams.P, p); + const float2 tp1 = project3DPoint(tcDeviceCamParams.P, p); + + // see CUDA_C_Programming_Guide.pdf ... E.2 pp132-133 ... adding 0.5 causes tex2D to return for point i,j + // exactly the value of I(i,j) ... 
it is what we want + const float4 gcr1 = tex2D_float4(rcTex, rp1.x + 0.5f, rp1.y + 0.5f); + const float4 gct1 = tex2D_float4(tcTex, tp1.x + 0.5f, tp1.y + 0.5f); + + // TODO: Does it make a difference to accurately test it for each pixel of the patch? + // if (gcr1.w == 0.0f || gct1.w == 0.0f) + // continue; + + // Weighting is based on: + // * color difference to the center pixel of the patch: + // ** low value (close to 0) means that the color is different from the center pixel (ie. strongly + // supported surface) + // ** high value (close to 1) means that the color is close the center pixel (ie. uniform color) + // * distance in image to the center pixel of the patch: + // ** low value (close to 0) means that the pixel is close to the center of the patch + // ** high value (close to 1) means that the pixel is far from the center of the patch + const float w = + CostYKfromLab(xp, yp, gcr, gcr1, gammaC, gammaP) * CostYKfromLab(xp, yp, gct, gct1, gammaC, gammaP); + + assert(w >= 0.f); + assert(w <= 1.f); + + sst.update(gcr1.x, gct1.x, w); + } + } + return sst.computeWSim(); +} + +__device__ static void getPixelFor3DPoint(int deviceCamId, float2& out, float3& X) +{ + const DeviceCameraParams& deviceCamParams = constantCameraParametersArray_d[deviceCamId]; + + float3 p = M3x4mulV3(deviceCamParams.P, X); + + if(p.z <= 0.0f) + { + out = make_float2(-1.0f, -1.0f); + } + else + { + out = make_float2(p.x / p.z, p.y / p.z); + } +} + +__device__ static float3 get3DPointForPixelAndFrontoParellePlaneRC(int deviceCamId, const float2& pix, float fpPlaneDepth) +{ + const DeviceCameraParams& deviceCamParams = constantCameraParametersArray_d[deviceCamId]; + const float3 planep = deviceCamParams.C + deviceCamParams.ZVect * fpPlaneDepth; + float3 v = M3x3mulV2(deviceCamParams.iP, pix); + normalize(v); + return linePlaneIntersect(deviceCamParams.C, v, planep, deviceCamParams.ZVect); +} + +__device__ static float3 get3DPointForPixelAndFrontoParellePlaneRC(int deviceCamId, const int2& 
pixi, float fpPlaneDepth) +{ + float2 pix; + pix.x = (float)pixi.x; + pix.y = (float)pixi.y; + return get3DPointForPixelAndFrontoParellePlaneRC(deviceCamId, pix, fpPlaneDepth); +} + +__device__ static float3 get3DPointForPixelAndDepthFromRC(int deviceCamId, const float2& pix, float depth) +{ + const DeviceCameraParams& deviceCamParams = constantCameraParametersArray_d[deviceCamId]; + float3 rpv = M3x3mulV2(deviceCamParams.iP, pix); + normalize(rpv); + return deviceCamParams.C + rpv * depth; +} + +__device__ static float3 get3DPointForPixelAndDepthFromRC(int deviceCamId, const int2& pixi, float depth) +{ + float2 pix; + pix.x = float(pixi.x); + pix.y = float(pixi.y); + return get3DPointForPixelAndDepthFromRC(deviceCamId, pix, depth); +} + +__device__ static float3 triangulateMatchRef(int rcDeviceCamId, int tcDeviceCamId, float2& refpix, float2& tarpix) +{ + const DeviceCameraParams& rcDeviceCamParams = constantCameraParametersArray_d[rcDeviceCamId]; + const DeviceCameraParams& tcDeviceCamParams = constantCameraParametersArray_d[tcDeviceCamId]; + + float3 refvect = M3x3mulV2(rcDeviceCamParams.iP, refpix); + normalize(refvect); + float3 refpoint = refvect + rcDeviceCamParams.C; + + float3 tarvect = M3x3mulV2(tcDeviceCamParams.iP, tarpix); + normalize(tarvect); + float3 tarpoint = tarvect + tcDeviceCamParams.C; + + float k, l; + float3 lli1, lli2; + + lineLineIntersect(&k, &l, &lli1, &lli2, rcDeviceCamParams.C, refpoint, tcDeviceCamParams.C, tarpoint); + + return rcDeviceCamParams.C + refvect * k; +} + +__device__ static float computePixSize(int deviceCamId, const float3& p) +{ + const DeviceCameraParams& deviceCamParams = constantCameraParametersArray_d[deviceCamId]; + + float2 rp = project3DPoint(deviceCamParams.P, p); + float2 rp1 = rp + make_float2(1.0f, 0.0f); + + float3 refvect = M3x3mulV2(deviceCamParams.iP, rp1); + normalize(refvect); + return pointLineDistance3D(p, deviceCamParams.C, refvect); +} + +__device__ static float refineDepthSubPixel(const float3& 
depths, const float3& sims) +{ + // subpixel refinement + // subpixel refine by Stereo Matching with Color-Weighted Correlation, Hierarchical Belief Propagation, and + // Occlusion Handling Qingxiong pami08 + // quadratic polynomial interpolation is used to approximate the cost function between three discrete depth + // candidates: d, d-, and d+. + // NOTE: formula restored from the paper (the original symbols had been lost by a text-encoding error). + // d is the discrete depth with the minimal cost, d- = d - 1, and d+ = d + 1. The cost function is approximated as + // f(x) = a*x^2 + b*x + c; its minimum is at offset -(f(d+) - f(d-)) / (2 * (f(d+) + f(d-) - 2 * f(d))) from d. + + float simM1 = sims.x; + float sim = sims.y; + float simP1 = sims.z; + simM1 = (simM1 + 1.0f) / 2.0f; + sim = (sim + 1.0f) / 2.0f; + simP1 = (simP1 + 1.0f) / 2.0f; + + // sim is supposed to be the best one (so the smallest one) + if((simM1 < sim) || (simP1 < sim)) + return depths.y; // return the input + + float dispStep = -((simP1 - simM1) / (2.0f * (simP1 + simM1 - 2.0f * sim))); + + float floatDepthM1 = depths.x; + float floatDepthP1 = depths.z; + + //-1 : floatDepthM1 + // 0 : floatDepth + //+1 : floatDepthP1 + // linear function fit + // f(x)=a*x+b + // floatDepthM1=-a+b + // floatDepthP1= a+b + // a = b - floatDepthM1 + // floatDepthP1=2*b-floatDepthM1 + float b = (floatDepthP1 + floatDepthM1) / 2.0f; + float a = b - floatDepthM1; + + float interpDepth = a * dispStep + b; + + // Ensure that the interpolated value is finite (i.e. 
neither infinite nor NaN) + if(!isfinite(interpDepth) || interpDepth <= 0.0f) + return depths.y; // return the input + + return interpDepth; +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/deviceCommon/device_simStat.cu b/src/aliceVision/depthMap/cuda/device/SimStat.cuh similarity index 100% rename from src/aliceVision/depthMap/cuda/deviceCommon/device_simStat.cu rename to src/aliceVision/depthMap/cuda/device/SimStat.cuh diff --git a/src/aliceVision/depthMap/cuda/deviceCommon/device_utils.cuh b/src/aliceVision/depthMap/cuda/device/buffer.cuh similarity index 78% rename from src/aliceVision/depthMap/cuda/deviceCommon/device_utils.cuh rename to src/aliceVision/depthMap/cuda/device/buffer.cuh index acd9a57be0..22ebb5a9b7 100644 --- a/src/aliceVision/depthMap/cuda/deviceCommon/device_utils.cuh +++ b/src/aliceVision/depthMap/cuda/device/buffer.cuh @@ -7,7 +7,7 @@ #pragma once #include -#include +#include namespace aliceVision { namespace depthMap { @@ -21,8 +21,7 @@ namespace depthMap { * @return */ template -__device__ static inline -T* get2DBufferAt(T* ptr, int pitch, int x, int y) +__device__ static inline T* get2DBufferAt(T* ptr, size_t pitch, size_t x, size_t y) { return &(BufPtr(ptr,pitch).at(x,y)); } @@ -37,29 +36,25 @@ T* get2DBufferAt(T* ptr, int pitch, int x, int y) * @return */ template -__device__ static inline -T* get3DBufferAt(T* ptr, int spitch, int pitch, int x, int y, int z) +__device__ static inline T* get3DBufferAt(T* ptr, size_t spitch, size_t pitch, size_t x, size_t y, size_t z) { return ((T*)(((char*)ptr) + z * spitch + y * pitch)) + x; } template -__device__ static inline -const T* get3DBufferAt(const T* ptr, int spitch, int pitch, int x, int y, int z) +__device__ static inline const T* get3DBufferAt(const T* ptr, size_t spitch, size_t pitch, size_t x, size_t y, size_t z) { return ((const T*)(((const char*)ptr) + z * spitch + y * pitch)) + x; } template -__device__ static inline -T* 
get3DBufferAt(T* ptr, int spitch, int pitch, const int3& v) +__device__ static inline T* get3DBufferAt(T* ptr, size_t spitch, size_t pitch, const int3& v) { return get3DBufferAt(ptr, spitch, pitch, v.x, v.y, v.z); } template -__device__ static inline -const T* get3DBufferAt(const T* ptr, int spitch, int pitch, const int3& v) +__device__ static inline const T* get3DBufferAt(const T* ptr, size_t spitch, size_t pitch, const int3& v) { return get3DBufferAt(ptr, spitch, pitch, v.x, v.y, v.z); } diff --git a/src/aliceVision/depthMap/cuda/deviceCommon/device_color.cu b/src/aliceVision/depthMap/cuda/device/color.cuh similarity index 64% rename from src/aliceVision/depthMap/cuda/deviceCommon/device_color.cu rename to src/aliceVision/depthMap/cuda/device/color.cuh index 0393057823..ecfa7d6786 100644 --- a/src/aliceVision/depthMap/cuda/deviceCommon/device_color.cu +++ b/src/aliceVision/depthMap/cuda/device/color.cuh @@ -6,50 +6,55 @@ #pragma once -#include +#include namespace aliceVision { namespace depthMap { -inline __device__ float Euclidean(const float3 x1, const float3 x2) +__device__ static inline float Euclidean(const float3 x1, const float3 x2) { // return sqrtf((x1.x - x2.x) * (x1.x - x2.x) + (x1.y - x2.y) * (x1.y - x2.y) + (x1.z - x2.z) * (x1.z - x2.z)); return norm3df(x1.x - x2.x, x1.y - x2.y, x1.z - x2.z); } -inline __device__ float Euclidean3(const float4 x1, const float4 x2) +__device__ static inline float Euclidean3(const float4 x1, const float4 x2) { // return sqrtf((x1.x - x2.x) * (x1.x - x2.x) + (x1.y - x2.y) * (x1.y - x2.y) + (x1.z - x2.z) * (x1.z - x2.z)); return norm3df(x1.x - x2.x, x1.y - x2.y, x1.z - x2.z); } -//== colour conversion utils ====================================================================== +// colour conversion utils -// sRGB (0..1) to linear RGB (0..1) -inline __device__ float3 srgb2rgb(const float3 c) +/** + * @brief sRGB (0..1) to linear RGB (0..1) + * @param[in] c the float3 sRGB + * @return float3 linear RGB + */ +__device__ 
static inline float3 srgb2rgb(const float3 c) { return make_float3(c.x <= 0.04045f ? c.x / 12.92f : __powf((c.x + 0.055f) / 1.055f, 2.4f), c.y <= 0.04045f ? c.y / 12.92f : __powf((c.y + 0.055f) / 1.055f, 2.4f), c.z <= 0.04045f ? c.z / 12.92f : __powf((c.z + 0.055f) / 1.055f, 2.4f)); } -// linear RGB (0..1) to XZY (0..1) using sRGB primaries -inline __device__ float3 rgb2xyz(const float3 c) -{ - return make_float3(0.4124564f * c.x + 0.3575761f * c.y + 0.1804375f * c.z, - 0.2126729f * c.x + 0.7151522f * c.y + 0.0721750f * c.z, - 0.0193339f * c.x + 0.1191920f * c.y + 0.9503041f * c.z); -} - -inline __host__ float3 h_rgb2xyz(const float3 c) +/** + * @brief Linear RGB (0..1) to XYZ (0..1) using sRGB primaries + * @param[in] c the float3 Linear RGB + * @return float3 XYZ + */ +__device__ static inline float3 rgb2xyz(const float3 c) { return make_float3(0.4124564f * c.x + 0.3575761f * c.y + 0.1804375f * c.z, 0.2126729f * c.x + 0.7151522f * c.y + 0.0721750f * c.z, 0.0193339f * c.x + 0.1191920f * c.y + 0.9503041f * c.z); } -// linear RGB (0..1) to HSL (0..1) -inline __device__ float3 rgb2hsl(const float3& c) +/** + * @brief Linear RGB (0..1) to HSL (0..1) + * @param[in] c the float3 Linear RGB + * @return float3 HSL + */ +__device__ static float3 rgb2hsl(const float3& c) { const float cmin = fminf(c.x, fminf(c.y, c.z)); const float cmax = fmaxf(c.x, fmaxf(c.y, c.z)); @@ -91,8 +96,12 @@ inline __device__ float3 rgb2hsl(const float3& c) return make_float3(h, s, l); } -// XYZ (0..1) to CIELAB (0..255) assuming D65 whitepoint -inline __host__ __device__ float3 xyz2lab(const float3 c) +/** + * @brief XYZ (0..1) to CIELAB (0..255) assuming D65 whitepoint + * @param[in] c the float3 XYZ + * @return float3 CIELAB + */ +__device__ static inline float3 xyz2lab(const float3 c) { // assuming whitepoint D65, XYZ=(0.95047, 1.00000, 1.08883) float3 r = make_float3(c.x / 0.95047f, c.y, c.z / 1.08883f); @@ -111,7 +120,12 @@ inline __host__ __device__ float3 xyz2lab(const float3 c) return 
out; } -inline __device__ float rgb2gray(const uchar4 c) +/** + * @brief RGB (uchar4) to gray (float) + * @param[in] c the uchar4 RGB + * @return float gray + */ +__device__ static inline float rgb2gray(const uchar4 c) { return 0.2989f * (float)c.x + 0.5870f * (float)c.y + 0.1140f * (float)c.z; } @@ -130,8 +144,9 @@ inline __device__ float rgb2gray(const uchar4 c) * @param[in] gammaP Strength of Grouping by Proximity 8 / 4 * @return distance value */ -inline __device__ float CostYKfromLab(const int dx, const int dy, const float4 c1, const float4 c2, const float gammaC, - const float gammaP) +__device__ static float CostYKfromLab(const int dx, const int dy, + const float4 c1, const float4 c2, + const float gammaC, const float gammaP) { // const float deltaC = 0; // ignore colour difference @@ -169,8 +184,9 @@ inline __device__ float CostYKfromLab(const int dx, const int dy, const float4 c return __expf(-deltaC); // Yoon & Kweon // return __expf(-(deltaC * deltaC / (2 * gammaC * gammaC))) * sqrtf(__expf(-(deltaP * deltaP / (2 * gammaP * gammaP)))); // DCB } + /* -inline __device__ float CostYKfromLab(const float4 c1, const float4 c2, const float gammaC) + __device__ static inline float CostYKfromLab(const float4 c1, const float4 c2, const float gammaC) { // Euclidean distance in Lab, assuming linear RGB const float deltaC = Euclidean3(c1, c2); @@ -179,49 +195,6 @@ inline __device__ float CostYKfromLab(const float4 c1, const float4 c2, const fl return __expf(-(deltaC / gammaC)); // Yoon & Kweon } */ -__global__ void rgb2lab_kernel(CudaRGBA* irgbaOlab, int irgbaOlab_p, int width, int height) -{ - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - - if((x >= width) || (y >= height)) - return; - - CudaRGBA* rgb = get2DBufferAt(irgbaOlab, irgbaOlab_p, x, y); - float3 flab = xyz2lab(rgb2xyz(make_float3(rgb->x / 255.f, rgb->y / 255.f, rgb->z / 255.f))); - - rgb->x = flab.x; - rgb->y = flab.y; - rgb->z = flab.z; -} - -/* - 
Because a 2D gaussian mask is symmetry in row and column, - here only generate a 1D mask, and use the product by row - and column index later. - - 1D gaussian distribution : - g(x, d) -- C * exp(-x^2/d^2), C is a constant amplifier - - parameters: - og - output gaussian array in global memory - delta - the 2nd parameter 'd' in the above function - radius - half of the filter size - (total filter size = 2 * radius + 1) -*/ -// use only one block - -/* -__global__ void downscale_kernel(unsigned char* tex, int tex_p, int width, int height, int scale) -{ - int x = blockIdx.x*blockDim.x + threadIdx.x; - int y = blockIdx.y*blockDim.y + threadIdx.y; - - if ((x #include -#include + +// mn MATRIX ADDRESSING: mxy = x*n+y (x-row,y-col), (m-number of rows, n-number of columns) namespace aliceVision { namespace depthMap { -__device__ float2 project3DPoint( const float* M3x4, const float3& V) +__device__ static inline uchar4 float4_to_uchar4(const float4& a) +{ + return make_uchar4((unsigned char)a.x, (unsigned char)a.y, (unsigned char)a.z, (unsigned char)a.w); +} + +__device__ static inline float4 uchar4_to_float4(const uchar4& a) +{ + return make_float4((float)a.x, (float)a.y, (float)a.z, (float)a.w); +} + +__device__ static inline float dot(const float3& a, const float3& b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +__device__ static inline float dot(const float2& a, const float2& b) +{ + return a.x * b.x + a.y * b.y; +} + +__device__ static inline float size(const float3& a) +{ + return sqrtf(a.x * a.x + a.y * a.y + a.z * a.z); +} + +__device__ static inline float size(const float2& a) +{ + return sqrtf(a.x * a.x + a.y * a.y); +} + +__device__ static inline float dist(const float3& a, const float3& b) +{ + float3 ab = a - b; + return size(ab); +} + +__device__ static inline float dist(const float2& a, const float2& b) +{ + float2 ab; + ab.x = a.x - b.x; + ab.y = a.y - b.y; + return size(ab); +} + +__device__ static inline float3 cross(const float3& a, const float3& 
b) +{ + return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); +} + +__device__ static inline void normalize(float3& a) +{ + float d = sqrtf(dot(a, a)); + a.x /= d; + a.y /= d; + a.z /= d; +} + +__device__ static inline void normalize(float2& a) +{ + float d = sqrtf(dot(a, a)); + a.x /= d; + a.y /= d; +} + +__device__ static inline float3 M3x3mulV3( const float* M3x3, const float3& V) +{ + return make_float3(M3x3[0] * V.x + M3x3[3] * V.y + M3x3[6] * V.z, M3x3[1] * V.x + M3x3[4] * V.y + M3x3[7] * V.z, + M3x3[2] * V.x + M3x3[5] * V.y + M3x3[8] * V.z); +} + +__device__ static inline float3 M3x3mulV2( const float* M3x3, const float2& V) +{ + return make_float3(M3x3[0] * V.x + M3x3[3] * V.y + M3x3[6], M3x3[1] * V.x + M3x3[4] * V.y + M3x3[7], + M3x3[2] * V.x + M3x3[5] * V.y + M3x3[8]); +} + +__device__ static inline float3 M3x4mulV3(const float* M3x4, const float3& V) +{ + return make_float3(M3x4[0] * V.x + M3x4[3] * V.y + M3x4[6] * V.z + M3x4[9], + M3x4[1] * V.x + M3x4[4] * V.y + M3x4[7] * V.z + M3x4[10], + M3x4[2] * V.x + M3x4[5] * V.y + M3x4[8] * V.z + M3x4[11]); +} + +__device__ static inline float2 V2M3x3mulV2(float* M3x3, float2& V) +{ + float d = M3x3[2] * V.x + M3x3[5] * V.y + M3x3[8]; + return make_float2((M3x3[0] * V.x + M3x3[3] * V.y + M3x3[6]) / d, (M3x3[1] * V.x + M3x3[4] * V.y + M3x3[7]) / d); +} + + +__device__ static inline float2 project3DPoint(const float* M3x4, const float3& V) { float3 p = M3x4mulV3(M3x4, V); return make_float2(p.x / p.z, p.y / p.z); } -__device__ void M3x3mulM3x3(float* O3x3, const float* A3x3, const float* B3x3) +__device__ static void M3x3mulM3x3(float* O3x3, const float* A3x3, const float* B3x3) { O3x3[0] = A3x3[0] * B3x3[0] + A3x3[3] * B3x3[1] + A3x3[6] * B3x3[2]; O3x3[3] = A3x3[0] * B3x3[3] + A3x3[3] * B3x3[4] + A3x3[6] * B3x3[5]; @@ -35,7 +126,7 @@ __device__ void M3x3mulM3x3(float* O3x3, const float* A3x3, const float* B3x3) O3x3[8] = A3x3[2] * B3x3[6] + A3x3[5] * B3x3[7] + A3x3[8] * B3x3[8]; 
} -__device__ void M3x3minusM3x3(float* O3x3, float* A3x3, float* B3x3) +__device__ static inline void M3x3minusM3x3(float* O3x3, float* A3x3, float* B3x3) { for(int i = 0; i < 9; i++) { @@ -43,7 +134,7 @@ __device__ void M3x3minusM3x3(float* O3x3, float* A3x3, float* B3x3) }; } -__device__ void M3x3transpose(float* O3x3, const float* A3x3) +__device__ static void M3x3transpose(float* O3x3, const float* A3x3) { O3x3[0] = A3x3[0]; O3x3[1] = A3x3[3]; @@ -56,7 +147,7 @@ __device__ void M3x3transpose(float* O3x3, const float* A3x3) O3x3[8] = A3x3[8]; } -__device__ void outerMultiply(float* O3x3, const float3& a, const float3& b) +__device__ static void outerMultiply(float* O3x3, const float3& a, const float3& b) { O3x3[0] = a.x * b.x; O3x3[3] = a.x * b.y; @@ -69,31 +160,32 @@ __device__ void outerMultiply(float* O3x3, const float3& a, const float3& b) O3x3[8] = a.z * b.z; } -__device__ float3 linePlaneIntersect(const float3& linePoint, const float3& lineVect, const float3& planePoint, - const float3& planeNormal) +__device__ static inline float3 linePlaneIntersect(const float3& linePoint, + const float3& lineVect, + const float3& planePoint, + const float3& planeNormal) { float k = (dot(planePoint, planeNormal) - dot(planeNormal, linePoint)) / dot(planeNormal, lineVect); return linePoint + lineVect * k; } -__device__ float3 closestPointOnPlaneToPoint(const float3& point, const float3& planePoint, - const float3& planeNormalNormalized) +__device__ static inline float3 closestPointOnPlaneToPoint(const float3& point, const float3& planePoint, const float3& planeNormalNormalized) { return point - planeNormalNormalized * dot(planeNormalNormalized, point - planePoint); } -__device__ float3 closestPointToLine3D(const float3& point, const float3& linePoint, const float3& lineVectNormalized) +__device__ static inline float3 closestPointToLine3D(const float3& point, const float3& linePoint, const float3& lineVectNormalized) { return linePoint + lineVectNormalized * 
dot(lineVectNormalized, point - linePoint); } -__device__ float pointLineDistance3D(const float3& point, const float3& linePoint, const float3& lineVectNormalized) +__device__ static inline float pointLineDistance3D(const float3& point, const float3& linePoint, const float3& lineVectNormalized) { return size(cross(lineVectNormalized, linePoint - point)); } // v1,v2 dot not have to be normalized -__device__ float angleBetwV1andV2(const float3& iV1, const float3& iV2) +__device__ static float angleBetwV1andV2(const float3& iV1, const float3& iV2) { float3 V1, V2; V1 = iV1; @@ -104,22 +196,28 @@ __device__ float angleBetwV1andV2(const float3& iV1, const float3& iV2) return fabsf(acosf(V1.x * V2.x + V1.y * V2.y + V1.z * V2.z) / (CUDART_PI_F / 180.0f)); } -__device__ float angleBetwABandAC(const float3& A, const float3& B, const float3& C) +__device__ static float angleBetwABandAC(const float3& A, const float3& B, const float3& C) { - float3 V1, V2; - V1 = B - A; - V2 = C - A; + float3 V1 = B - A; + float3 V2 = C - A; + normalize(V1); normalize(V2); - float a = acosf(V1.x * V2.x + V1.y * V2.y + V1.z * V2.z); - a = isinf(a) ? 0.0f : a; - - return fabsf(a) / (CUDART_PI_F / 180.0f); + const double x = double(V1.x * V2.x + V1.y * V2.y + V1.z * V2.z); + double a = acos(x); + a = isinf(a) ? 
0.0 : a; + return float(fabs(a) / (CUDART_PI / 180.0)); } -__device__ float3 lineLineIntersect(float* k, float* l, float3* lli1, float3* lli2, - const float3& p1, const float3& p2, const float3& p3, const float3& p4) +__device__ static float3 lineLineIntersect(float* k, + float* l, + float3* lli1, + float3* lli2, + const float3& p1, + const float3& p2, + const float3& p3, + const float3& p4) { /* % [pa, pb, mua, mub] = LineLineIntersect(p1,p2,p3,p4) @@ -219,7 +317,7 @@ __device__ float3 lineLineIntersect(float* k, float* l, float3* lli1, float3* ll * f(x) = min + (max-min) * \frac{1}{1 + e^{10 * (x - mid) / width}} * https://www.desmos.com/calculator/1qvampwbyx */ -__device__ float sigmoid(float zeroVal, float endVal, float sigwidth, float sigMid, float xval) +__device__ static inline float sigmoid(float zeroVal, float endVal, float sigwidth, float sigMid, float xval) { return zeroVal + (endVal - zeroVal) * (1.0f / (1.0f + expf(10.0f * ((xval - sigMid) / sigwidth)))); } @@ -227,7 +325,7 @@ __device__ float sigmoid(float zeroVal, float endVal, float sigwidth, float sigM /** * f(x) = min + (max-min) * \frac{1}{1 + e^{10 * (mid - x) / width}} */ -__device__ float sigmoid2(float zeroVal, float endVal, float sigwidth, float sigMid, float xval) +__device__ static inline float sigmoid2(float zeroVal, float endVal, float sigwidth, float sigMid, float xval) { return zeroVal + (endVal - zeroVal) * (1.0f / (1.0f + expf(10.0f * ((sigMid - xval) / sigwidth)))); } diff --git a/src/aliceVision/depthMap/cuda/deviceCommon/device_operators.cuh b/src/aliceVision/depthMap/cuda/device/operators.cuh similarity index 100% rename from src/aliceVision/depthMap/cuda/deviceCommon/device_operators.cuh rename to src/aliceVision/depthMap/cuda/device/operators.cuh diff --git a/src/aliceVision/depthMap/cuda/deviceCommon/device_global.cu b/src/aliceVision/depthMap/cuda/deviceCommon/device_global.cu deleted file mode 100644 index 85db41d1a1..0000000000 --- 
a/src/aliceVision/depthMap/cuda/deviceCommon/device_global.cu +++ /dev/null @@ -1,49 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#ifndef ALICEVISION_CUDA_deviceCommon_device_global_cu -#define ALICEVISION_CUDA_deviceCommon_device_global_cu - -#include - -namespace aliceVision { -namespace depthMap { - -// Helper functions -// function clamping x between a and b -__device__ int clamp(int x, int a, int b) -{ - return max(a, min(b, x)); -} - - -//////////////////////////////////////////////////////////////////////////////// -// CONSTANT MEMORY - -// MATLAB: x = [-2:2]; delta = 1; y = exp( - (x .* x) / (2 * delta * delta)); format long g; y -__constant__ float gauss5[5] = {0.135335283236613f, 0.606530659712633f, 1.0f, 0.606530659712633f, - 0.135335283236613f}; -__constant__ float sumGauss55 = 6.16892408102888f; - -// MATLAB: distFcnHeight=1.0; maxDist = 0.3; dist = 0:0.01:1; y = -// 1-distFcnHeight*exp(-(dist.*dist)/(2*maxDist*maxDist)); plot(dist,y); -// MATLAB: distFcnHeight=1.0; maxDist = 0.3; dist = 0:0.25:1; y = -// 1-distFcnHeight*exp(-(dist.*dist)/(2*maxDist*maxDist)); plot(dist,y); int32(125*y) -__constant__ unsigned char distFcnConst5[5] = {0, 37, 94, 120, 125}; - -// MATLAB: distFcnHeight=1.0; maxDist = 0.3; dist = 0:1/2:1; y = -// 1-distFcnHeight*exp(-(dist.*dist)/(2*maxDist*maxDist)); plot(dist,y); int32(125*y) -__constant__ unsigned char distFcnConst3[3] = {0, 94, 125}; - -__constant__ CameraStructBase camsBasesDev[MAX_CONSTANT_CAMERA_PARAM_SETS]; - - -} // namespace depthMap -} // namespace aliceVision - -#else // ALICEVISION_CUDA_deviceCommon_device_global_cu -#error "deviceCommon/device_global.cu has been included twice" -#endif // ALICEVISION_CUDA_deviceCommon_device_global_cu diff 
--git a/src/aliceVision/depthMap/cuda/deviceCommon/device_matrix.cuh b/src/aliceVision/depthMap/cuda/deviceCommon/device_matrix.cuh deleted file mode 100644 index 919e39f824..0000000000 --- a/src/aliceVision/depthMap/cuda/deviceCommon/device_matrix.cuh +++ /dev/null @@ -1,105 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#pragma once - -#include -#include - -namespace aliceVision { -namespace depthMap { - -__device__ static inline uchar4 float4_to_uchar4(const float4& a) -{ - return make_uchar4((unsigned char)a.x, (unsigned char)a.y, (unsigned char)a.z, (unsigned char)a.w); -} - -__device__ static inline float4 uchar4_to_float4(const uchar4& a) -{ - return make_float4((float)a.x, (float)a.y, (float)a.z, (float)a.w); -} - -__device__ static inline float dot(const float3& a, const float3& b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} - -__device__ static inline float dot(const float2& a, const float2& b) -{ - return a.x * b.x + a.y * b.y; -} - -__device__ static inline float size(const float3& a) -{ - return sqrtf(a.x * a.x + a.y * a.y + a.z * a.z); -} - -__device__ static inline float size(const float2& a) -{ - return sqrtf(a.x * a.x + a.y * a.y); -} - -__device__ static inline float dist(const float3& a, const float3& b) -{ - float3 ab = a - b; - return size(ab); -} - -__device__ static inline float dist(const float2& a, const float2& b) -{ - float2 ab; - ab.x = a.x - b.x; - ab.y = a.y - b.y; - return size(ab); -} - -__device__ static inline float3 cross(const float3& a, const float3& b) -{ - return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); -} - -__device__ static inline void normalize(float3& a) -{ - float d = sqrtf(dot(a, a)); - a.x /= d; - a.y /= d; - 
a.z /= d; -} - -__device__ static inline void normalize(float2& a) -{ - float d = sqrtf(dot(a, a)); - a.x /= d; - a.y /= d; -} - -__device__ static inline float3 M3x3mulV3( const float* M3x3, const float3& V) -{ - return make_float3(M3x3[0] * V.x + M3x3[3] * V.y + M3x3[6] * V.z, M3x3[1] * V.x + M3x3[4] * V.y + M3x3[7] * V.z, - M3x3[2] * V.x + M3x3[5] * V.y + M3x3[8] * V.z); -} - -__device__ static inline float3 M3x3mulV2( const float* M3x3, const float2& V) -{ - return make_float3(M3x3[0] * V.x + M3x3[3] * V.y + M3x3[6], M3x3[1] * V.x + M3x3[4] * V.y + M3x3[7], - M3x3[2] * V.x + M3x3[5] * V.y + M3x3[8]); -} - -__device__ static inline float3 M3x4mulV3(const float* M3x4, const float3& V) -{ - return make_float3(M3x4[0] * V.x + M3x4[3] * V.y + M3x4[6] * V.z + M3x4[9], - M3x4[1] * V.x + M3x4[4] * V.y + M3x4[7] * V.z + M3x4[10], - M3x4[2] * V.x + M3x4[5] * V.y + M3x4[8] * V.z + M3x4[11]); -} - -__device__ static inline float2 V2M3x3mulV2(float* M3x3, float2& V) -{ - float d = M3x3[2] * V.x + M3x3[5] * V.y + M3x3[8]; - return make_float2((M3x3[0] * V.x + M3x3[3] * V.y + M3x3[6]) / d, (M3x3[1] * V.x + M3x3[4] * V.y + M3x3[7]) / d); -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/deviceCommon/device_patch_es.cu b/src/aliceVision/depthMap/cuda/deviceCommon/device_patch_es.cu deleted file mode 100644 index 11789c3542..0000000000 --- a/src/aliceVision/depthMap/cuda/deviceCommon/device_patch_es.cu +++ /dev/null @@ -1,401 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#pragma once - -#include -#include -#include -#include -#include - -#include - -namespace aliceVision { -namespace depthMap { - -__device__ void computeRotCSEpip( int rc_cam_cache_idx, - int tc_cam_cache_idx, - Patch& ptch ) -{ - // Vector from the reference camera to the 3d point - float3 v1 = camsBasesDev[rc_cam_cache_idx].C - ptch.p; - // Vector from the target camera to the 3d point - float3 v2 = camsBasesDev[tc_cam_cache_idx].C - ptch.p; - normalize(v1); - normalize(v2); - - // y has to be ortogonal to the epipolar plane - // n has to be on the epipolar plane - // x has to be on the epipolar plane - - ptch.y = cross(v1, v2); - normalize(ptch.y); - - ptch.n = (v1 + v2) / 2.0f; // IMPORTANT !!! - normalize(ptch.n); - // ptch.n = sg_s_r.ZVect; //IMPORTANT !!! - - ptch.x = cross(ptch.y, ptch.n); - normalize(ptch.x); -} - -__device__ int angleBetwUnitV1andUnitV2(float3& V1, float3& V2) -{ - return (int)fabs(acos(V1.x * V2.x + V1.y * V2.y + V1.z * V2.z) / (CUDART_PI_F / 180.0f)); -} - -/* -__device__ float getRefCamPixSize(Patch &ptch) -{ - float2 rp = project3DPoint(sg_s_r.P,ptch.p); - - float minstep=10000000.0f; - for (int i=0;i<4;i++) { - float2 pix = rp; - if (i==0) {pix.x += 1.0f;}; - if (i==1) {pix.x -= 1.0f;}; - if (i==2) {pix.y += 1.0f;}; - if (i==3) {pix.y -= 1.0f;}; - float3 vect = M3x3mulV2(sg_s_r.iP,pix); - float3 lpi = linePlaneIntersect(sg_s_r.C, vect, ptch.p, ptch.n); - float step = dist(lpi,ptch.p); - minstep = fminf(minstep,step); - }; - - return minstep; -} - -__device__ float getTarCamPixSize(Patch &ptch) -{ - float2 tp = project3DPoint(sg_s_t.P,ptch.p); - - float minstep=10000000.0f; - for (int i=0;i<4;i++) { - float2 pix = tp; - if (i==0) {pix.x += 1.0f;}; - if (i==1) {pix.x -= 1.0f;}; - if (i==2) {pix.y += 1.0f;}; - if (i==3) {pix.y -= 1.0f;}; - float3 vect = M3x3mulV2(sg_s_t.iP,pix); - float3 lpi = linePlaneIntersect(sg_s_t.C, vect, ptch.p, ptch.n); - float step = dist(lpi,ptch.p); - minstep = fminf(minstep,step); - }; - - return minstep; 
-} - -__device__ float getPatchPixSize(Patch &ptch) -{ - return fmaxf(getTarCamPixSize(ptch),getRefCamPixSize(ptch)); -} -*/ - -__device__ void computeHomography( int rc_cam_cache_idx, - int tc_cam_cache_idx, - float* _H, const float3& _p, const float3& _n) -{ - // hartley zisserman second edition p.327 (13.2) - float3 _tl = make_float3(0.0, 0.0, 0.0) - M3x3mulV3(camsBasesDev[rc_cam_cache_idx].R, camsBasesDev[rc_cam_cache_idx].C); - float3 _tr = make_float3(0.0, 0.0, 0.0) - M3x3mulV3(camsBasesDev[tc_cam_cache_idx].R, camsBasesDev[tc_cam_cache_idx].C); - - float3 p = M3x3mulV3(camsBasesDev[rc_cam_cache_idx].R, (_p - camsBasesDev[rc_cam_cache_idx].C)); - float3 n = M3x3mulV3(camsBasesDev[rc_cam_cache_idx].R, _n); - normalize(n); - float d = -dot(n, p); - - float RrT[9]; - M3x3transpose(RrT, camsBasesDev[rc_cam_cache_idx].R); - - float tmpRr[9]; - M3x3mulM3x3(tmpRr, camsBasesDev[tc_cam_cache_idx].R, RrT); - float3 tr = _tr - M3x3mulV3(tmpRr, _tl); - - float tmp[9]; - float tmp1[9]; - outerMultiply(tmp, tr, n / d); - M3x3minusM3x3(tmp, tmpRr, tmp); - M3x3mulM3x3(tmp1, camsBasesDev[tc_cam_cache_idx].K, tmp); - M3x3mulM3x3(tmp, tmp1, camsBasesDev[rc_cam_cache_idx].iK); - - for(int i = 0; i < 9; i++) - { - _H[i] = tmp[i]; - } -} - -/* -__device__ float compNCCbyH(const CameraStructBase& rc_cam, const CameraStructBase& tc_cam, const Patch& ptch, int wsh) -{ - float2 rpix = project3DPoint(sg_s_r.P, ptch.p); - float2 tpix = project3DPoint(sg_s_t.P, ptch.p); - - float H[9]; - computeHomography(rc_cam, tc_cam, H, ptch.p, ptch.n); - - simStat sst = simStat(); - for(int xp = -wsh; xp <= wsh; xp++) - { - for(int yp = -wsh; yp <= wsh; yp++) - { - float2 rp; - float2 tp; - rp.x = rpix.x + (float)xp; - rp.y = rpix.y + (float)yp; - tp = V2M3x3mulV2(H, rp); - - float2 g; - g.x = 255.0f * tex2D(rtex, rp.x + 0.5f, rp.y + 0.5f); - g.y = 255.0f * tex2D(ttex, tp.x + 0.5f, tp.y + 0.5f); - sst.update(g); - } - } - sst.computeSim(); - - return sst.sim; -} -*/ - -/** - * @brief Compute 
Normalized Cross-Correlation - * - * @param[inout] ptch - * @param[in] wsh half-width of the similarity homography matrix (width = wsh*2+1) - * @param[in] width image width - * @param[in] height image height - * @param[in] _gammaC - * @param[in] _gammaP - * - * @return similarity value - * or invalid similarity (CUDART_INF_F) if uninitialized or masked - */ -__device__ float compNCCby3DptsYK( cudaTextureObject_t rc_tex, - cudaTextureObject_t tc_tex, - int rc_cam_cache_idx, - int tc_cam_cache_idx, - const Patch& ptch, - int wsh, - int rc_width, int rc_height, - int tc_width, int tc_height, - const float _gammaC, const float _gammaP) -{ - const CameraStructBase& rcCam = camsBasesDev[rc_cam_cache_idx]; - const CameraStructBase& tcCam = camsBasesDev[tc_cam_cache_idx]; - - float3 p = ptch.p; - const float2 rp = project3DPoint(rcCam.P, p); - const float2 tp = project3DPoint(tcCam.P, p); - - const float dd = wsh + 2.0f; // TODO FACA - if((rp.x < dd) || (rp.x > (float)(rc_width - 1) - dd) || - (rp.y < dd) || (rp.y > (float)(rc_height - 1) - dd) || - (tp.x < dd) || (tp.x > (float)(tc_width - 1) - dd) || - (tp.y < dd) || (tp.y > (float)(tc_height - 1) - dd)) - { - return CUDART_INF_F; // uninitialized - } - - // see CUDA_C_Programming_Guide.pdf ... E.2 pp132-133 ... adding 0.5 caises that tex2D return for point i,j exactly - // value od I(i,j) ... 
it is what we want - const float4 gcr = tex2D_float4(rc_tex, rp.x + 0.5f, rp.y + 0.5f); - const float4 gct = tex2D_float4(tc_tex, tp.x + 0.5f, tp.y + 0.5f); - - // printf("gcr: R: %f, G: %f, B: %f, A: %f", gcr.x, gcr.y, gcr.z, gcr.w); - // printf("gct: R: %f, G: %f, B: %f, A: %f", gct.x, gct.y, gct.z, gct.w); - - if (gcr.w == 0.0f || gct.w == 0.0f) - return CUDART_INF_F; // if no alpha, invalid pixel from input mask - - const float gammaC = _gammaC; - const float gammaP = _gammaP; - // float gammaC = ((gcr.w>0)||(gct.w>0))?sigmoid(_gammaC,25.5f,20.0f,10.0f,fmaxf(gcr.w,gct.w)):_gammaC; - // float gammaP = ((gcr.w>0)||(gct.w>0))?sigmoid(1.5,(float)(wsh+3),30.0f,20.0f,fmaxf(gcr.w,gct.w)):_gammaP; - - - simStat sst; - for(int yp = -wsh; yp <= wsh; yp++) - { - for(int xp = -wsh; xp <= wsh; xp++) - { - p = ptch.p + ptch.x * (float)(ptch.d * (float)xp) + ptch.y * (float)(ptch.d * (float)yp); - const float2 rp1 = project3DPoint(rcCam.P, p); - const float2 tp1 = project3DPoint(tcCam.P, p); - - // see CUDA_C_Programming_Guide.pdf ... E.2 pp132-133 ... adding 0.5 caises that tex2D return for point i,j - // exactly value od I(i,j) ... it is what we want - const float4 gcr1 = tex2D_float4(rc_tex, rp1.x + 0.5f, rp1.y + 0.5f); - const float4 gct1 = tex2D_float4(tc_tex, tp1.x + 0.5f, tp1.y + 0.5f); - - // TODO: Does it make a difference to accurately test it for each pixel of the patch? - // if (gcr1.w == 0.0f || gct1.w == 0.0f) - // continue; - - // Weighting is based on: - // * color difference to the center pixel of the patch: - // ** low value (close to 0) means that the color is different from the center pixel (ie. strongly supported surface) - // ** high value (close to 1) means that the color is close the center pixel (ie. 
uniform color) - // * distance in image to the center pixel of the patch: - // ** low value (close to 0) means that the pixel is close to the center of the patch - // ** high value (close to 1) means that the pixel is far from the center of the patch - const float w = CostYKfromLab(xp, yp, gcr, gcr1, gammaC, gammaP) * CostYKfromLab(xp, yp, gct, gct1, gammaC, gammaP); - - assert(w >= 0.f); - assert(w <= 1.f); - - sst.update(gcr1.x, gct1.x, w); - } - } - return sst.computeWSim(); -} - - -__device__ void getPixelFor3DPoint( int cam_cache_idx, - float2& out, float3& X) -{ - const CameraStructBase& cam = camsBasesDev[cam_cache_idx]; - float3 p = M3x4mulV3(cam.P, X); - - if(p.z <= 0.0f) - { - out = make_float2(-1.0f, -1.0f); - } - else - { - out = make_float2(p.x / p.z, p.y / p.z); - } -} - -__device__ float3 get3DPointForPixelAndFrontoParellePlaneRC( int cam_cache_idx, - const float2& pix, - float fpPlaneDepth) -{ - const CameraStructBase& cam = camsBasesDev[cam_cache_idx]; - const float3 planep = cam.C + cam.ZVect * fpPlaneDepth; - float3 v = M3x3mulV2(cam.iP, pix); - normalize(v); - return linePlaneIntersect(cam.C, - v, - planep, - cam.ZVect); -} - -__device__ float3 get3DPointForPixelAndFrontoParellePlaneRC( int cam_cache_idx, - const int2& pixi, - float fpPlaneDepth) -{ - float2 pix; - pix.x = (float)pixi.x; - pix.y = (float)pixi.y; - return get3DPointForPixelAndFrontoParellePlaneRC(cam_cache_idx, pix, fpPlaneDepth); -} - -__device__ float3 get3DPointForPixelAndDepthFromRC( int cam_cache_idx, - const float2& pix, float depth) -{ - const CameraStructBase& cam = camsBasesDev[cam_cache_idx]; - float3 rpv = M3x3mulV2(cam.iP, pix); - normalize(rpv); - return cam.C + rpv * depth; -} - -__device__ float3 get3DPointForPixelAndDepthFromRC( int cam_cache_idx, - const int2& pixi, float depth) -{ - float2 pix; - pix.x = (float)pixi.x; - pix.y = (float)pixi.y; - return get3DPointForPixelAndDepthFromRC(cam_cache_idx, pix, depth); -} - -__device__ float3 triangulateMatchRef( int 
rc_cam_cache_idx, - int tc_cam_cache_idx, - float2& refpix, float2& tarpix) -{ - const CameraStructBase& rcCam = camsBasesDev[rc_cam_cache_idx]; - const CameraStructBase& tcCam = camsBasesDev[tc_cam_cache_idx]; - float3 refvect = M3x3mulV2(rcCam.iP, refpix); - normalize(refvect); - float3 refpoint = refvect + rcCam.C; - - float3 tarvect = M3x3mulV2(tcCam.iP, tarpix); - normalize(tarvect); - float3 tarpoint = tarvect + tcCam.C; - - float k, l; - float3 lli1, lli2; - - lineLineIntersect(&k, &l, &lli1, &lli2, - rcCam.C, - refpoint, - tcCam.C, - tarpoint); - - return rcCam.C + refvect * k; -} - -__device__ float computePixSize( int cam_cache_idx, - const float3& p) -{ - const CameraStructBase& cam = camsBasesDev[cam_cache_idx]; - float2 rp = project3DPoint(cam.P, p); - float2 rp1 = rp + make_float2(1.0f, 0.0f); - - float3 refvect = M3x3mulV2(cam.iP, rp1); - normalize(refvect); - return pointLineDistance3D(p, cam.C, refvect); -} - -__device__ float refineDepthSubPixel(const float3& depths, const float3& sims) -{ - // subpixel refinement - // subpixel refine by Stereo Matching with Color-Weighted Correlation, Hierarchical Belief Propagation, and - // Occlusion Handling Qingxiong pami08 - // quadratic polynomial interpolation is used to approximate the cost function between three discrete depth - // candidates: d, dA, and dB. - // TODO: get formula back from paper as it has been lost by encoding. - // d is the discrete depth with the minimal cost, dA ? d A 1, and dB ? d B 1. The cost function is approximated as f?x? ? ax2 - // B bx B c. 
- - float simM1 = sims.x; - float sim = sims.y; - float simP1 = sims.z; - simM1 = (simM1 + 1.0f) / 2.0f; - sim = (sim + 1.0f) / 2.0f; - simP1 = (simP1 + 1.0f) / 2.0f; - - // sim is supposed to be the best one (so the smallest one) - if((simM1 < sim) || (simP1 < sim)) - return depths.y; // return the input - - float dispStep = -((simP1 - simM1) / (2.0f * (simP1 + simM1 - 2.0f * sim))); - - float floatDepthM1 = depths.x; - float floatDepthP1 = depths.z; - - //-1 : floatDepthM1 - // 0 : floatDepth - //+1 : floatDepthP1 - // linear function fit - // f(x)=a*x+b - // floatDepthM1=-a+b - // floatDepthP1= a+b - // a = b - floatDepthM1 - // floatDepthP1=2*b-floatDepthM1 - float b = (floatDepthP1 + floatDepthM1) / 2.0f; - float a = b - floatDepthM1; - - float interpDepth = a * dispStep + b; - - // Ensure that the interpolated value is isfinite (i.e. neither infinite nor NaN) - if(!isfinite(interpDepth) || interpDepth <= 0.0f) - return depths.y; // return the input - - return interpDepth; -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/deviceCommon/device_patch_es_glob.hpp b/src/aliceVision/depthMap/cuda/deviceCommon/device_patch_es_glob.hpp deleted file mode 100644 index dff14a1405..0000000000 --- a/src/aliceVision/depthMap/cuda/deviceCommon/device_patch_es_glob.hpp +++ /dev/null @@ -1,103 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#pragma once - -namespace aliceVision { -namespace depthMap { - -struct Patch -{ - float3 p; //< 3d point - float3 n; //< normal - float3 x; //< x axis - float3 y; //< y axis - float d; //< pixel size -}; - -__device__ void rotPointAroundVect(float3& out, float3& X, float3& vect, int angle) -{ - double ux, uy, uz, vx, vy, vz, wx, wy, wz, sa, ca, x, y, z, u, v, w; - - double sizeX = sqrt(dot(X, X)); - x = X.x / sizeX; - y = X.y / sizeX; - z = X.z / sizeX; - u = vect.x; - v = vect.y; - w = vect.z; - - /*Rotate the point (x,y,z) around the vector (u,v,w)*/ - ux = u * x; - uy = u * y; - uz = u * z; - vx = v * x; - vy = v * y; - vz = v * z; - wx = w * x; - wy = w * y; - wz = w * z; - sa = sin((double)angle * (M_PI / 180.0f)); - ca = cos((double)angle * (M_PI / 180.0f)); - x = u * (ux + vy + wz) + (x * (v * v + w * w) - u * (vy + wz)) * ca + (-wy + vz) * sa; - y = v * (ux + vy + wz) + (y * (u * u + w * w) - v * (ux + wz)) * ca + (wx - uz) * sa; - z = w * (ux + vy + wz) + (z * (u * u + v * v) - w * (ux + vy)) * ca + (-vx + uy) * sa; - - u = sqrt(x * x + y * y + z * z); - x /= u; - y /= u; - z /= u; - - out.x = x * sizeX; - out.y = y * sizeX; - out.z = z * sizeX; -} - -__device__ void rotatePatch(Patch& ptch, int rx, int ry) -{ - float3 n, y, x; - - // rotate patch around x axis by angle rx - rotPointAroundVect(n, ptch.n, ptch.x, rx); - rotPointAroundVect(y, ptch.y, ptch.x, rx); - ptch.n = n; - ptch.y = y; - - // rotate new patch around y axis by angle ry - rotPointAroundVect(n, ptch.n, ptch.y, ry); - rotPointAroundVect(x, ptch.x, ptch.y, ry); - ptch.n = n; - ptch.x = x; -} - -__device__ void movePatch(Patch& ptch, int pt) -{ - // float3 v = ptch.p-rC; - // normalize(v); - float3 v = ptch.n; - - float d = ptch.d * (float)pt; - float3 p = ptch.p + v * d; - ptch.p = p; -} - -__device__ void computeRotCS(float3& xax, float3& yax, float3& n) -{ - xax.x = -n.y + n.z; // get any cross product - xax.y = +n.x + n.z; - xax.z = -n.x - n.y; - if(fabs(xax.x) < 0.0000001f && 
fabs(xax.y) < 0.0000001f && fabs(xax.z) < 0.0000001f) - { - xax.x = -n.y - n.z; // get any cross product (complementar) - xax.y = +n.x - n.z; - xax.z = +n.x + n.y; - }; - normalize(xax); - yax = cross(n, xax); -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/deviceCommon/device_utils.h b/src/aliceVision/depthMap/cuda/deviceCommon/device_utils.h deleted file mode 100644 index c217da2349..0000000000 --- a/src/aliceVision/depthMap/cuda/deviceCommon/device_utils.h +++ /dev/null @@ -1,62 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#pragma once - -namespace aliceVision { -namespace depthMap { - -template -class BufPtr -{ -public: - __host__ __device__ - BufPtr( T* ptr, int pitch ) - : _ptr( (unsigned char*)ptr ) - , _pitch( pitch ) - { } - - __host__ __device__ - inline T* ptr() { return (T*) _ptr; } - __host__ __device__ - inline const T* ptr() const { return (const T*)_ptr; } - - __host__ __device__ - inline T* row( int y ) { return (T*) (_ptr + y * _pitch); } - __host__ __device__ - inline const T* row( int y ) const { return (const T*)(_ptr + y * _pitch); } - - __host__ __device__ - inline T& at( int x, int y ) { return row(y)[x]; } - __host__ __device__ - inline const T& at( int x, int y ) const { return row(y)[x]; } -private: - BufPtr( ); - BufPtr( const BufPtr& ); - BufPtr& operator*=( const BufPtr& ); - - unsigned char* const _ptr; - const int _pitch; -}; - - -template -static inline -T* get3DBufferAt_h(T* ptr, int spitch, int pitch, int x, int y, int z) -{ - return ((T*)(((char*)ptr) + z * spitch + y * pitch)) + x; -} - -template -static inline -const T* get3DBufferAt_h(const T* ptr, int spitch, int pitch, int x, int y, int z) -{ - return 
((const T*)(((const char*)ptr) + z * spitch + y * pitch)) + x; -} - -} // namespace depthMap -} // namespace aliceVision - diff --git a/src/aliceVision/depthMap/cuda/host/DeviceCache.cpp b/src/aliceVision/depthMap/cuda/host/DeviceCache.cpp new file mode 100644 index 0000000000..008e0321db --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/DeviceCache.cpp @@ -0,0 +1,296 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include "DeviceCache.hpp" + +#include + +#include +#include + +#define DEVICE_MAX_DOWNSCALE ( MAX_CONSTANT_GAUSS_SCALES - 1 ) // maximum pre-computed Gaussian scales + +namespace aliceVision { +namespace depthMap { + +float3 M3x3mulV3(const float* M3x3, const float3& V) +{ + return make_float3(M3x3[0] * V.x + M3x3[3] * V.y + M3x3[6] * V.z, + M3x3[1] * V.x + M3x3[4] * V.y + M3x3[7] * V.z, + M3x3[2] * V.x + M3x3[5] * V.y + M3x3[8] * V.z); +} + +void normalize(float3& a) +{ + float d = sqrt(a.x * a.x + a.y * a.y + a.z * a.z); + a.x /= d; + a.y /= d; + a.z /= d; +} + +void initCameraMatrix(DeviceCameraParams& cameraParameters_h) +{ + float3 z; + z.x = 0.0f; + z.y = 0.0f; + z.z = 1.0f; + cameraParameters_h.ZVect = M3x3mulV3(cameraParameters_h.iR, z); + normalize(cameraParameters_h.ZVect); + + float3 y; + y.x = 0.0f; + y.y = 1.0f; + y.z = 0.0f; + cameraParameters_h.YVect = M3x3mulV3(cameraParameters_h.iR, y); + normalize(cameraParameters_h.YVect); + + float3 x; + x.x = 1.0f; + x.y = 0.0f; + x.z = 0.0f; + cameraParameters_h.XVect = M3x3mulV3(cameraParameters_h.iR, x); + normalize(cameraParameters_h.XVect); +} + +void fillHostCameraParameters(DeviceCameraParams& cameraParameters_h, int globalCamId, int downscale, const mvsUtils::MultiViewParams& mp) +{ + + Matrix3x3 scaleM; + scaleM.m11 = 1.0 / 
float(downscale); + scaleM.m12 = 0.0; + scaleM.m13 = 0.0; + scaleM.m21 = 0.0; + scaleM.m22 = 1.0 / float(downscale); + scaleM.m23 = 0.0; + scaleM.m31 = 0.0; + scaleM.m32 = 0.0; + scaleM.m33 = 1.0; + + Matrix3x3 K = scaleM * mp.KArr[globalCamId]; + Matrix3x3 iK = K.inverse(); + Matrix3x4 P = K * (mp.RArr[globalCamId] | (Point3d(0.0, 0.0, 0.0) - mp.RArr[globalCamId] * mp.CArr[globalCamId])); + Matrix3x3 iP = mp.iRArr[globalCamId] * iK; + + cameraParameters_h.C.x = mp.CArr[globalCamId].x; + cameraParameters_h.C.y = mp.CArr[globalCamId].y; + cameraParameters_h.C.z = mp.CArr[globalCamId].z; + + cameraParameters_h.P[0] = P.m11; + cameraParameters_h.P[1] = P.m21; + cameraParameters_h.P[2] = P.m31; + cameraParameters_h.P[3] = P.m12; + cameraParameters_h.P[4] = P.m22; + cameraParameters_h.P[5] = P.m32; + cameraParameters_h.P[6] = P.m13; + cameraParameters_h.P[7] = P.m23; + cameraParameters_h.P[8] = P.m33; + cameraParameters_h.P[9] = P.m14; + cameraParameters_h.P[10] = P.m24; + cameraParameters_h.P[11] = P.m34; + + cameraParameters_h.iP[0] = iP.m11; + cameraParameters_h.iP[1] = iP.m21; + cameraParameters_h.iP[2] = iP.m31; + cameraParameters_h.iP[3] = iP.m12; + cameraParameters_h.iP[4] = iP.m22; + cameraParameters_h.iP[5] = iP.m32; + cameraParameters_h.iP[6] = iP.m13; + cameraParameters_h.iP[7] = iP.m23; + cameraParameters_h.iP[8] = iP.m33; + + cameraParameters_h.R[0] = mp.RArr[globalCamId].m11; + cameraParameters_h.R[1] = mp.RArr[globalCamId].m21; + cameraParameters_h.R[2] = mp.RArr[globalCamId].m31; + cameraParameters_h.R[3] = mp.RArr[globalCamId].m12; + cameraParameters_h.R[4] = mp.RArr[globalCamId].m22; + cameraParameters_h.R[5] = mp.RArr[globalCamId].m32; + cameraParameters_h.R[6] = mp.RArr[globalCamId].m13; + cameraParameters_h.R[7] = mp.RArr[globalCamId].m23; + cameraParameters_h.R[8] = mp.RArr[globalCamId].m33; + + cameraParameters_h.iR[0] = mp.iRArr[globalCamId].m11; + cameraParameters_h.iR[1] = mp.iRArr[globalCamId].m21; + cameraParameters_h.iR[2] = 
mp.iRArr[globalCamId].m31; + cameraParameters_h.iR[3] = mp.iRArr[globalCamId].m12; + cameraParameters_h.iR[4] = mp.iRArr[globalCamId].m22; + cameraParameters_h.iR[5] = mp.iRArr[globalCamId].m32; + cameraParameters_h.iR[6] = mp.iRArr[globalCamId].m13; + cameraParameters_h.iR[7] = mp.iRArr[globalCamId].m23; + cameraParameters_h.iR[8] = mp.iRArr[globalCamId].m33; + + cameraParameters_h.K[0] = K.m11; + cameraParameters_h.K[1] = K.m21; + cameraParameters_h.K[2] = K.m31; + cameraParameters_h.K[3] = K.m12; + cameraParameters_h.K[4] = K.m22; + cameraParameters_h.K[5] = K.m32; + cameraParameters_h.K[6] = K.m13; + cameraParameters_h.K[7] = K.m23; + cameraParameters_h.K[8] = K.m33; + + cameraParameters_h.iK[0] = iK.m11; + cameraParameters_h.iK[1] = iK.m21; + cameraParameters_h.iK[2] = iK.m31; + cameraParameters_h.iK[3] = iK.m12; + cameraParameters_h.iK[4] = iK.m22; + cameraParameters_h.iK[5] = iK.m32; + cameraParameters_h.iK[6] = iK.m13; + cameraParameters_h.iK[7] = iK.m23; + cameraParameters_h.iK[8] = iK.m33; + + initCameraMatrix(cameraParameters_h); +} + +DeviceCache::SingleDeviceCache::SingleDeviceCache(int maxNbCameras) + : cameraCache(maxNbCameras) +{ + // get the current device id + const int cudaDeviceId = getCudaDeviceId(); + + ALICEVISION_LOG_TRACE("Initialize device cache (device id: " << cudaDeviceId << ", cameras: " << maxNbCameras << ")."); + + // initialize Gaussian filters in GPU constant memory + cuda_createConstantGaussianArray(cudaDeviceId, DEVICE_MAX_DOWNSCALE); // force at compilation to build with maximum pre-computed Gaussian scales. 
+ + if(maxNbCameras > ALICEVISION_DEVICE_MAX_CONSTANT_CAMERA_PARAM_SETS) + ALICEVISION_THROW_ERROR("Cannot initialize device cache with more than " << ALICEVISION_DEVICE_MAX_CONSTANT_CAMERA_PARAM_SETS << " cameras (device id: " << cudaDeviceId << ", cameras: " << maxNbCameras << ").") + + // initialize cached camera containers + cameras.reserve(maxNbCameras); + for(int i = 0; i < maxNbCameras; ++i) + { + cameras.push_back(std::make_unique(i)); + } +} + +void DeviceCache::clear() +{ + // get the current device id + const int cudaDeviceId = getCudaDeviceId(); + + auto it = _cachePerDevice.find(cudaDeviceId); + + // if found, erase SingleDeviceCache data + if(it != _cachePerDevice.end()) + _cachePerDevice.erase(it); +} + +void DeviceCache::buildCache(int maxNbCameras) +{ + // get the current device id + const int cudaDeviceId = getCudaDeviceId(); + + // reset the current device cache + _cachePerDevice[cudaDeviceId].reset(new SingleDeviceCache(maxNbCameras)); +} + +void DeviceCache::addCamera(int globalCamId, int downscale, mvsUtils::ImagesCache>& imageCache, const mvsUtils::MultiViewParams& mp) +{ + // get the current device id + const int cudaDeviceId = getCudaDeviceId(); + + // get the current device cache + if(_cachePerDevice[cudaDeviceId] == nullptr) + ALICEVISION_THROW_ERROR("Cannot add camera, device cache is not initialized (cuda device id: " << cudaDeviceId <<").") + + SingleDeviceCache& currentDeviceCache = *_cachePerDevice[cudaDeviceId]; + + // find out with the LRU (Least Recently Used) strategy if the camera is already in the cache + int deviceCamId; + const CameraSelection newCameraSelection(globalCamId, downscale); + const bool isNewInsertion = currentDeviceCache.cameraCache.insert(newCameraSelection, &deviceCamId); + DeviceCamera& deviceCamera = *(currentDeviceCache.cameras.at(deviceCamId)); + + // get corresponding view id for logs + const IndexT viewId = mp.getViewId(globalCamId); + + // check if the camera is already in cache + if(!isNewInsertion) + 
{ + // nothing to do + ALICEVISION_LOG_TRACE("Add camera on device cache: Camera already on cache (id: " << globalCamId << ", view id: " << viewId << ", downscale: " << downscale << ")."); + return; + } + + // update the cached camera container + if(deviceCamera.getGlobalCamId() < 0) + ALICEVISION_LOG_TRACE("Add camera on device cache (id: " << globalCamId << ", view id: " << viewId << ", downscale: " << downscale << ")."); + else + ALICEVISION_LOG_TRACE("Add camera on device cache (id: " << globalCamId << ", view id: " << viewId << ", downscale: " << downscale << ")." + << "Replace camera (id: " << deviceCamera.getGlobalCamId() << ", view id: " << mp.getViewId(deviceCamera.getGlobalCamId()) << ", downscale: " << deviceCamera.getDownscale() << ")"); + + mvsUtils::ImagesCache>::ImgSharedPtr img = imageCache.getImg_sync(globalCamId); + + // allocate the frame full size host-sided data buffer + CudaSize<2> originalFrameSize(img->Width(), img->Height()); + CudaHostMemoryHeap frame_hmh(originalFrameSize); + + // copy data for cached image "globalCamId" into an host-side data buffer + #pragma omp parallel for + for(int y = 0; y < originalFrameSize.y(); ++y) + { + for(int x = 0; x < originalFrameSize.x(); ++x) + { + const image::RGBAfColor& floatRGBA = (*img)(y, x); + CudaRGBA& cudaRGBA = frame_hmh(x, y); + +#ifdef ALICEVISION_DEPTHMAP_TEXTURE_USE_HALF + // explicit float to half conversion + cudaRGBA.x = __float2half(floatRGBA.r() * 255.0f); + cudaRGBA.y = __float2half(floatRGBA.g() * 255.0f); + cudaRGBA.z = __float2half(floatRGBA.b() * 255.0f); + cudaRGBA.w = __float2half(floatRGBA.a() * 255.0f); +#else + cudaRGBA.x = floatRGBA.r() * 255.0f; + cudaRGBA.y = floatRGBA.g() * 255.0f; + cudaRGBA.z = floatRGBA.b() * 255.0f; + cudaRGBA.w = floatRGBA.a() * 255.0f; +#endif + } + } + + // build host-side device camera parameters struct + DeviceCameraParams cameraParameters_h; + fillHostCameraParameters(cameraParameters_h, globalCamId, downscale, mp); + + // update device camera + 
deviceCamera.fill(globalCamId, downscale, originalFrameSize.x(), originalFrameSize.y(), frame_hmh, cameraParameters_h); +} + +const DeviceCamera& DeviceCache::requestCamera(int globalCamId, int downscale, const mvsUtils::MultiViewParams& mp) +{ + // get the current device id + const int cudaDeviceId = getCudaDeviceId(); + + // get the current device cache + if(_cachePerDevice[cudaDeviceId] == nullptr) + ALICEVISION_THROW_ERROR("Cannot add camera, device cache is not initialized (cuda device id: " << cudaDeviceId <<").") + + SingleDeviceCache& currentDeviceCache = *_cachePerDevice[cudaDeviceId]; + + // find out with the LRU (Least Recently Used) strategy if the camera is already in the cache + int deviceCamId; + const CameraSelection newCameraSelection(globalCamId, downscale); + const bool isNewInsertion = currentDeviceCache.cameraCache.insert(newCameraSelection, &deviceCamId); + const DeviceCamera& deviceCamera = *(currentDeviceCache.cameras.at(deviceCamId)); + + // get corresponding view id for logs + const IndexT viewId = mp.getViewId(globalCamId); + + // check if the camera is already in cache + if(isNewInsertion) + { + ALICEVISION_THROW_ERROR("Request camera on device cache: Not found (id: " << globalCamId << ", view id: " << viewId << ", downscale: " << downscale << ").") + } + + ALICEVISION_LOG_TRACE("Request camera on device cache (id: " << globalCamId << ", view id: " << viewId << ", downscale: " << downscale << ")."); + + // return the cached device camera + return deviceCamera; +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/host/DeviceCache.hpp b/src/aliceVision/depthMap/cuda/host/DeviceCache.hpp new file mode 100644 index 0000000000..3d94b0e0a9 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/DeviceCache.hpp @@ -0,0 +1,103 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. 
+// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include + +#include +#include + +#include +#include + +namespace aliceVision { +namespace depthMap { + +/* + * @class DeviceCache + * @brief This singleton allows to access the current gpu cache. + */ +class DeviceCache +{ +public: + + static DeviceCache& getInstance() + { + static DeviceCache instance; + return instance; + } + + // Singleton, no copy constructor + DeviceCache(DeviceCache const&) = delete; + + // Singleton, no copy operator + void operator=(DeviceCache const&) = delete; + + /** + * @brief Clear the current gpu device cache. + */ + void clear(); + + /** + * @brief Build the current device cache with the given maximum number of cameras. + * @param[in] maxNbCameras the maximum number of cameras in the current device cache + */ + void buildCache(int maxNbCameras); + + /** + * @brief Add a camera (images + parameters) in current gpu device cache. + * @param[in] globalCamId the camera index in the ImagesCache / MultiViewParams + * @param[in] downscale the downscale to apply on gpu + * @param[in,out] imageCache the image cache to get host-side data + * @param[in] mp the multi-view parameters + */ + void addCamera(int globalCamId, int downscale, mvsUtils::ImagesCache>& imageCache, const mvsUtils::MultiViewParams& mp); + + /** + * @brief Request a camera (images + parameters) in current gpu device cache. 
+ * @param[in] globalCamId the camera index in the ImagesCache / MultiViewParams + * @param[in] downscale the downscale to apply on gpu + * @param[in] mp the multi-view parameters + * @return DeviceCamera (images + parameters) + */ + const DeviceCamera& requestCamera(int globalCamId, int downscale, const mvsUtils::MultiViewParams& mp); + +private: + + // Singleton, private default constructor + DeviceCache() = default; + + // Singleton, private default destructor + ~DeviceCache() = default; + + /* + * @struct SingleDeviceCache + * @brief This class keeps the cache data for a single gpu device. + */ + struct SingleDeviceCache + { + SingleDeviceCache(int maxNbCameras); + ~SingleDeviceCache() = default; + + LRUCameraCache cameraCache; // Least Recently Used device camera id cache + std::vector> cameras; + }; + + std::map > _cachePerDevice; // +}; + +/** + * @brief Fill the host-side camera parameters from multi-view parameters. + * @param[in,out] cameraParameters_h the host-side camera parameters + * @param[in] globalCamId the camera index in the ImagesCache / MultiViewParams + * @param[in] downscale the downscale to apply on gpu + * @param[in] mp the multi-view parameters + */ +void fillHostCameraParameters(DeviceCameraParams& cameraParameters_h, int globalCamId, int downscale, const mvsUtils::MultiViewParams& mp); + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/host/DeviceCamera.cpp b/src/aliceVision/depthMap/cuda/host/DeviceCamera.cpp new file mode 100644 index 0000000000..a6983f3d26 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/DeviceCamera.cpp @@ -0,0 +1,182 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+#include "DeviceCamera.hpp"
+
+#include
+#include
+
+namespace aliceVision {
+namespace depthMap {
+
+/**
+ * @brief Create a CUDA texture object reading from a pitched 2d device frame.
+ *
+ * Coordinates are not normalized and are clamped on the borders. The read mode and
+ * the filter mode depend on the ALICEVISION_DEPTHMAP_TEXTURE_USE_* compile-time switches.
+ *
+ * @param[in] frame_dmp the device frame the texture reads from; the texture refers to
+ *            this buffer, it does not copy it
+ * @param[out] textureObject the created texture object
+ * @throws std::runtime_error (through THROW_ON_CUDA_ERROR) if the texture object cannot be created
+ */
+void buildFrameCudaTexture(CudaDeviceMemoryPitched<CudaRGBA, 2>& frame_dmp, cudaTextureObject_t* textureObject)
+{
+    cudaTextureDesc texDesc;
+    memset(&texDesc, 0, sizeof(cudaTextureDesc));
+    texDesc.normalizedCoords = 0; // addressed (x,y) in [width,height]
+    texDesc.addressMode[0] = cudaAddressModeClamp;
+    texDesc.addressMode[1] = cudaAddressModeClamp;
+    texDesc.addressMode[2] = cudaAddressModeClamp;
+
+#if defined(ALICEVISION_DEPTHMAP_TEXTURE_USE_UCHAR) && defined(ALICEVISION_DEPTHMAP_TEXTURE_USE_INTERPOLATION)
+    texDesc.readMode = cudaReadModeNormalizedFloat; // uchar to float [0:1], see tex2d_float4 function
+#else
+    texDesc.readMode = cudaReadModeElementType;
+#endif
+
+#ifdef ALICEVISION_DEPTHMAP_TEXTURE_USE_INTERPOLATION
+    // with subpixel interpolation (can have a large performance impact on some graphic cards)
+    // but could be critical for quality during SGM in small resolution
+    texDesc.filterMode = cudaFilterModeLinear;
+#else
+    // without interpolation
+    texDesc.filterMode = cudaFilterModePoint;
+#endif
+
+    // zero the whole descriptor first: only the pitch2D members are set below
+    cudaResourceDesc resDesc;
+    memset(&resDesc, 0, sizeof(cudaResourceDesc));
+    resDesc.resType = cudaResourceTypePitch2D;
+
+#ifdef ALICEVISION_DEPTHMAP_TEXTURE_USE_HALF
+    resDesc.res.pitch2D.desc = cudaCreateChannelDescHalf4();
+#else
+    resDesc.res.pitch2D.desc = cudaCreateChannelDesc<CudaRGBA>();
+#endif
+
+    resDesc.res.pitch2D.devPtr = frame_dmp.getBuffer();
+    resDesc.res.pitch2D.width = frame_dmp.getSize()[0];
+    resDesc.res.pitch2D.height = frame_dmp.getSize()[1];
+    resDesc.res.pitch2D.pitchInBytes = frame_dmp.getPitch();
+
+    cudaError_t err = cudaCreateTextureObject(textureObject, &resDesc, &texDesc, 0);
+    THROW_ON_CUDA_ERROR(err, "Failed to bind texture object to camera frame array");
+}
+
+// Build an empty device camera: every frame related member is set to -1 until fill() is called.
+DeviceCamera::DeviceCamera(int deviceCamId)
+    : _deviceCamId(deviceCamId)
+    , _globalCamId(-1)
+    , _originalWidth(-1)
+    , _originalHeight(-1)
+    , _width(-1)
+    , _height(-1)
+    , _downscale(-1)
+    , _memBytes(0)
+{}
+
+DeviceCamera::~DeviceCamera() +{ + _frame_dmp.reset(); + cudaFreeHost(_cameraParameters_h); + cudaDestroyTextureObject(_textureObject); +} + +void DeviceCamera::fill(int globalCamId, + int downscale, + int originalWidth, + int originalHeight, + const CudaHostMemoryHeap& frame_hmh, + const DeviceCameraParams& cameraParameters_h) +{ + // update members + _globalCamId = globalCamId; + _originalWidth = originalWidth; + _originalHeight = originalHeight; + _width = _originalWidth / downscale; + _height = _originalHeight / downscale; + _downscale = downscale; + + // allocate or re-allocate the host-sided camera params + { + if(_cameraParameters_h != nullptr) + cudaFreeHost(_cameraParameters_h); + CHECK_CUDA_ERROR(); + cudaError_t err = cudaMallocHost(&_cameraParameters_h, sizeof(DeviceCameraParams)); + THROW_ON_CUDA_ERROR(err, "Could not allocate camera parameters in pinned host memory in " << __FILE__ << ":" << __LINE__ << ", " << cudaGetErrorString(err)); + } + + // copy the given camera parameters + *_cameraParameters_h = cameraParameters_h; + + // copy the host-sided camera params in device constant camera params array + { + cudaMemcpyKind kind = cudaMemcpyHostToDevice; + cudaError_t err; + + err = cudaMemcpyToSymbol(constantCameraParametersArray_d, _cameraParameters_h, sizeof(DeviceCameraParams), _deviceCamId * sizeof(DeviceCameraParams), kind); + + //if(stream != 0) + //{ + // err = cudaMemcpyToSymbolAsync(constantCameraParametersArray_d, _cameraParameters_h, sizeof(DeviceCameraParams), + // _deviceCamId * sizeof(DeviceCameraParams), kind, stream); + //} + + THROW_ON_CUDA_ERROR(err, "Failed to copy DeviceCameraParams from host to device in " << __FILE__ << ":" << __LINE__ << ": " << cudaGetErrorString(err)); + } + + // destroy previsous texture object + if(_frame_dmp != nullptr) + cudaDestroyTextureObject(_textureObject); + + // allocate or re-allocate device frame if needed + const CudaSize<2> deviceFrameSize(_width, _height); + + if(_frame_dmp.get() == nullptr || 
_frame_dmp->getSize() != deviceFrameSize) + { + // allocate or re-allocate the device-sided data buffer with the new size + _frame_dmp.reset(new CudaDeviceMemoryPitched(deviceFrameSize)); + _memBytes = _frame_dmp->getBytesPadded(); + } + + // update device frame + fillDeviceFrameFromHostFrame(frame_hmh); +} + +void DeviceCamera::fillDeviceFrameFromHostFrame(const CudaHostMemoryHeap& frame_hmh) +{ + if(_downscale <= 1) + { + // no need to downscale + assert(_originalHeight == _height); + assert(_originalWidth == _width); + + // copy texture's data from host to device + _frame_dmp->copyFrom(frame_hmh); + } + else + { + // allocate the full size device-sided data buffer + CudaDeviceMemoryPitched deviceFrameToDownscale(frame_hmh.getSize()); + cudaTextureObject_t textureObjectToDownscale; + + // copy the full size host-sided data buffer onto the device-sided data buffer + deviceFrameToDownscale.copyFrom(frame_hmh); + + // build the full size device-sided data buffer texture object + buildFrameCudaTexture(deviceFrameToDownscale, &textureObjectToDownscale); + + // downscale with gaussian blur the initial texture + const int gaussianFilterRadius = _downscale; + cuda_downscaleWithGaussianBlur(*_frame_dmp, textureObjectToDownscale, _downscale, _width, _height, gaussianFilterRadius, 0 /*stream*/); + + // wait for kernel completion + cudaDeviceSynchronize(); + + // delete full size texture object on the GPU. 
+ // full size device frame will be deleted at the end of the scope + cudaDestroyTextureObject(textureObjectToDownscale); + } + + // in-place color conversion into CIELAB + cuda_rgb2lab(*_frame_dmp, _width, _height, 0 /*stream*/); + + // wait for kernel completion + cudaDeviceSynchronize(); + + // re-build the frame associated CUDA texture object + buildFrameCudaTexture(*_frame_dmp.get(), &_textureObject); +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/host/DeviceCamera.hpp b/src/aliceVision/depthMap/cuda/host/DeviceCamera.hpp new file mode 100644 index 0000000000..0d3207261b --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/DeviceCamera.hpp @@ -0,0 +1,94 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include + +namespace aliceVision { +namespace depthMap { + +/* + * @class DeviceCamera + * @brief Support class to maintain a camera frame in gpu memory and + * also manage DeviceCameraParams in gpu contant memory. + */ +class DeviceCamera +{ +public: + + /** + * @brief DeviceCamera constructor. 
+ * @param[in] deviceCamId the unique gpu camera index should correspond to + * an available index in DeviceCameraParams constant memory + */ + DeviceCamera(int deviceCamId); + + // destructor + ~DeviceCamera(); + + // this class handles unique data, no copy constructor + DeviceCamera(DeviceCamera const&) = delete; + + // this class handles unique data, no copy operator + void operator=(DeviceCamera const&) = delete; + + inline int getDeviceCamId() const { return _deviceCamId; } + inline int getGlobalCamId() const { return _globalCamId; } + inline int getOriginalWidth() const { return _originalWidth; } + inline int getOriginalHeight() const { return _originalHeight; } + inline int getWidth() const { return _width; } + inline int getHeight() const { return _height; } + inline int getDownscale() const { return _downscale; } + inline int getDeviceMemoryConsumption() const { return _memBytes; } + inline cudaTextureObject_t getTextureObject() const { return _textureObject; } + + /** + * @brief Update the DeviceCamera from a new host-side corresponding camera. + * @param[in] globalCamId the camera index in the ImagesCache / MultiViewParams + * @param[in] downscale the downscale to apply on gpu + * @param[in] originalWidth the image original width + * @param[in] originalHeight the image original height + * @param[in] frame_hmh the host-side image frame + * @param[in] cameraParameters_h the host-side camera parameters + */ + void fill(int globalCamId, + int downscale, + int originalWidth, + int originalHeight, + const CudaHostMemoryHeap& frame_hmh, + const DeviceCameraParams& cameraParameters_h); + +private: + + // private methods + + /** + * @brief Update the DeviceCamera frame with an host-side corresponding frame. 
+ * @param[in] frame_hmh the host-side corresponding frame + */ + void fillDeviceFrameFromHostFrame(const CudaHostMemoryHeap& frame_hmh); + + // private members + + const int _deviceCamId; // the device camera index, identical to index in DeviceCache vector & index in constantCameraParametersArray_d + int _globalCamId; // the global camera index, host-sided image cache index + int _originalWidth; // the original image width (before downscale, in cpu memory) + int _originalHeight; // the original image height (before downscale, in cpu memory) + int _width; // the image width (after downscale, in gpu memory) + int _height; // the image height (after downscale, in gpu memory) + int _downscale; // the downscale factor (1 equal no downscale) + int _memBytes; // the device memory consumption + + DeviceCameraParams* _cameraParameters_h = nullptr; // host-side camera parameters + std::unique_ptr> _frame_dmp = nullptr; + cudaTextureObject_t _textureObject; +}; + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/host/DeviceStreamManager.cpp b/src/aliceVision/depthMap/cuda/host/DeviceStreamManager.cpp new file mode 100644 index 0000000000..c582eea9fa --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/DeviceStreamManager.cpp @@ -0,0 +1,56 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "DeviceStreamManager.hpp" + +#include + +namespace aliceVision { +namespace depthMap { + +DeviceStreamManager::DeviceStreamManager(int nbStreams) + : _nbStreams(nbStreams) +{ + assert(nbStreams > 0); + + _streams.resize(nbStreams); + + for(int i = 0; i < nbStreams; ++i) + { + cudaError_t err = cudaStreamCreate(&_streams.at(i)); + if(err != cudaSuccess) + { + ALICEVISION_LOG_WARNING("DeviceStreamManager: Failed to create a CUDA stream object " << i << "/" << nbStreams << ", " << cudaGetErrorString(err)); + _streams.at(i) = 0; + } + } +} + +DeviceStreamManager::~DeviceStreamManager() +{ + for(cudaStream_t& stream : _streams) + { + cudaStreamSynchronize(stream); + + if(stream != 0) + { + cudaStreamDestroy(stream); + } + } +} + +cudaStream_t DeviceStreamManager::getStream(int streamIndex) +{ + return _streams.at(streamIndex % _nbStreams); +} + +void DeviceStreamManager::waitStream(int streamIndex) +{ + cudaStreamSynchronize(getStream(streamIndex)); +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/host/DeviceStreamManager.hpp b/src/aliceVision/depthMap/cuda/host/DeviceStreamManager.hpp new file mode 100644 index 0000000000..e0b2a04e4a --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/DeviceStreamManager.hpp @@ -0,0 +1,66 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include + +#include + +namespace aliceVision { +namespace depthMap { + +/* + * @class DeviceStreamManager + * @brief Small class allowing a simple management of gpu streams + */ +class DeviceStreamManager +{ +public: + + /** + * @brief DeviceStreamManager constructor. 
+ * @param[in] nbStreams the number of gpu streams managed + */ + DeviceStreamManager(int nbStreams); + + // destructor + ~DeviceStreamManager(); + + // this class handles unique data, no copy constructor + DeviceStreamManager(DeviceStreamManager const&) = delete; + + // this class handles unique data, no copy operator + void operator=(DeviceStreamManager const&) = delete; + + /** + * @brief Get the number of gpu streams managed. + * @return number of gpu streams managed + */ + inline int getNbStreams() const { return _nbStreams; } + + /** + * @brief Get the stream object associated with the given index. + * @param[in] streamIndex the stream index in the DeviceStreamManager + * @note if streamIndex > nbStream, this function returns the stream object associated with streamIndex % nbStream + * @return the associated stream object + */ + cudaStream_t getStream(int streamIndex); + + /** + * @brief Waits for stream tasks to complete. + * @param[in] streamIndex the stream index in the DeviceStreamManager + */ + void waitStream(int streamIndex); + +private: + + const int _nbStreams; + std::vector _streams; +}; + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/LRUCache.hpp b/src/aliceVision/depthMap/cuda/host/LRUCache.hpp similarity index 100% rename from src/aliceVision/depthMap/cuda/LRUCache.hpp rename to src/aliceVision/depthMap/cuda/host/LRUCache.hpp diff --git a/src/aliceVision/depthMap/cuda/host/LRUCameraCache.hpp b/src/aliceVision/depthMap/cuda/host/LRUCameraCache.hpp new file mode 100644 index 0000000000..fe6da76b1c --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/LRUCameraCache.hpp @@ -0,0 +1,44 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#pragma once + +#include + +namespace aliceVision { +namespace depthMap { + +/** + * @struct CameraSelection + * @brief Support class for operating an LRU cache of cameras + */ +struct CameraSelection : public std::pair +{ + CameraSelection() : std::pair(0, 0) {} + CameraSelection(int i) : std::pair(i, i) {} + CameraSelection(int i, int j) : std::pair(i, j) {} + + CameraSelection& operator=(int i) + { + this->first = this->second = i; + return *this; + } +}; + +inline bool operator==(const CameraSelection& l, const CameraSelection& r) +{ + return (l.first == r.first && l.second == r.second); +} + +inline bool operator<(const CameraSelection& l, const CameraSelection& r) +{ + return (l.first < r.first || (l.first == r.first && l.second < r.second)); +} + +using LRUCameraCache = LRUCache; + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/host_utils.h b/src/aliceVision/depthMap/cuda/host/divUp.hpp similarity index 58% rename from src/aliceVision/depthMap/cuda/planeSweeping/host_utils.h rename to src/aliceVision/depthMap/cuda/host/divUp.hpp index a4f3547377..2e1488fbdb 100644 --- a/src/aliceVision/depthMap/cuda/planeSweeping/host_utils.h +++ b/src/aliceVision/depthMap/cuda/host/divUp.hpp @@ -6,31 +6,20 @@ #pragma once -#include - namespace aliceVision { namespace depthMap { -// Round a / b to nearest higher integer value. -inline -unsigned int divUp(unsigned int a, unsigned int b) +/** + * @brief Round a / b to nearest higher integer value. + * @param[in] a an integer value + * @param[in] b an integer value + * @return nearest higher integer value of round a / b. + */ +__host__ inline unsigned int divUp(unsigned int a, unsigned int b) { return (a % b != 0) ? 
(a / b + 1) : (a / b); } -inline -clock_t tic() -{ - return clock(); -} - -// returns the ms passed after last call to tic() -inline -float toc(clock_t ticClk) -{ - return (float)((clock() - ticClk) * 1000.0 / CLOCKS_PER_SEC); -} - } // namespace depthMap } // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/commonStructures.hpp b/src/aliceVision/depthMap/cuda/host/memory.hpp similarity index 90% rename from src/aliceVision/depthMap/cuda/commonStructures.hpp rename to src/aliceVision/depthMap/cuda/host/memory.hpp index 518b46e48b..20f7e61ff1 100644 --- a/src/aliceVision/depthMap/cuda/commonStructures.hpp +++ b/src/aliceVision/depthMap/cuda/host/memory.hpp @@ -6,7 +6,20 @@ #pragma once +// #define ALICEVISION_DEPTHMAP_TEXTURE_USE_UCHAR +#define ALICEVISION_DEPTHMAP_TEXTURE_USE_HALF +#define ALICEVISION_DEPTHMAP_TEXTURE_USE_INTERPOLATION + +#ifdef ALICEVISION_DEPTHMAP_TEXTURE_USE_HALF +#define CUDA_NO_HALF +#include +#endif + +#include +#include + #include + #include #include #include @@ -16,20 +29,21 @@ #include #include - -#define THROW_ON_CUDA_ERROR(rcode, message) \ - if (rcode != cudaSuccess) { \ - std::stringstream s; s << message << ": " << cudaGetErrorString(err); \ - throw std::runtime_error(s.str()); \ - } - - namespace aliceVision { namespace depthMap { -#define MAX_CONSTANT_CAMERA_PARAM_SETS 10 - - +#ifdef ALICEVISION_DEPTHMAP_TEXTURE_USE_UCHAR +using CudaColorBaseType = unsigned char; +using CudaRGBA = uchar4; +#else +#ifdef ALICEVISION_DEPTHMAP_TEXTURE_USE_HALF +struct CudaRGBA { __half x, y, z, w; }; +using CudaColorBaseType = __half; +#else +using CudaColorBaseType = float; +using CudaRGBA = float4; +#endif // ALICEVISION_DEPTHMAP_TEXTURE_USE_HALF +#endif // ALICEVISION_DEPTHMAP_TEXTURE_USE_UCHAR /********************************************************************************* * forward declarations @@ -347,7 +361,7 @@ template class CudaHostMemoryHeap : public CudaMemory } // see below with copy() functions - void copyFrom( const 
CudaDeviceMemoryPitched& src ); + void copyFrom( const CudaDeviceMemoryPitched& src, cudaStream_t stream = 0); inline Type *getBuffer() { @@ -447,7 +461,7 @@ template class CudaDeviceMemoryPitched : public CudaM { if( buffer == nullptr ) { - allocate( rhs.size ); + allocate( rhs.getSize() ); } else if( this->getSize() != rhs.getSize() ) { @@ -458,23 +472,11 @@ template class CudaDeviceMemoryPitched : public CudaM return *this; } - template - void bindToTexture( texturetype& texref ) - { - cudaError_t err = cudaBindTexture2D( 0, // offset - texref, - this->getBuffer(), - cudaCreateChannelDesc(), - this->getUnitsInDim(0), - this->getUnitsInDim(1), - this->getPitch() ); - THROW_ON_CUDA_ERROR( err, "Failed to bind texture reference to pitched memory, " << cudaGetErrorString( err ) ); - } - // see below with copy() functions + void copyFrom( const CudaDeviceMemoryPitched& src, cudaStream_t stream = 0 ); void copyFrom( const CudaHostMemoryHeap& src, cudaStream_t stream = 0 ); void copyFrom( const Type* src, size_t sx, size_t sy ); - void copyFrom( const CudaDeviceMemoryPitched& src ); + void copyTo( Type* dst, size_t sx, size_t sy ) const; @@ -563,6 +565,9 @@ template class CudaDeviceMemoryPitched : public CudaM buffer = (Type*)pitchDevPtr.ptr; this->setPitch( pitchDevPtr.pitch ); + + ALICEVISION_LOG_DEBUG("GPU 3D allocation: " << this->getUnitsInDim(0) << "x" << this->getUnitsInDim(1) << "x" << this->getUnitsInDim(2) << ", type size=" << sizeof(Type) << ", pitch=" << pitchDevPtr.pitch); + ALICEVISION_LOG_DEBUG(" : " << this->getBytesUnpadded() << ", padded=" << this->getBytesPadded() << ", wasted=" << this->getBytesPadded() - this->getBytesUnpadded() << ", wasted ratio=" << ((this->getBytesPadded() - this->getBytesUnpadded()) / double(this->getBytesUnpadded())) * 100.0 << "%"); } else { @@ -594,6 +599,11 @@ template class CudaDeviceMemory : public CudaMemorySizeBase { Type* buffer = nullptr; public: + + CudaDeviceMemory() + : buffer( nullptr ) + { } + explicit 
CudaDeviceMemory(const size_t size) { allocate( size ); @@ -790,6 +800,53 @@ template class CudaArray : public CudaMemorySizeBase< * copyFrom member functions *********************************************************************************/ +template +void CudaDeviceMemoryPitched::copyFrom(const CudaDeviceMemoryPitched& src, cudaStream_t stream) +{ + const cudaMemcpyKind kind = cudaMemcpyDeviceToDevice; + cudaError_t err; + if(Dim == 1) + { + if( stream == 0 ) + err = cudaMemcpy( this->getBytePtr(), + src.getBytePtr(), + src.getUnpaddedBytesInRow(), + kind ); + else + err = cudaMemcpyAsync( this->getBytePtr(), + src.getBytePtr(), + src.getUnpaddedBytesInRow(), + kind, + stream ); + + THROW_ON_CUDA_ERROR(err, "Failed to copy (" << __FILE__ << " " << __LINE__ << ")"); + } + else if(Dim >= 2) + { + size_t number_of_rows = 1; + for( int i=1; igetBytePtr(), + this->getPitch(), + src.getBytePtr(), + src.getPitch(), + src.getUnpaddedBytesInRow(), + number_of_rows, + kind ); + else + err = cudaMemcpy2DAsync( this->getBytePtr(), + this->getPitch(), + src.getBytePtr(), + src.getPitch(), + src.getUnpaddedBytesInRow(), + number_of_rows, + kind, + stream ); + THROW_ON_CUDA_ERROR(err, "Failed to copy (" << __FILE__ << " " << __LINE__ << ")"); + } +} + template void CudaDeviceMemoryPitched::copyFrom( const CudaHostMemoryHeap& src, cudaStream_t stream ) { @@ -836,6 +893,7 @@ void CudaDeviceMemoryPitched::copyFrom( const CudaHostMemoryHeap void CudaDeviceMemoryPitched::copyFrom( const Type* src, size_t sx, size_t sy ) { @@ -857,58 +915,47 @@ void CudaDeviceMemoryPitched::copyFrom( const Type* src, size_t sx, s } template -void CudaDeviceMemoryPitched::copyFrom(const CudaDeviceMemoryPitched& src) -{ - const cudaMemcpyKind kind = cudaMemcpyDeviceToDevice; - if(Dim == 1) - { - cudaError_t err = cudaMemcpy(this->getBytePtr(), - src.getBytePtr(), - src.getUnpaddedBytesInRow(), - kind); - THROW_ON_CUDA_ERROR(err, "Failed to copy (" << __FILE__ << " " << __LINE__ << ")"); - } - else 
if(Dim >= 2) - { - size_t number_of_rows = 1; - for( int i=1; igetBytePtr(), - this->getPitch(), - src.getBytePtr(), - src.getPitch(), - src.getUnpaddedBytesInRow(), - number_of_rows, - kind); - THROW_ON_CUDA_ERROR(err, "Failed to copy (" << __FILE__ << " " << __LINE__ << ")"); - } -} - -template -void CudaHostMemoryHeap::copyFrom( const CudaDeviceMemoryPitched& src ) +void CudaHostMemoryHeap::copyFrom(const CudaDeviceMemoryPitched& src, cudaStream_t stream) { const cudaMemcpyKind kind = cudaMemcpyDeviceToHost; + cudaError_t err; if(Dim == 1) { - cudaError_t err = cudaMemcpy( this->getBytePtr(), - src.getBytePtr(), - this->getUnpaddedBytesInRow(), - kind); + if( stream == 0 ) + err = cudaMemcpy( this->getBytePtr(), + src.getBytePtr(), + src.getUnpaddedBytesInRow(), + kind ); + else + err = cudaMemcpyAsync( this->getBytePtr(), + src.getBytePtr(), + src.getUnpaddedBytesInRow(), + kind, + stream ); THROW_ON_CUDA_ERROR(err, "Failed to copy (" << __FILE__ << " " << __LINE__ << ")"); } else if(Dim >= 2) { size_t number_of_rows = 1; - for( int i=1; igetUnitsInDim(i); - - cudaError_t err = cudaMemcpy2D( this->getBytePtr(), - this->getPitch(), - src.getBytePtr(), - src.getPitch(), - this->getUnpaddedBytesInRow(), - number_of_rows, - kind); + for( int i=1; igetBytePtr(), + this->getPitch(), + src.getBytePtr(), + src.getPitch(), + src.getUnpaddedBytesInRow(), + number_of_rows, + kind ); + else + err = cudaMemcpy2DAsync( this->getBytePtr(), + this->getPitch(), + src.getBytePtr(), + src.getPitch(), + src.getUnpaddedBytesInRow(), + number_of_rows, + kind, + stream ); THROW_ON_CUDA_ERROR(err, "Failed to copy (" << __FILE__ << " " << __LINE__ << ")"); } } @@ -1250,61 +1297,9 @@ template void copy2D( Type* dst, size_t sx, size_t sy, THROW_ON_CUDA_ERROR( err, "Failed to copy (" << __FILE__ << " " << __LINE__ << ", " << cudaGetErrorString(err) << ")" ); } -struct CameraStructBase -{ - float P[12]; - float iP[9]; - float R[9]; - float iR[9]; - float K[9]; - float iK[9]; - float3 C; - 
float3 XVect; - float3 YVect; - float3 ZVect; -}; - -// #define ALICEVISION_DEPTHMAP_TEXTURE_USE_UCHAR -#define ALICEVISION_DEPTHMAP_TEXTURE_USE_INTERPOLATION - -#ifdef ALICEVISION_DEPTHMAP_TEXTURE_USE_UCHAR -using CudaColorBaseType = unsigned char; -using CudaRGBA = uchar4; - -#else -using CudaColorBaseType = float; -using CudaRGBA = float4; - -#endif - - -struct TexturedArray -{ - CudaDeviceMemoryPitched* arr = nullptr; - cudaTextureObject_t tex; -}; - -struct CamCacheIdx -{ - int i = 0; - - CamCacheIdx() = default; - explicit CamCacheIdx( int val ) : i(val) { } -}; - -typedef std::vector Pyramid; - -struct CameraStruct -{ - CamCacheIdx param_dev; - Pyramid* pyramid = nullptr; - int camId = -1; - cudaStream_t stream = 0; // allow async work on cameras used in parallel -}; - -/** -* @notes: use normalized coordinates -*/ +/* + * @notes: use normalized coordinates + */ template struct CudaTexture { diff --git a/src/aliceVision/depthMap/cuda/host/utils.cpp b/src/aliceVision/depthMap/cuda/host/utils.cpp new file mode 100644 index 0000000000..2ed8c7bc8b --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/utils.cpp @@ -0,0 +1,118 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "utils.hpp" + +#include + +#include + +namespace aliceVision { +namespace depthMap { + +int listCudaDevices() +{ + int nbDevices = 0; // number of CUDA GPUs + + // determine the number of CUDA capable GPUs + cudaError_t err = cudaGetDeviceCount(&nbDevices); + CHECK_CUDA_ERROR(); + if(err != cudaSuccess) + { + ALICEVISION_LOG_ERROR("Cannot get CUDA device count."); + return 0; + } + + if(nbDevices < 1) + { + ALICEVISION_LOG_ERROR("No CUDA capable devices detected."); + return 0; + } + + + // display CPU and GPU configuration + std::stringstream s; + for(int i = 0; i < nbDevices; ++i) + { + cudaDeviceProp dprop; + cudaGetDeviceProperties(&dprop, i); + s << "\t- Device " << i << ": " << dprop.name << std::endl; + } + ALICEVISION_LOG_DEBUG(nbDevices << " CUDA devices found:" << std::endl << s.str()); + + return nbDevices; +} + +int getCudaDeviceId() +{ + int currentCudaDeviceId; + + if(cudaGetDevice(¤tCudaDeviceId) != cudaSuccess) + { + ALICEVISION_LOG_ERROR("Cannot get current CUDA device id."); + } + + CHECK_CUDA_ERROR(); + + return currentCudaDeviceId; +} + +void setCudaDeviceId(int cudaDeviceId) +{ + if(cudaSetDevice(cudaDeviceId) != cudaSuccess) + { + ALICEVISION_LOG_ERROR("Cannot set device id " << cudaDeviceId << " as current CUDA device."); + } + + CHECK_CUDA_ERROR(); +} + +bool testCudaDeviceId(int cudaDeviceId) +{ + int currentCudaDeviceId; + cudaGetDevice(¤tCudaDeviceId); + if(currentCudaDeviceId != cudaDeviceId) + { + ALICEVISION_LOG_WARNING("CUDA device id should be: " << cudaDeviceId << ", program curently use device id: " << currentCudaDeviceId << "."); + return false; + } + return true; +} + +void logDeviceMemoryInfo() +{ + size_t iavail; + size_t itotal; + + cudaMemGetInfo(&iavail, &itotal); + + const double availableMB = double(iavail) / (1024.0 * 1024.0); + const double totalMB = double(itotal) / (1024.0 * 1024.0); + const double usedMB = double(itotal - iavail) / (1024.0 * 1024.0); + + int cudaDeviceId; + cudaGetDevice(&cudaDeviceId); + 
+ ALICEVISION_LOG_INFO("Device memory (device id: "<< cudaDeviceId <<"):" << std::endl + << "\t- used: " << usedMB << " MB" << std::endl + << "\t- available: " << availableMB << " MB" << std::endl + << "\t- total: " << totalMB << " MB"); +} + +void getDeviceMemoryInfo(double& availableMB, double& usedMB, double& totalMB) +{ + size_t iavail; + size_t itotal; + + cudaMemGetInfo(&iavail, &itotal); + + availableMB = double(iavail) / (1024.0 * 1024.0); + totalMB = double(itotal) / (1024.0 * 1024.0); + usedMB = double(itotal - iavail) / (1024.0 * 1024.0); +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/host/utils.hpp b/src/aliceVision/depthMap/cuda/host/utils.hpp new file mode 100644 index 0000000000..a2e54cc180 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/host/utils.hpp @@ -0,0 +1,77 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#pragma once + +// Macro for checking cuda errors +#define CHECK_CUDA_ERROR() \ + if(cudaError_t err = cudaGetLastError()) \ + { \ + fprintf(stderr, "\n\nCUDAError: %s\n", cudaGetErrorString(err)); \ + fprintf(stderr, " file: %s\n", __FILE__); \ + fprintf(stderr, " function: %s\n", __FUNCTION__); \ + fprintf(stderr, " line: %d\n\n", __LINE__); \ + std::stringstream s; \ + s << "\n CUDA Error: " << cudaGetErrorString(err) << "\n file: " << __FILE__ \ + << "\n function: " << __FUNCTION__ << "\n line: " << __LINE__ << "\n"; \ + throw std::runtime_error(s.str()); \ + } + +#define ALICEVISION_CU_PRINT_DEBUG(a) std::cerr << a << std::endl; +#define ALICEVISION_CU_PRINT_ERROR(a) std::cerr << a << std::endl; + +#define THROW_ON_CUDA_ERROR(rcode, message) \ + if(rcode != cudaSuccess) \ + { \ + std::stringstream s; \ + s << message << ": " << cudaGetErrorString(err); \ + throw std::runtime_error(s.str()); \ + } + +namespace aliceVision { +namespace depthMap { + +/** + * @brief Get and log available CUDA devices. + * @return the number of CUDA devices + */ +int listCudaDevices(); + +/** + * @brief Get the device id currently used for GPU executions. + * @return current CUDA device id + */ +int getCudaDeviceId(); + +/** + * @brief Set the device to use for GPU executions. + * @param[in] cudaDeviceId the CUDA device id to use + */ +void setCudaDeviceId(int cudaDeviceId); + +/** + * @brief Test if the device id currently used for GPU executions + * is the same as the one given. + * @param[in] cudaDeviceId the given CUDA device id to test + */ +bool testCudaDeviceId(int cudaDeviceId); + +/** + * @brief Log current CUDA device memory information. + */ +void logDeviceMemoryInfo(); + +/** + * @brief Get current CUDA device memory information. 
+ * @param[out] availableMB the available memory in MB on the current CUDA device + * @param[out] usedMB the used memory in MB on the current CUDA device + * @param[out] totalMB the total memory in MB on the current CUDA device + */ +void getDeviceMemoryInfo(double& availableMB, double& usedMB, double& totalMB); + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/cuda/imageProcessing/deviceColorConversion.cu b/src/aliceVision/depthMap/cuda/imageProcessing/deviceColorConversion.cu new file mode 100644 index 0000000000..d8f5adb7a9 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/imageProcessing/deviceColorConversion.cu @@ -0,0 +1,43 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include "deviceColorConversion.hpp" + +#include +#include +#include + +namespace aliceVision { +namespace depthMap { + +__global__ void rgb2lab_kernel(CudaRGBA* irgbaOlab_d, int irgbaOlab_p, int width, int height) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + + if((x >= width) || (y >= height)) + return; + + CudaRGBA* rgb = get2DBufferAt(irgbaOlab_d, irgbaOlab_p, x, y); + float3 flab = xyz2lab(rgb2xyz(make_float3(float(rgb->x) / 255.f, float(rgb->y) / 255.f, float(rgb->z) / 255.f))); + + rgb->x = flab.x; + rgb->y = flab.y; + rgb->z = flab.z; +} + +__host__ void cuda_rgb2lab(CudaDeviceMemoryPitched& frame_dmp, int width, int height, cudaStream_t stream) +{ + const dim3 block(32, 2, 1); + const dim3 grid(divUp(width, block.x), divUp(height, block.y), 1); + + // in-place color conversion into CIELAB + rgb2lab_kernel<<>>(frame_dmp.getBuffer(), frame_dmp.getPitch(), width, height); + CHECK_CUDA_ERROR(); +} + +} // namespace depthMap +} // 
namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/imageProcessing/deviceColorConversion.hpp b/src/aliceVision/depthMap/cuda/imageProcessing/deviceColorConversion.hpp new file mode 100644 index 0000000000..d3ea3c9696 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/imageProcessing/deviceColorConversion.hpp @@ -0,0 +1,25 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include + +namespace aliceVision { +namespace depthMap { + +/** + * @brief In-place color conversion into CIELAB using CUDA. + * @param[in, out] frame_dmp the camera frame in device memory + * @param[in] width the frame width + * @param[in] height the frame height + * @param[in] stream the CUDA stream for gpu execution + */ +extern void cuda_rgb2lab(CudaDeviceMemoryPitched& frame_dmp, int width, int height, cudaStream_t stream); + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/cuda/imageProcessing/deviceGaussianFilter.cu b/src/aliceVision/depthMap/cuda/imageProcessing/deviceGaussianFilter.cu new file mode 100644 index 0000000000..7228e76dc8 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/imageProcessing/deviceGaussianFilter.cu @@ -0,0 +1,359 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2018 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "deviceGaussianFilter.hpp" + +#include +#include +#include +#include + +#include + +namespace aliceVision { +namespace depthMap { + +/********************************************************************************* +* global / constant data structures +*********************************************************************************/ +std::set d_gaussianArrayInitialized; +__device__ __constant__ int d_gaussianArrayOffset[MAX_CONSTANT_GAUSS_SCALES]; +__device__ __constant__ float d_gaussianArray[MAX_CONSTANT_GAUSS_MEM_SIZE]; + +/********************************************************************************* + * device functions definitions + *********************************************************************************/ + +__device__ void cuda_swap_float(float& a, float& b) +{ + float temp = a; + a = b; + b = temp; +} + +/********************************************************************************* + * kernel definitions + *********************************************************************************/ + +/* + * @note This kernel implementation is not optimized because the Gaussian filter is separable. 
+ */ +__global__ void downscaleWithGaussianBlur_kernel(cudaTextureObject_t originalFrameTex, + CudaRGBA* downscaleFrame_d, int downscaleFrame_p, + int downscaleFrameWidth, + int downscaleFrameHeight, + int downscale, + int gaussRadius) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if((x < downscaleFrameWidth) && (y < downscaleFrameHeight)) + { + const float s = float(downscale) * 0.5f; + + float4 accPix = make_float4(0.0f, 0.0f, 0.0f, 0.0f); + float sumFactor = 0.0f; + + for(int i = -gaussRadius; i <= gaussRadius; i++) + { + for(int j = -gaussRadius; j <= gaussRadius; j++) + { + const float4 curPix = tex2D_float4(originalFrameTex, float(x * downscale + j) + s, float(y * downscale + i) + s); + const float factor = getGauss(downscale - 1, i + gaussRadius) * + getGauss(downscale - 1, j + gaussRadius); // domain factor + + accPix = accPix + curPix * factor; + sumFactor += factor; + } + } + + CudaRGBA& out = BufPtr(downscaleFrame_d, downscaleFrame_p).at(x, y); + out.x = accPix.x / sumFactor; + out.y = accPix.y / sumFactor; + out.z = accPix.z / sumFactor; + out.w = accPix.w / sumFactor; + } +} + +__global__ void gaussianBlurVolumeZ_kernel(float* out_volume_d, int out_volume_s, int out_volume_p, + const float* in_volume_d, int in_volume_s, int in_volume_p, + int volDimX, int volDimY, int volDimZ, int gaussRadius) +{ + const int vx = blockIdx.x * blockDim.x + threadIdx.x; + const int vy = blockIdx.y * blockDim.y + threadIdx.y; + const int vz = blockIdx.z; + + const int gaussScale = gaussRadius - 1; + + if(vx >= volDimX || vy >= volDimY) + return; + + float sum = 0.0f; + float sumFactor = 0.0f; + + for(int rz = -gaussRadius; rz <= gaussRadius; rz++) + { + const int iz = vz + rz; + if((iz < volDimZ) && (iz >= 0)) // slice 0 is a valid sample, as in gaussianBlurVolumeXYZ_kernel + { + const float value = float(*get3DBufferAt(in_volume_d, in_volume_s, in_volume_p, vx, vy, iz)); + const float factor = getGauss(gaussScale, rz + gaussRadius); + sum += value * factor; + 
sumFactor += factor; + } + } + + *get3DBufferAt(out_volume_d, out_volume_s, out_volume_p, vx, vy, vz) = float(sum / sumFactor); // sumFactor > 0 whenever 0 <= vz < volDimZ (the rz == 0 sample is always accepted) +} + +__global__ void gaussianBlurVolumeXYZ_kernel(float* out_volume_d, int out_volume_s, int out_volume_p, + const float* in_volume_d, int in_volume_s, int in_volume_p, + int volDimX, int volDimY, int volDimZ, int gaussRadius) +{ + const int vx = blockIdx.x * blockDim.x + threadIdx.x; + const int vy = blockIdx.y * blockDim.y + threadIdx.y; + const int vz = blockIdx.z; + + const int gaussScale = gaussRadius - 1; + + if(vx >= volDimX || vy >= volDimY) + return; + + const int xMinRadius = max(-gaussRadius, -vx); + const int yMinRadius = max(-gaussRadius, -vy); + const int zMinRadius = max(-gaussRadius, -vz); + + const int xMaxRadius = min(gaussRadius, volDimX - vx - 1); + const int yMaxRadius = min(gaussRadius, volDimY - vy - 1); + const int zMaxRadius = min(gaussRadius, volDimZ - vz - 1); + + float sum = 0.0f; + float sumFactor = 0.0f; + + for(int rx = xMinRadius; rx <= xMaxRadius; rx++) + { + const int ix = vx + rx; + + for(int ry = yMinRadius; ry <= yMaxRadius; ry++) + { + const int iy = vy + ry; + + for(int rz = zMinRadius; rz <= zMaxRadius; rz++) + { + const int iz = vz + rz; + + const float value = float(*get3DBufferAt(in_volume_d, in_volume_s, in_volume_p, ix, iy, iz)); + const float factor = getGauss(gaussScale, rx + gaussRadius) * getGauss(gaussScale, ry + gaussRadius) * getGauss(gaussScale, rz + gaussRadius); + sum += value * factor; + sumFactor += factor; + } + } + } + + *get3DBufferAt(out_volume_d, out_volume_s, out_volume_p, vx, vy, vz) = float(sum / sumFactor); +} + +/** + * @warning: uses a hardcoded buffer size, so max radius value is 3. 
+ */ +__global__ void medianFilter3_kernel(cudaTextureObject_t tex, float* texLab_d, int texLab_p, int width, int height, int scale) +{ + const int radius = 3; + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if((x >= width - radius) || (y >= height - radius) || (x < radius) || (y < radius)) + return; + + const int filterWidth = radius * 2 + 1; + const int filterNbPixels = filterWidth * filterWidth; + + float buf[filterNbPixels]; // filterNbPixels + + // Assign masked values to buf + for(int yi = 0; yi < filterWidth; ++yi) + { + for(int xi = 0; xi < filterWidth; ++xi) + { + float pix = tex2D(tex, x + xi - radius, y + yi - radius); + buf[yi * filterWidth + xi] = pix; + } + } + + // Calculate until we get the median value + for(int k = 0; k < filterNbPixels; ++k) // (filterNbPixels + 1) / 2 + for(int l = 0; l < filterNbPixels; ++l) + if(buf[k] < buf[l]) + cuda_swap_float(buf[k], buf[l]); + + BufPtr(texLab_d, texLab_p).at(x, y) = buf[radius * filterWidth + radius]; +} + +/********************************************************************************* + * exported host function + *********************************************************************************/ +__host__ void cuda_createConstantGaussianArray(int cudaDeviceId, int scales) // float delta, int radius) +{ + if(scales >= MAX_CONSTANT_GAUSS_SCALES) + { + throw std::runtime_error( "Programming error: too few scales pre-computed for Gaussian kernels. Enlarge and recompile." 
); + } + + cudaError_t err; + + if(d_gaussianArrayInitialized.find(cudaDeviceId) != d_gaussianArrayInitialized.end()) + return; + + d_gaussianArrayInitialized.insert(cudaDeviceId); + + int* h_gaussianArrayOffset; + float* h_gaussianArray; + + err = cudaMallocHost(&h_gaussianArrayOffset, MAX_CONSTANT_GAUSS_SCALES * sizeof(int)); + THROW_ON_CUDA_ERROR(err, "Failed to allocate " << MAX_CONSTANT_GAUSS_SCALES * sizeof(int) << " of CUDA host memory."); + + err = cudaMallocHost(&h_gaussianArray, MAX_CONSTANT_GAUSS_MEM_SIZE * sizeof(float)); + THROW_ON_CUDA_ERROR(err, "Failed to allocate " << MAX_CONSTANT_GAUSS_MEM_SIZE * sizeof(float) << " of CUDA host memory."); + + int sumSizes = 0; + + for(int scale = 0; scale < MAX_CONSTANT_GAUSS_SCALES; ++scale) + { + h_gaussianArrayOffset[scale] = sumSizes; + const int radius = scale + 1; + const int size = 2 * radius + 1; + sumSizes += size; + } + + if(sumSizes >= MAX_CONSTANT_GAUSS_MEM_SIZE) + { + throw std::runtime_error( "Programming error: too little memory allocated for " + + std::to_string(MAX_CONSTANT_GAUSS_SCALES) + " Gaussian kernels. Enlarge and recompile." ); + } + + for(int scale = 0; scale < MAX_CONSTANT_GAUSS_SCALES; ++scale) + { + const int radius = scale + 1; + const float delta = 1.0f; + const int size = 2 * radius + 1; + + for(int idx = 0; idx < size; idx++) + { + int x = idx - radius; + h_gaussianArray[h_gaussianArrayOffset[scale]+idx] = expf(-(x * x) / (2 * delta * delta)); + } + } + + // create cuda array + err = cudaMemcpyToSymbol( d_gaussianArrayOffset, + h_gaussianArrayOffset, + MAX_CONSTANT_GAUSS_SCALES * sizeof(int), 0, cudaMemcpyHostToDevice); + + THROW_ON_CUDA_ERROR(err, "Failed to move Gaussian filter to symbol."); + + err = cudaMemcpyToSymbol(d_gaussianArray, + h_gaussianArray, + sumSizes * sizeof(float), 0, cudaMemcpyHostToDevice); + + THROW_ON_CUDA_ERROR(err, "Failed to move Gaussian filter to symbol." 
); + + cudaFreeHost(h_gaussianArrayOffset); + cudaFreeHost(h_gaussianArray); +} + +__host__ void cuda_downscaleWithGaussianBlur(CudaDeviceMemoryPitched& out_downscaleFrame_dmp, + cudaTextureObject_t originalFrameTex, + int downscale, + int downscaleFrameWidth, + int downscaleFrameHeight, + int gaussRadius, + cudaStream_t stream) +{ + const dim3 block(32, 2, 1); + const dim3 grid(divUp(downscaleFrameWidth, block.x), divUp(downscaleFrameHeight, block.y), 1); + + downscaleWithGaussianBlur_kernel<<>>( + originalFrameTex, + out_downscaleFrame_dmp.getBuffer(), + out_downscaleFrame_dmp.getPitch(), + downscaleFrameWidth, + downscaleFrameHeight, + downscale, + gaussRadius); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_gaussianBlurVolumeZ(CudaDeviceMemoryPitched& inout_volume_dmp, int gaussRadius, cudaStream_t stream) +{ + const CudaSize<3>& volDim = inout_volume_dmp.getSize(); + CudaDeviceMemoryPitched volSmoothZ_dmp(volDim); + + const dim3 block(32, 1, 1); + const dim3 grid(divUp(volDim.x(), block.x), divUp(volDim.y(), block.y), volDim.z()); + + gaussianBlurVolumeZ_kernel<<>>( + volSmoothZ_dmp.getBuffer(), + volSmoothZ_dmp.getBytesPaddedUpToDim(1), + volSmoothZ_dmp.getBytesPaddedUpToDim(0), + inout_volume_dmp.getBuffer(), + inout_volume_dmp.getBytesPaddedUpToDim(1), + inout_volume_dmp.getBytesPaddedUpToDim(0), + int(volDim.x()), + int(volDim.y()), + int(volDim.z()), + gaussRadius); + + inout_volume_dmp.copyFrom(volSmoothZ_dmp); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_gaussianBlurVolumeXYZ(CudaDeviceMemoryPitched& inout_volume_dmp, int gaussRadius, cudaStream_t stream) +{ + const CudaSize<3>& volDim = inout_volume_dmp.getSize(); + CudaDeviceMemoryPitched volSmoothXYZ_dmp(volDim); + + const dim3 block(32, 1, 1); + const dim3 grid(divUp(volDim.x(), block.x), divUp(volDim.y(), block.y), volDim.z()); + + gaussianBlurVolumeXYZ_kernel<<>>( + volSmoothXYZ_dmp.getBuffer(), + volSmoothXYZ_dmp.getBytesPaddedUpToDim(1), + volSmoothXYZ_dmp.getBytesPaddedUpToDim(0), + 
inout_volume_dmp.getBuffer(), + inout_volume_dmp.getBytesPaddedUpToDim(1), + inout_volume_dmp.getBytesPaddedUpToDim(0), + int(volDim.x()), + int(volDim.y()), + int(volDim.z()), + gaussRadius); + + inout_volume_dmp.copyFrom(volSmoothXYZ_dmp); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_medianFilter3(cudaTextureObject_t tex, CudaDeviceMemoryPitched& img) +{ + int scale = 1; + const dim3 block(32, 2, 1); + const dim3 grid(divUp(img.getSize()[0], block.x), divUp(img.getSize()[1], block.y), 1); + + medianFilter3_kernel<<>>( + tex, + img.getBuffer(), img.getPitch(), + img.getSize()[0], img.getSize()[1], + scale); + + CHECK_CUDA_ERROR(); +} + + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/cuda/images/gauss_filter.hpp b/src/aliceVision/depthMap/cuda/imageProcessing/deviceGaussianFilter.hpp similarity index 65% rename from src/aliceVision/depthMap/cuda/images/gauss_filter.hpp rename to src/aliceVision/depthMap/cuda/imageProcessing/deviceGaussianFilter.hpp index 6b72c79707..98e947c75b 100644 --- a/src/aliceVision/depthMap/cuda/images/gauss_filter.hpp +++ b/src/aliceVision/depthMap/cuda/imageProcessing/deviceGaussianFilter.hpp @@ -6,8 +6,8 @@ #pragma once -#include -#include +#include +#include #include @@ -29,12 +29,55 @@ __device__ inline float getGauss(int scale, int idx) return d_gaussianArray[d_gaussianArrayOffset[scale] + idx]; } -extern void ps_create_gaussian_arr( int deviceId, int scales ); +/** + * @brief Create Gaussian array in device constant memory. + * @param[in] cudaDeviceId the cuda device id + * @param[in] scales the number of pre-computed Gaussian scales + */ +extern void cuda_createConstantGaussianArray(int cudaDeviceId, int scales); -extern void ps_downscale_gauss( Pyramid& pyramid, - int scale, - int w, int h, int radius, - cudaStream_t stream ); +/** + * @brief Downscale with Gaussian blur the given frame. 
+ * @param[out] out_downscaleFrame_dmp the downscaled frame in device memory + * @param[in] originalFrame_tex the cuda texture object of the full size frame + * @param[in] downscaleFrameWidth the downscaled frame width + * @param[in] downscaleFrameHeight the downscaled frame height + * @param[in] gaussRadius the Gaussian radius + * @param[in] stream the CUDA stream for gpu execution + */ +extern void cuda_downscaleWithGaussianBlur(CudaDeviceMemoryPitched& out_downscaleFrame_dmp, + cudaTextureObject_t originalFrame_tex, + int downscale, + int downscaleFrameWidth, + int downscaleFrameHeight, + int gaussRadius, + cudaStream_t stream); + +/** + * @brief Apply a Gaussian blur to the Z axis of the given volume. + * @param[in,out] inout_volume_dmp the input/output volume in device memory + * @param[in] gaussRadius the Gaussian radius + * @param[in] stream the CUDA stream for gpu execution + */ +extern void cuda_gaussianBlurVolumeZ(CudaDeviceMemoryPitched& inout_volume_dmp, + int gaussRadius, + cudaStream_t stream); + +/** + * @brief Apply a Gaussian blur to the X, Y and Z axes of the given volume. + * @param[in,out] inout_volume_dmp the input/output volume in device memory + * @param[in] gaussRadius the Gaussian radius + * @param[in] stream the CUDA stream for gpu execution + */ +extern void cuda_gaussianBlurVolumeXYZ(CudaDeviceMemoryPitched& inout_volume_dmp, + int gaussRadius, + cudaStream_t stream); + +/** + * @brief Apply a Median filter to the given image. + * @warning: uses a hardcoded buffer size, so max radius value is 3. 
+ */ +extern void cuda_medianFilter3(cudaTextureObject_t tex, CudaDeviceMemoryPitched& img); #ifdef ALICEVISION_TMP_WITH_BILATERALFILTER @@ -162,11 +205,6 @@ __host__ void ps_bilateralFilter( } #endif -__host__ void ps_medianFilter3( - cudaTextureObject_t tex, - CudaDeviceMemoryPitched& img); - - } // namespace depthMap } // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/images/gauss_filter.cu b/src/aliceVision/depthMap/cuda/images/gauss_filter.cu deleted file mode 100644 index 8b3b91fe87..0000000000 --- a/src/aliceVision/depthMap/cuda/images/gauss_filter.cu +++ /dev/null @@ -1,235 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2018 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#include - -#include "gauss_filter.hpp" -#include -#include -#include -#include - - -namespace aliceVision { -namespace depthMap { - -/********************************************************************************* -* global / constant data structures -*********************************************************************************/ -std::set d_gaussianArrayInitialized; -__device__ __constant__ int d_gaussianArrayOffset[MAX_CONSTANT_GAUSS_SCALES]; -__device__ __constant__ float d_gaussianArray[MAX_CONSTANT_GAUSS_MEM_SIZE]; - -/********************************************************************************* - * kernel forward declarations - *********************************************************************************/ -__global__ void downscale_gauss_smooth_lab_kernel( - cudaTextureObject_t rc_tex, - CudaRGBA* texLab, int texLab_p, - int width, int height, int scale, int radius); - -/********************************************************************************* - * exported host function - 
*********************************************************************************/ -__host__ void ps_create_gaussian_arr( int deviceId, int scales ) // float delta, int radius) -{ - if( scales >= MAX_CONSTANT_GAUSS_SCALES ) - { - throw std::runtime_error( "Programming error: too few scales pre-computed for Gaussian kernels. Enlarge and recompile." ); - } - - cudaError_t err; - - if( d_gaussianArrayInitialized.find( deviceId ) != d_gaussianArrayInitialized.end() ) return; - - d_gaussianArrayInitialized.insert( deviceId ); - - int* h_gaussianArrayOffset; - float* h_gaussianArray; - err = cudaMallocHost( &h_gaussianArrayOffset, MAX_CONSTANT_GAUSS_SCALES * sizeof(int) ); - THROW_ON_CUDA_ERROR(err, "Failed to allocate " << MAX_CONSTANT_GAUSS_SCALES * sizeof(int) << " of CUDA host memory."); - - err = cudaMallocHost( &h_gaussianArray, MAX_CONSTANT_GAUSS_MEM_SIZE * sizeof(float) ); - THROW_ON_CUDA_ERROR(err, "Failed to allocate " << MAX_CONSTANT_GAUSS_MEM_SIZE * sizeof(float) << " of CUDA host memory."); - - int sum_sizes = 0; - for( int scale=0; scale= MAX_CONSTANT_GAUSS_MEM_SIZE ) - { - throw std::runtime_error( "Programming error: too little memory allocated for " - + std::to_string(MAX_CONSTANT_GAUSS_SCALES) + " Gaussian kernels. Enlarge and recompile." ); - } - - for( int scale=0; scale>> - ( pyramid[0].tex, - pyramid[scale].arr->getBuffer(), - pyramid[scale].arr->getPitch(), - w / (scale + 1), h / (scale + 1), scale + 1, - radius - ); -} -/********************************************************************************* - * kernel definitions - *********************************************************************************/ - -/* This is a bad Gaussian filter implementation - the Gaussian filter is separable. 
*/ -__global__ void downscale_gauss_smooth_lab_kernel( - cudaTextureObject_t rc_tex, - CudaRGBA* texLab, int texLab_p, - int width, int height, int scale, int radius) -{ - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - - if((x < width) && (y < height)) - { - float s = (float)scale * 0.5f; - float4 t = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - float sum = 0.0f; - for(int i = -radius; i <= radius; i++) - { - for(int j = -radius; j <= radius; j++) - { - float4 curPix = tex2D_float4(rc_tex, (float)(x * scale + j) + s, - (float)(y * scale + i) + s); - float factor = getGauss( scale-1, i + radius ) - * getGauss( scale-1, j + radius ); // domain factor - t = t + curPix * factor; - sum += factor; - } - } - t.x = t.x / sum; - t.y = t.y / sum; - t.z = t.z / sum; - t.w = t.w / sum; - - CudaRGBA& out = BufPtr(texLab, texLab_p).at(x,y); - out.x = t.x; - out.y = t.y; - out.z = t.z; - out.w = t.w; - } -} - -__device__ void cuda_swap_float(float& a, float& b) -{ - float temp = a; - a = b; - b = temp; -} - -/** -* @warning: use an hardcoded buffer size, so max radius value is 3. 
-*/ -__global__ void medianFilter3_kernel( - cudaTextureObject_t tex, - float* texLab, int texLab_p, - int width, int height, - int scale) -{ - const int radius = 3; - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if ((x >= width - radius) || (y >= height - radius) || - (x < radius) || (y < radius)) - return; - - const int filterWidth = radius * 2 + 1; - const int filterNbPixels = filterWidth * filterWidth; - - float buf[filterNbPixels]; // filterNbPixels - - // Assign masked values to buf - for (int yi = 0; yi < filterWidth; ++yi) - { - for (int xi = 0; xi < filterWidth; ++xi) - { - float pix = tex2D(tex, x + xi - radius, y + yi - radius); - buf[yi * filterWidth + xi] = pix; - } - } - - // Calculate until we get the median value - for (int k = 0; k < filterNbPixels; ++k) // (filterNbPixels + 1) / 2 - for (int l = 0; l < filterNbPixels; ++l) - if (buf[k] < buf[l]) - cuda_swap_float(buf[k], buf[l]); - - BufPtr(texLab, texLab_p).at(x, y) = buf[radius * filterWidth + radius]; -} - - -__host__ void ps_medianFilter3( - cudaTextureObject_t tex, - CudaDeviceMemoryPitched& img) -{ - int scale = 1; - const dim3 block(32, 2, 1); - const dim3 grid(divUp(img.getSize()[0], block.x), divUp(img.getSize()[1], block.y), 1); - - medianFilter3_kernel - <<>> - (tex, - img.getBuffer(), img.getPitch(), - img.getSize()[0], img.getSize()[1], - scale - ); -} - - -} // namespace depthMap -} // namespace aliceVision - diff --git a/src/aliceVision/depthMap/cuda/normalMapping/DeviceNormalMapper.cpp b/src/aliceVision/depthMap/cuda/normalMapping/DeviceNormalMapper.cpp new file mode 100644 index 0000000000..1967f084f9 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/normalMapping/DeviceNormalMapper.cpp @@ -0,0 +1,95 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2017 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. 
If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include "DeviceNormalMapper.hpp" + +#include + +namespace aliceVision { +namespace depthMap { + +DeviceNormalMapper::DeviceNormalMapper() + : _allocated_floats(0) + , _depthMapHst(0) + , _normalMapHst(0) +{ + cudaError_t err; + + err = cudaMallocHost(&cameraParameters_h, sizeof(DeviceCameraParams) ); + THROW_ON_CUDA_ERROR( err, "Failed to allocate camera parameters on host in normal mapping" ); + + err = cudaMalloc(&cameraParameters_d, sizeof(DeviceCameraParams)); + THROW_ON_CUDA_ERROR( err, "Failed to allocate camera parameters on device in normal mapping" ); +} + +DeviceNormalMapper::~DeviceNormalMapper() +{ + cudaFree(cameraParameters_d); + cudaFreeHost(cameraParameters_h); + + if( _depthMapHst ) cudaFreeHost( _depthMapHst ); + if( _normalMapHst ) cudaFreeHost( _normalMapHst ); +} + +void DeviceNormalMapper::loadCameraParameters() +{ + cudaError_t err; + err = cudaMemcpy(cameraParameters_d, cameraParameters_h, sizeof(DeviceCameraParams), cudaMemcpyHostToDevice); + THROW_ON_CUDA_ERROR( err, "Failed to copy camera parameters from host to device in normal mapping" ); +} + +void DeviceNormalMapper::allocHostMaps(int w, int h) +{ + cudaError_t err; + if( _depthMapHst ) + { + if( w*h != _allocated_floats ) // a stray ';' used to make this test a no-op; re-allocate only on a size change so _allocated_floats stays equal to w*h + { + err = cudaFreeHost( _depthMapHst ); + THROW_ON_CUDA_ERROR( err, "Failed to free host depth map in normal mapping" ); + err = cudaMallocHost( &_depthMapHst, w*h*sizeof(float) ); + THROW_ON_CUDA_ERROR( err, "Failed to re-allocate host depth map in normal mapping" ); + + err = cudaFreeHost( _normalMapHst ); + THROW_ON_CUDA_ERROR( err, "Failed to free host normal map in normal mapping" ); + err = cudaMallocHost( &_normalMapHst, w*h*sizeof(float3) ); + THROW_ON_CUDA_ERROR( err, "Failed to re-allocate host normal map in normal mapping" ); + _allocated_floats = w * h; + } + } + else + { + err = cudaMallocHost( &_depthMapHst, w*h*sizeof(float) ); + 
THROW_ON_CUDA_ERROR( err, "Failed to allocate host depth map in normal mapping" ); + err = cudaMallocHost( &_normalMapHst, w*h*sizeof(float3) ); + THROW_ON_CUDA_ERROR( err, "Failed to allocate host normal map in normal mapping" ); + _allocated_floats = w * h; + } +} + +void DeviceNormalMapper::copyDepthMap(const float* depthMap, int depthMapSize) +{ + if(_allocated_floats > depthMapSize) + { + std::cerr << "WARNING: " << __FILE__ << ":" << __LINE__ + << ": copying depthMap whose origin is too small" << std::endl; + } + memcpy(_depthMapHst, depthMap, size_t(depthMapSize < _allocated_floats ? (depthMapSize > 0 ? depthMapSize : 0) : _allocated_floats) * sizeof(float)); // clamp to the source size: never read past the end of depthMap +} + +const float* DeviceNormalMapper::getDepthMapHst() const +{ + return _depthMapHst; +} + +float3* DeviceNormalMapper::getNormalMapHst() +{ + return _normalMapHst; +} + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/cuda/normalmap/normal_map.hpp b/src/aliceVision/depthMap/cuda/normalMapping/DeviceNormalMapper.hpp similarity index 67% rename from src/aliceVision/depthMap/cuda/normalmap/normal_map.hpp rename to src/aliceVision/depthMap/cuda/normalMapping/DeviceNormalMapper.hpp index 37acbb70d0..6c645f193f 100644 --- a/src/aliceVision/depthMap/cuda/normalmap/normal_map.hpp +++ b/src/aliceVision/depthMap/cuda/normalMapping/DeviceNormalMapper.hpp @@ -6,16 +6,17 @@ #pragma once -#include +#include +#include namespace aliceVision { namespace depthMap { -class NormalMapping +class DeviceNormalMapper { public: - NormalMapping(); - ~NormalMapping(); + DeviceNormalMapper(); + ~DeviceNormalMapper(); void loadCameraParameters(); void allocHostMaps( int w, int h ); @@ -25,8 +26,8 @@ class NormalMapping float3* getNormalMapHst(); // an output public: - CameraStructBase* camsBasesHst; - CameraStructBase* camsBasesDev; + DeviceCameraParams* cameraParameters_h; + DeviceCameraParams* cameraParameters_d; private: int _allocated_floats; @@ -34,12 +35,6 @@ class NormalMapping float3* _normalMapHst; }; -void ps_computeNormalMap( - NormalMapping* mapping, - 
int width, int height, - int scale, int ncamsAllocated, int scales, int wsh, bool verbose, - float gammaC, float gammaP); - } // namespace depthMap } // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMap.cu b/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMap.cu new file mode 100644 index 0000000000..b4b7cbaebf --- /dev/null +++ b/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMap.cu @@ -0,0 +1,50 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2017 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include "deviceNormalMap.hpp" +#include "deviceNormalMapKernels.cuh" + +#include + +namespace aliceVision { +namespace depthMap { + +__host__ void cuda_computeNormalMap(DeviceNormalMapper* mapping, + int width, + int height, + int wsh, + float gammaC, + float gammaP) +{ + const DeviceCameraParams* cameraParameters_d = mapping->cameraParameters_d; + + CudaDeviceMemoryPitched depthMap_dmp(CudaSize<2>( width, height )); + depthMap_dmp.copyFrom( mapping->getDepthMapHst(), width, height ); + + CudaDeviceMemoryPitched normalMap_dmp(CudaSize<2>( width, height )); + + const int blockSize = 8; + const dim3 block(blockSize, blockSize, 1); + const dim3 grid(divUp(width, blockSize), divUp(height, blockSize), 1); + + // compute normal map + computeNormalMap_kernel<<>>( + *cameraParameters_d, + depthMap_dmp.getBuffer(), + depthMap_dmp.getPitch(), + normalMap_dmp.getBuffer(), + normalMap_dmp.getPitch(), + width, height, wsh, + gammaC, gammaP); + + normalMap_dmp.copyTo( mapping->getNormalMapHst(), width, height ); + + CHECK_CUDA_ERROR(); +} + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMap.hpp 
b/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMap.hpp new file mode 100644 index 0000000000..05046720ff --- /dev/null +++ b/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMap.hpp @@ -0,0 +1,23 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2017 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include + +namespace aliceVision { +namespace depthMap { + +extern void cuda_computeNormalMap(DeviceNormalMapper* mapping, + int width, + int height, + int wsh, + float gammaC, + float gammaP); + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMapKernels.cuh b/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMapKernels.cuh new file mode 100644 index 0000000000..9ce4b36208 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/normalMapping/deviceNormalMapKernels.cuh @@ -0,0 +1,108 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2017 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#pragma once + +#include +#include +#include +#include + +#include + +namespace aliceVision { +namespace depthMap { + +__device__ static inline +float3 get3DPointForPixelAndDepthFromRC(const DeviceCameraParams& rcDeviceCamParams, const float2& pix, float depth) +{ + float3 rpv = M3x3mulV2(rcDeviceCamParams.iP, pix); + normalize(rpv); + return rcDeviceCamParams.C + rpv * depth; +} + +__device__ static inline +float3 get3DPointForPixelAndDepthFromRC(const DeviceCameraParams& rcDeviceCamParams, const int2& pixi, float depth) +{ + float2 pix; + pix.x = float(pixi.x); + pix.y = float(pixi.y); + return get3DPointForPixelAndDepthFromRC(rcDeviceCamParams, pix, depth); +} + +__device__ static inline +float orientedPointPlaneDistanceNormalizedNormal(const float3& point, const float3& planePoint, + const float3& planeNormalNormalized) +{ + return (dot(point, planeNormalNormalized) - dot(planePoint, planeNormalNormalized)); +} + +__global__ void computeNormalMap_kernel(const DeviceCameraParams& rcDeviceCamParams, + float* depthMap_d, int depthMap_p, //cudaTextureObject_t depthsTex, + float3* nmap_d, int nmap_p, + int width, int height, int wsh, const float gammaC, const float gammaP) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + + if ((x >= width) || (y >= height)) + return; + + float depth = *get2DBufferAt(depthMap_d, depthMap_p, x, y); // tex2D(depthsTex, x, y); + if(depth <= 0.0f) + { + *get2DBufferAt(nmap_d, nmap_p, x, y) = make_float3(-1.f, -1.f, -1.f); + return; + } + + int2 pix1 = make_int2(x, y); + float3 p = get3DPointForPixelAndDepthFromRC(rcDeviceCamParams, pix1, depth); + float pixSize = 0.0f; + { + int2 pix2 = make_int2(x + 1, y); + float3 p2 = get3DPointForPixelAndDepthFromRC(rcDeviceCamParams, pix2, depth); + pixSize = size(p - p2); + } + + cuda_stat3d s3d = cuda_stat3d(); + + for (int yp = -wsh; yp <= wsh; ++yp) + { + for (int xp = -wsh; xp <= wsh; ++xp) + { + float depthn = ((x + xp >= 0) && (x + xp < width) && (y + yp >= 0) && (y + yp < height)) ? *get2DBufferAt(depthMap_d, 
depthMap_p, x + xp, y + yp) : -1.0f; // a raw buffer has no border handling: neighbours outside the image are not read and are marked invalid (-1) + if ((depthn > 0.0f) && (fabs(depthn - depth) < 30.0f * pixSize)) // test the neighbour's depth; the centre depth is already known to be > 0 here + { + float w = 1.0f; + float2 pixn = make_float2(x + xp, y + yp); + float3 pn = get3DPointForPixelAndDepthFromRC(rcDeviceCamParams, pixn, depthn); + s3d.update(pn, w); + } + } + } + + float3 pp = p; + float3 nn = make_float3(-1.f, -1.f, -1.f); + if(!s3d.computePlaneByPCA(pp, nn)) + { + *get2DBufferAt(nmap_d, nmap_p, x, y) = make_float3(-1.f, -1.f, -1.f); + return; + } + + float3 nc = rcDeviceCamParams.C - p; + normalize(nc); + if (orientedPointPlaneDistanceNormalizedNormal(pp + nn, pp, nc) < 0.0f) + { + nn.x = -nn.x; + nn.y = -nn.y; + nn.z = -nn.z; + } + *get2DBufferAt(nmap_d, nmap_p, x, y) = nn; +} + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/cuda/normalmap/normal_map.cu b/src/aliceVision/depthMap/cuda/normalmap/normal_map.cu deleted file mode 100644 index 0a546aea36..0000000000 --- a/src/aliceVision/depthMap/cuda/normalmap/normal_map.cu +++ /dev/null @@ -1,257 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -namespace aliceVision { -namespace depthMap { - -// Macro for checking cuda errors -#define CHECK_CUDA_ERROR() \ - if(cudaError_t err = cudaGetLastError()) \ - { \ - fprintf(stderr, "\n\nCUDAError: %s\n", cudaGetErrorString(err)); \ - fprintf(stderr, " file: %s\n", __FILE__); \ - fprintf(stderr, " function: %s\n", __FUNCTION__); \ - fprintf(stderr, " line: %d\n\n", __LINE__); \ - std::stringstream s; \ - s << "\n CUDA Error: " << cudaGetErrorString(err) \ - << "\n file: " << __FILE__ \ - << "\n function: " << __FUNCTION__ \ - << "\n line: " << __LINE__ << "\n"; \ - throw std::runtime_error(s.str()); \ - } - -__device__ static inline -float3 get3DPointForPixelAndDepthFromRC(const CameraStructBase& rc_cam, const float2& pix, float depth) -{ - float3 rpv = M3x3mulV2(rc_cam.iP, pix); - normalize(rpv); - return rc_cam.C + rpv * depth; -} - -__device__ static inline -float3 get3DPointForPixelAndDepthFromRC(const CameraStructBase& rc_cam, const int2& pixi, float depth) -{ - float2 pix; - pix.x = (float)pixi.x; - pix.y = (float)pixi.y; - return get3DPointForPixelAndDepthFromRC(rc_cam, pix, depth); -} - -__device__ static inline -float orientedPointPlaneDistanceNormalizedNormal(const float3& point, const float3& planePoint, - const float3& planeNormalNormalized) -{ - return (dot(point, planeNormalNormalized) - dot(planePoint, planeNormalNormalized)); -} - -__global__ void computeNormalMap_kernel( - const CameraStructBase& rc_cam, - float* depthMap, int depthMap_p, //cudaTextureObject_t depthsTex, - float3* nmap, int nmap_p, - int width, int height, int wsh, const float gammaC, const float gammaP) -{ - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - - if ((x >= width) || (y >= height)) - return; - - float depth = *get2DBufferAt(depthMap, depthMap_p, x, y); // tex2D(depthsTex, x, y); - if(depth <= 0.0f) - { - 
*get2DBufferAt(nmap, nmap_p, x, y) = make_float3(-1.f, -1.f, -1.f); - return; - } - - int2 pix1 = make_int2(x, y); - float3 p = get3DPointForPixelAndDepthFromRC(rc_cam, pix1, depth); - float pixSize = 0.0f; - { - int2 pix2 = make_int2(x + 1, y); - float3 p2 = get3DPointForPixelAndDepthFromRC(rc_cam, pix2, depth); - pixSize = size(p - p2); - } - - cuda_stat3d s3d = cuda_stat3d(); - - for (int yp = -wsh; yp <= wsh; ++yp) - { - for (int xp = -wsh; xp <= wsh; ++xp) - { - float depthn = *get2DBufferAt(depthMap, depthMap_p, x + xp, y + yp); // tex2D(depthsTex, x + xp, y + yp); - if ((depth > 0.0f) && (fabs(depthn - depth) < 30.0f * pixSize)) - { - float w = 1.0f; - float2 pixn = make_float2(x + xp, y + yp); - float3 pn = get3DPointForPixelAndDepthFromRC(rc_cam, pixn, depthn); - s3d.update(pn, w); - } - } - } - - float3 pp = p; - float3 nn = make_float3(-1.f, -1.f, -1.f); - if(!s3d.computePlaneByPCA(pp, nn)) - { - *get2DBufferAt(nmap, nmap_p, x, y) = make_float3(-1.f, -1.f, -1.f); - return; - } - - float3 nc = rc_cam.C - p; - normalize(nc); - if (orientedPointPlaneDistanceNormalizedNormal(pp + nn, pp, nc) < 0.0f) - { - nn.x = -nn.x; - nn.y = -nn.y; - nn.z = -nn.z; - } - *get2DBufferAt(nmap, nmap_p, x, y) = nn; -} - -void ps_computeNormalMap( - NormalMapping* mapping, - int width, int height, - int scale, int ncamsAllocated, int scales, int wsh, bool verbose, - float gammaC, float gammaP) -{ - clock_t tall = tic(); - - const CameraStructBase* camera = mapping->camsBasesDev; - - CudaDeviceMemoryPitched depthMap_dmp(CudaSize<2>( width, height )); - depthMap_dmp.copyFrom( mapping->getDepthMapHst(), width, height ); - - CudaDeviceMemoryPitched normalMap_dmp(CudaSize<2>( width, height )); - - int block_size = 8; - dim3 block(block_size, block_size, 1); - dim3 grid(divUp(width, block_size), divUp(height, block_size), 1); - - if (verbose) - printf("computeNormalMap_kernel\n"); - - // compute normal map - computeNormalMap_kernel<<>>( - *camera, - depthMap_dmp.getBuffer(), - 
depthMap_dmp.getPitch(), - normalMap_dmp.getBuffer(), - normalMap_dmp.getPitch(), - width, height, wsh, - gammaC, gammaP); - - // cudaThreadSynchronize(); - // CHECK_CUDA_ERROR(); - - if (verbose) - printf("copy normal map to host\n"); - - normalMap_dmp.copyTo( mapping->getNormalMapHst(), width, height ); - CHECK_CUDA_ERROR(); - - if (verbose) - printf("gpu elapsed time: %f ms \n", toc(tall)); -} - -NormalMapping::NormalMapping() - : _allocated_floats(0) - , _depthMapHst(0) - , _normalMapHst(0) -{ - cudaError_t err; - - err = cudaMallocHost( &camsBasesHst, sizeof(CameraStructBase) ); - THROW_ON_CUDA_ERROR( err, "Failed to allocate camera parameters on host in normal mapping" ); - - err = cudaMalloc( &camsBasesDev, sizeof(CameraStructBase) ); - THROW_ON_CUDA_ERROR( err, "Failed to allocate camera parameters on device in normal mapping" ); -} - -NormalMapping::~NormalMapping() -{ - cudaFree( camsBasesDev ); - cudaFreeHost( camsBasesHst ); - - if( _depthMapHst ) cudaFreeHost( _depthMapHst ); - if( _normalMapHst ) cudaFreeHost( _normalMapHst ); -} - -void NormalMapping::loadCameraParameters() -{ - cudaError_t err; - err = cudaMemcpy( camsBasesDev, - camsBasesHst, - sizeof(CameraStructBase), - cudaMemcpyHostToDevice ); - THROW_ON_CUDA_ERROR( err, "Failed to copy camera parameters from host to device in normal mapping" ); -} - -void NormalMapping::allocHostMaps( int w, int h ) -{ - cudaError_t err; - if( _depthMapHst ) - { - if( w*h > _allocated_floats ); - { - err = cudaFreeHost( _depthMapHst ); - THROW_ON_CUDA_ERROR( err, "Failed to free host depth map in normal mapping" ); - err = cudaMallocHost( &_depthMapHst, w*h*sizeof(float) ); - THROW_ON_CUDA_ERROR( err, "Failed to re-allocate host depth map in normal mapping" ); - - err = cudaFreeHost( _normalMapHst ); - THROW_ON_CUDA_ERROR( err, "Failed to free host normal map in normal mapping" ); - err = cudaMallocHost( &_normalMapHst, w*h*sizeof(float3) ); - THROW_ON_CUDA_ERROR( err, "Failed to re-allocate host normal map in 
normal mapping" ); - _allocated_floats = w * h; - } - } - else - { - err = cudaMallocHost( &_depthMapHst, w*h*sizeof(float) ); - THROW_ON_CUDA_ERROR( err, "Failed to allocate host depth map in normal mapping" ); - err = cudaMallocHost( &_normalMapHst, w*h*sizeof(float3) ); - THROW_ON_CUDA_ERROR( err, "Failed to allocate host normal map in normal mapping" ); - _allocated_floats = w * h; - } -} - -void NormalMapping::copyDepthMap(const float* depthMap , int depthMapSize) -{ - if (_allocated_floats > depthMapSize) - { - std::cerr << "WARNING: " << __FILE__ << ":" << __LINE__ - << ": copying depthMap whose origin is too small" << std::endl; - } - memcpy( _depthMapHst, depthMap, _allocated_floats*sizeof(float) ); -} - -const float* NormalMapping::getDepthMapHst() const -{ - return _depthMapHst; -} - -float3* NormalMapping::getNormalMapHst() -{ - return _normalMapHst; -} - -} // namespace depthMap -} // namespace aliceVision - diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMap.cu b/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMap.cu new file mode 100644 index 0000000000..c23beb0ec3 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMap.cu @@ -0,0 +1,207 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "deviceDepthSimilarityMap.hpp" +#include "deviceDepthSimilarityMapKernels.cuh" + +#include + +#include + +namespace aliceVision { +namespace depthMap { + +__host__ void cuda_depthSimMapCopyDepthOnly(CudaDeviceMemoryPitched& out_depthSimMap_dmp, + const CudaDeviceMemoryPitched& in_depthSimMap_dmp, + float defaultSim, + cudaStream_t stream) +{ + const CudaSize<2>& depthSimMapSize = out_depthSimMap_dmp.getSize(); + + const int blockSize = 16; + const dim3 block(blockSize, blockSize, 1); + const dim3 grid(divUp(depthSimMapSize.x(), blockSize), divUp(depthSimMapSize.y(), blockSize), 1); + + depthSimMapCopyDepthOnly_kernel<<>>( + out_depthSimMap_dmp.getBuffer(), + out_depthSimMap_dmp.getPitch(), + in_depthSimMap_dmp.getBuffer(), + in_depthSimMap_dmp.getPitch(), + depthSimMapSize.x(), + depthSimMapSize.y(), + defaultSim); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_normalMapUpscale(CudaDeviceMemoryPitched& out_upscaledMap_dmp, + const CudaDeviceMemoryPitched& in_map_dmp, + const ROI& roi, + cudaStream_t stream) +{ + const CudaSize<2>& out_mapSize = out_upscaledMap_dmp.getSize(); + const CudaSize<2>& in_mapSize = in_map_dmp.getSize(); + + const float ratio = float(in_mapSize.x()) / float(out_mapSize.x()); + + const int blockSize = 16; + const dim3 block(blockSize, blockSize, 1); + const dim3 grid(divUp(roi.width(), blockSize), divUp(roi.height(), blockSize), 1); + + mapUpscale_kernel<<>>( + out_upscaledMap_dmp.getBuffer(), + out_upscaledMap_dmp.getPitch(), + in_map_dmp.getBuffer(), + in_map_dmp.getPitch(), + roi, + ratio); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_depthSimMapUpscaleAndFilter(CudaDeviceMemoryPitched& out_upscaledDepthSimMap_dmp, + const CudaDeviceMemoryPitched& in_otherDepthSimMap_dmp, + const DeviceCamera& rcDeviceCamera, + const RefineParams& refineParams, + const ROI& roi, + cudaStream_t stream) +{ + const CudaSize<2>& out_depthSimMapSize = out_upscaledDepthSimMap_dmp.getSize(); + const CudaSize<2>& in_depthSimMapSize = 
in_otherDepthSimMap_dmp.getSize(); + + const float ratio = float(in_depthSimMapSize.x()) / float(out_depthSimMapSize.x()); + + const int blockSize = 16; + const dim3 block(blockSize, blockSize, 1); + const dim3 grid(divUp(roi.width(), blockSize), divUp(roi.height(), blockSize), 1); + + depthSimMapUpscaleAndFilter_kernel<<>>( + rcDeviceCamera.getTextureObject(), + out_upscaledDepthSimMap_dmp.getBuffer(), + out_upscaledDepthSimMap_dmp.getPitch(), + in_otherDepthSimMap_dmp.getBuffer(), + in_otherDepthSimMap_dmp.getPitch(), + refineParams.stepXY, + roi, + ratio); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_depthSimMapComputePixSize(CudaDeviceMemoryPitched& inout_depthPixSizeMap_dmp, + const DeviceCamera& rcDeviceCamera, + const RefineParams& refineParams, + const ROI& roi, + cudaStream_t stream) +{ + const int blockSize = 16; + const dim3 block(blockSize, blockSize, 1); + const dim3 grid(divUp(roi.width(), blockSize), divUp(roi.height(), blockSize), 1); + + depthSimMapComputePixSize_kernel<<>>( + rcDeviceCamera.getDeviceCamId(), + inout_depthPixSizeMap_dmp.getBuffer(), + inout_depthPixSizeMap_dmp.getPitch(), + refineParams.stepXY, + roi); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_depthSimMapComputeNormal(CudaDeviceMemoryPitched& out_normalMap_dmp, + const CudaDeviceMemoryPitched& in_depthSimMap_dmp, + const DeviceCamera& rcDeviceCamera, + const SgmParams& sgmParams, + const ROI& roi, + cudaStream_t stream) +{ + // default parameters + const int wsh = 4; + const float gammaC = 1.0f; + const float gammaP = 1.0f; + + const dim3 block(8, 8, 1); + const dim3 grid(divUp(roi.width(), block.x), divUp(roi.height(), block.y), 1); + + depthSimMapComputeNormal_kernel<<>>( + rcDeviceCamera.getDeviceCamId(), + out_normalMap_dmp.getBuffer(), + out_normalMap_dmp.getPitch(), + in_depthSimMap_dmp.getBuffer(), + in_depthSimMap_dmp.getPitch(), + wsh, + gammaC, + gammaP, + sgmParams.stepXY, + roi); + + CHECK_CUDA_ERROR(); +} + +__host__ void 
cuda_depthSimMapOptimizeGradientDescent(CudaDeviceMemoryPitched& out_optimizeDepthSimMap_dmp, + CudaDeviceMemoryPitched& inout_imgVariance_dmp, + CudaDeviceMemoryPitched& inout_tmpOptDepthMap_dmp, + const CudaDeviceMemoryPitched& in_sgmDepthPixSizeMap_dmp, + const CudaDeviceMemoryPitched& in_refineDepthSimMap_dmp, + const DeviceCamera& rcDeviceCamera, + const RefineParams& refineParams, + const ROI& roi, + cudaStream_t stream) +{ + // initialize depth/sim map optimized with SGM depth/pixSize map + out_optimizeDepthSimMap_dmp.copyFrom(in_sgmDepthPixSizeMap_dmp, stream); + + { + // setup block and grid + const dim3 lblock(32, 2, 1); + const dim3 lgrid(divUp(roi.width(), lblock.x), divUp(roi.height(), lblock.y), 1); + + optimize_varLofLABtoW_kernel<<>>( + rcDeviceCamera.getTextureObject(), + inout_imgVariance_dmp.getBuffer(), + inout_imgVariance_dmp.getPitch(), + refineParams.stepXY, + roi); + } + + CudaTexture imgVarianceTex(inout_imgVariance_dmp); + CudaTexture depthTex(inout_tmpOptDepthMap_dmp); + + // setup block and grid + const int blockSize = 16; + const dim3 block(blockSize, blockSize, 1); + const dim3 grid(divUp(roi.width(), blockSize), divUp(roi.height(), blockSize), 1); + + for(int iter = 0; iter < refineParams.optimizationNbIterations; ++iter) // default nb iterations is 100 + { + // copy depths values from out_depthSimMapOptimized_dmp to inout_tmpOptDepthMap_dmp + optimize_getOptDeptMapFromOptDepthSimMap_kernel<<>>( + inout_tmpOptDepthMap_dmp.getBuffer(), + inout_tmpOptDepthMap_dmp.getPitch(), + out_optimizeDepthSimMap_dmp.getBuffer(), // initialized with SGM depth/sim map + out_optimizeDepthSimMap_dmp.getPitch(), + roi); + + // adjust depth/sim by using previously computed depths + optimize_depthSimMap_kernel<<>>( + rcDeviceCamera.getDeviceCamId(), + imgVarianceTex.textureObj, + depthTex.textureObj, + out_optimizeDepthSimMap_dmp.getBuffer(), + out_optimizeDepthSimMap_dmp.getPitch(), + in_sgmDepthPixSizeMap_dmp.getBuffer(), + 
in_sgmDepthPixSizeMap_dmp.getPitch(), + in_refineDepthSimMap_dmp.getBuffer(), + in_refineDepthSimMap_dmp.getPitch(), + iter, + roi); + } + + CHECK_CUDA_ERROR(); +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMap.hpp b/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMap.hpp new file mode 100644 index 0000000000..1e851b86b5 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMap.hpp @@ -0,0 +1,112 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include +#include +#include + +namespace aliceVision { +namespace depthMap { + +/** + * @brief Copy depth and default from input depth/sim map to another depth/sim map. + * @param[out] out_depthSimMap_dmp the output depth/sim map + * @param[in] in_depthSimMap_dmp the input depth/sim map to copy + * @param[in] defaultSim the default similarity value to copy + * @param[in] stream the stream for gpu execution + */ +extern void cuda_depthSimMapCopyDepthOnly(CudaDeviceMemoryPitched& out_depthSimMap_dmp, + const CudaDeviceMemoryPitched& in_depthSimMap_dmp, + float defaultSim, + cudaStream_t stream); + +/** + * @brief Upscale the given normal map. + * @param[out] out_upscaledMap_dmp the output upscaled normal map + * @param[in] in_map_dmp the normal map to upscaled + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_normalMapUpscale(CudaDeviceMemoryPitched& out_upscaledMap_dmp, + const CudaDeviceMemoryPitched& in_map_dmp, + const ROI& roi, + cudaStream_t stream); + +/** + * @brief Upscale the given depth/sim map and filter masked pixels. 
+ * @param[out] out_upscaledDepthSimMap_dmp the output upscaled depth/sim map + * @param[in] in_otherDepthSimMap_dmp the depth/sim map to upscaled + * @param[in] rcDeviceCamera the R device camera + * @param[in] refineParams the Refine parameters + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_depthSimMapUpscaleAndFilter(CudaDeviceMemoryPitched& out_upscaledDepthSimMap_dmp, + const CudaDeviceMemoryPitched& in_otherDepthSimMap_dmp, + const DeviceCamera& rcDeviceCamera, + const RefineParams& refineParams, + const ROI& roi, + cudaStream_t stream); + +/** + * @brief Compute the pixSize map from the depth map. + * @param[in,out] inout_depthPixSizeMap_dmp the input depth map, the output depth/pixSize map + * @param[in] rcDeviceCamera the R device camera + * @param[in] refineParams the Refine parameters + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_depthSimMapComputePixSize(CudaDeviceMemoryPitched& inout_depthPixSizeMap_dmp, + const DeviceCamera& rcDeviceCamera, + const RefineParams& refineParams, + const ROI& roi, + cudaStream_t stream); + + +/** + * @brief Compute the normal map from the depth/sim map (only depth is used). + * @param[out] out_normalMap_dmp the output normal map + * @param[in] in_depthSimMap_dmp the input depth/sim map (only depth is used) + * @param[in] rcDeviceCamera the R device camera + * @param[in] sgmParams the SGM parameters + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_depthSimMapComputeNormal(CudaDeviceMemoryPitched& out_normalMap_dmp, + const CudaDeviceMemoryPitched& in_depthSimMap_dmp, + const DeviceCamera& rcDeviceCamera, + const SgmParams& sgmParams, + const ROI& roi, + cudaStream_t stream); + +/** + * @brief Optimize a depth/sim map with the refineFused depth/sim map and the SGM depth/pixSize map. 
+ * @param[out] out_optimizeDepthSimMap_dmp the output optimized depth/sim map + * @param[in,out] inout_imgVariance_dmp the image variance buffer + * @param[in,out] inout_tmpOptDepthMap_dmp the temporary optimized depth map buffer + * @param[in] in_sgmDepthPixSizeMap_dmp the input SGM upscaled depth/pixSize map + * @param[in] in_refineDepthSimMap_dmp the input refined and fused depth/sim map + * @param[in] rcDeviceCamera the R device camera + * @param[in] refineParams the Refine parameters + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_depthSimMapOptimizeGradientDescent(CudaDeviceMemoryPitched& out_optimizeDepthSimMap_dmp, + CudaDeviceMemoryPitched& inout_imgVariance_dmp, + CudaDeviceMemoryPitched& inout_tmpOptDepthMap_dmp, + const CudaDeviceMemoryPitched& in_sgmDepthPixSizeMap_dmp, + const CudaDeviceMemoryPitched& in_refineDepthSimMap_dmp, + const DeviceCamera& rcDeviceCamera, + const RefineParams& refineParams, + const ROI& roi, + cudaStream_t stream); + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMapKernels.cuh b/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMapKernels.cuh new file mode 100644 index 0000000000..5abb265ea2 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/planeSweeping/deviceDepthSimilarityMapKernels.cuh @@ -0,0 +1,476 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2017 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#define ALICEVISION_DEPTHMAP_UPSCALE_NEAREST_NEIGHBOR + +namespace aliceVision { +namespace depthMap { + +/** + * @return (smoothStep, energy) + */ +__device__ float2 getCellSmoothStepEnergy(int rcDeviceCamId, cudaTextureObject_t depthTex, const int2& cell0, const int2& textureOffset) +{ + float2 out = make_float2(0.0f, 180.0f); + + // Get pixel depth from the depth texture + // Note: we do not use 0.5f offset because depthTex use nearest neighbor interpolation + const float d0 = tex2D(depthTex, float(cell0.x), float(cell0.y)); + + // Early exit: depth is <= 0 + if(d0 <= 0.0f) + return out; + + // Consider the neighbor pixels + const int2 cellL = cell0 + make_int2( 0, -1); // Left + const int2 cellR = cell0 + make_int2( 0, 1); // Right + const int2 cellU = cell0 + make_int2(-1, 0); // Up + const int2 cellB = cell0 + make_int2( 1, 0); // Bottom + + // Get associated depths from depth texture + // Note: we do not use 0.5f offset because depthTex use nearest neighbor interpolation + const float dL = tex2D(depthTex, float(cellL.x), float(cellL.y)); + const float dR = tex2D(depthTex, float(cellR.x), float(cellR.y)); + const float dU = tex2D(depthTex, float(cellU.x), float(cellU.y)); + const float dB = tex2D(depthTex, float(cellB.x), float(cellB.y)); + + // Get associated 3D points + const float3 p0 = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, cell0 + textureOffset, d0); + const float3 pL = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, cellL + textureOffset, dL); + const float3 pR = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, cellR + textureOffset, dR); + const float3 pU = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, cellU + textureOffset, dU); + const float3 pB = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, cellB + textureOffset, dB); + + // Compute the average point based on neighbors (cg) + float3 cg = make_float3(0.0f, 0.0f, 0.0f); + float n = 0.0f; + + if(dL > 0.0f) { 
cg = cg + pL; n++; } + if(dR > 0.0f) { cg = cg + pR; n++; } + if(dU > 0.0f) { cg = cg + pU; n++; } + if(dB > 0.0f) { cg = cg + pB; n++; } + + // If we have at least one valid depth + if(n > 1.0f) + { + cg = cg / n; // average of x, y, depth + float3 vcn = constantCameraParametersArray_d[rcDeviceCamId].C - p0; + normalize(vcn); + // pS: projection of cg on the line from p0 to camera + const float3 pS = closestPointToLine3D(cg, p0, vcn); + // keep the depth difference between pS and p0 as the smoothing step + out.x = size(constantCameraParametersArray_d[rcDeviceCamId].C - pS) - d0; + } + + float e = 0.0f; + n = 0.0f; + + if(dL > 0.0f && dR > 0.0f) + { + // Large angle between neighbors == flat area => low energy + // Small angle between neighbors == non-flat area => high energy + e = fmaxf(e, (180.0f - angleBetwABandAC(p0, pL, pR))); + n++; + } + if(dU > 0.0f && dB > 0.0f) + { + e = fmaxf(e, (180.0f - angleBetwABandAC(p0, pU, pB))); + n++; + } + // The higher the energy, the less flat the area + if(n > 0.0f) + out.y = e; + + return out; +} + +__device__ static inline float orientedPointPlaneDistanceNormalizedNormal(const float3& point, + const float3& planePoint, + const float3& planeNormalNormalized) +{ + return (dot(point, planeNormalNormalized) - dot(planePoint, planeNormalNormalized)); +} + +__global__ void depthSimMapCopyDepthOnly_kernel(float2* out_deptSimMap_d, int out_deptSimMap_p, + const float2* in_depthSimMap_d, int in_depthSimMap_p, + int width, int height, + float defaultSim) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if(x >= width || y >= height) + return; + + // write output + float2* out_depthSim = get2DBufferAt(out_deptSimMap_d, out_deptSimMap_p, x, y); + out_depthSim->x = get2DBufferAt(in_depthSimMap_d, in_depthSimMap_p, x, y)->x; + out_depthSim->y = defaultSim; +} + +template +__global__ void mapUpscale_kernel(T* out_upscaledMap_d, int out_upscaledMap_p, + const T* in_map_d, 
int in_map_p, + const ROI roi, + float ratio) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if(x >= roi.width() || y >= roi.height()) + return; + + const float ox = (float(x) - 0.5f) * ratio; + const float oy = (float(y) - 0.5f) * ratio; + + // nearest neighbor, no interpolation + const int xp = min(int(floor(ox + 0.5)), int(roi.width() * ratio) - 1); + const int yp = min(int(floor(oy + 0.5)), int(roi.height() * ratio) - 1); + + // write output upscaled map + *get2DBufferAt(out_upscaledMap_d, out_upscaledMap_p, x, y) = *get2DBufferAt(in_map_d, in_map_p, xp, yp); +} + + +__global__ void depthSimMapUpscaleAndFilter_kernel(cudaTextureObject_t rcTex, + float2* out_upscaledDeptSimMap_d, int out_upscaledDeptSimMap_p, + const float2* in_otherDepthSimMap_d, int in_otherDepthSimMap_p, + int stepXY, + const ROI roi, + float ratio) +{ + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + + if(roiX >= roi.width() || roiY >= roi.height()) + return; + + // corresponding device image coordinates + const int x = (roi.x.begin + roiX) * stepXY; + const int y = (roi.y.begin + roiY) * stepXY; + + // filter masked pixels (alpha < 0.9f) + if(tex2D_float4(rcTex, x + 0.5f, y + 0.5f).w < 0.9f) + { + *get2DBufferAt(out_upscaledDeptSimMap_d, out_upscaledDeptSimMap_p, roiX, roiY) = make_float2(-2.f, 1.f); + return; + } + + const float oy = (float(roiY) - 0.5f) * ratio; + const float ox = (float(roiX) - 0.5f) * ratio; + + float2 out_depthSim; + +#ifdef ALICEVISION_DEPTHMAP_UPSCALE_NEAREST_NEIGHBOR + // nearest neighbor, no interpolation + int xp = floor(ox + 0.5); + int yp = floor(oy + 0.5); + + xp = min(xp, int(roi.width() * ratio) - 1); + yp = min(yp, int(roi.height() * ratio) - 1); + + out_depthSim = *get2DBufferAt(in_otherDepthSimMap_d, in_otherDepthSimMap_p, xp, yp); +#else + // interpolate using the distance to the pixels center + int xp = floor(ox); + 
int yp = floor(oy); + + xp = min(xp, in_width - 2); + yp = min(yp, in_height - 2); + + const float2 lu = *get2DBufferAt(in_otherDepthSimMap_d, in_otherDepthSimMap_p, xp, yp); + const float2 ru = *get2DBufferAt(in_otherDepthSimMap_d, in_otherDepthSimMap_p, xp + 1, yp); + const float2 rd = *get2DBufferAt(in_otherDepthSimMap_d, in_otherDepthSimMap_p, xp + 1, yp + 1); + const float2 ld = *get2DBufferAt(in_otherDepthSimMap_d, in_otherDepthSimMap_p, xp, yp + 1); + + if(lu.x <= 0.0f || ru.x <= 0.0f || rd.x <= 0.0f || ld.x <= 0.0f) + { + float2 acc = {0.0f, 0.0f}; + int count = 0; + + if(lu.x > 0.0f) + { + acc = acc + lu; + ++count; + } + if(ru.x > 0.0f) + { + acc = acc + ru; + ++count; + } + if(rd.x > 0.0f) + { + acc = acc + rd; + ++count; + } + if(ld.x > 0.0f) + { + acc = acc + ld; + ++count; + } + if(count != 0) + { + out_depthSim = {acc.x / float(count), acc.y / float(count)}; + return; + } + else + { + out_depthSim = {-1.0f, 1.0f}; + return; + } + } + + // bilinear interpolation + const float ui = x - float(xp); + const float vi = y - float(yp); + const float2 u = lu + (ru - lu) * ui; + const float2 d = ld + (rd - ld) * ui; + out_depthSim = u + (d - u) * vi; +#endif + + // write output + *get2DBufferAt(out_upscaledDeptSimMap_d, out_upscaledDeptSimMap_p, roiX, roiY) = out_depthSim; +} + +__global__ void depthSimMapComputePixSize_kernel(int rcDeviceCamId, float2* inout_deptPixSizeMap_d, int inout_deptPixSizeMap_p, int stepXY, const ROI roi) +{ + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + + if(roiX >= roi.width() || roiY >= roi.height()) + return; + + // corresponding device image coordinates + const int x = (roi.x.begin + roiX) * stepXY; + const int y = (roi.y.begin + roiY) * stepXY; + + // corresponding input/output depthSim + float2* inout_depthPixSize = get2DBufferAt(inout_deptPixSizeMap_d, inout_deptPixSizeMap_p, roiX, roiY); + + // original depth invalid or masked, pixSize set to 0 + 
if(inout_depthPixSize->x < 0.0f) + { + inout_depthPixSize->y = 0; + return; + } + + // get rc 3d point + const float3 p = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, make_int2(x, y), inout_depthPixSize->x); + + inout_depthPixSize->y = computePixSize(rcDeviceCamId, p); +} + +__global__ void depthSimMapComputeNormal_kernel(int rcDeviceCamId, + float3* out_normalMap_d, int out_normalMap_p, + const float2* in_depthSimMap_d, int in_depthSimMap_p, + int wsh, + int gammaC, + int gammaP, + int stepXY, + const ROI roi) +{ + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + + if(roiX >= roi.width() || roiY >= roi.height()) + return; + + // corresponding image coordinates + const int x = (roi.x.begin + roiX) * stepXY; + const int y = (roi.y.begin + roiY) * stepXY; + + // corresponding input depth + const float in_depth = get2DBufferAt(in_depthSimMap_d, in_depthSimMap_p, roiX, roiY)->x; // use only depth + + // corresponding output normal + float3* out_normal = get2DBufferAt(out_normalMap_d, out_normalMap_p, roiX, roiY); + + // no depth + if(in_depth <= 0.0f) + { + *out_normal = make_float3(-1.f, -1.f, -1.f); + return; + } + + const int2 pix = make_int2(x, y); + const float3 p = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, pix, in_depth); + const float pixSize = size(p - get3DPointForPixelAndDepthFromRC(rcDeviceCamId, make_int2(x + 1, y), in_depth)); + + cuda_stat3d s3d = cuda_stat3d(); + + for(int yp = -wsh; yp <= wsh; ++yp) + { + const int roiYp = roiY + yp; + if(roiYp < 0) + continue; + + for(int xp = -wsh; xp <= wsh; ++xp) + { + const int roiXp = roiX + xp; + if(roiXp < 0) + continue; + + const float depthP = get2DBufferAt(in_depthSimMap_d, in_depthSimMap_p, roiXp, roiYp)->x; // use only depth + + if((depthP > 0.0f) && (fabs(depthP - in_depth) < 30.0f * pixSize)) + { + const float w = 1.0f; + const float2 pixP = make_float2(x + xp, y + yp); + const float3 pP = 
get3DPointForPixelAndDepthFromRC(rcDeviceCamId, pixP, depthP); + s3d.update(pP, w); + } + } + } + + float3 pp = p; + float3 nn = make_float3(-1.f, -1.f, -1.f); + + if(!s3d.computePlaneByPCA(pp, nn)) + { + *out_normal = make_float3(-1.f, -1.f, -1.f); + return; + } + + float3 nc = constantCameraParametersArray_d[rcDeviceCamId].C - p; + normalize(nc); + + if(orientedPointPlaneDistanceNormalizedNormal(pp + nn, pp, nc) < 0.0f) + { + nn.x = -nn.x; + nn.y = -nn.y; + nn.z = -nn.z; + } + + *out_normal = nn; +} + +__global__ void optimize_varLofLABtoW_kernel(cudaTextureObject_t rcTex, float* out_varianceMap_d, int out_varianceMap_p, int stepXY, const ROI roi) +{ + // roi and varianceMap coordinates + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + + if(roiX >= roi.width() || roiY >= roi.height()) + return; + + // corresponding device image coordinates + const int x = (roi.x.begin + roiX) * stepXY; + const int y = (roi.y.begin + roiY) * stepXY; + + // compute gradient size of L + // note: we use 0.5f offset because rcTex texture use interpolation + const float xM1 = tex2D_float4(rcTex, float(x - 1) + 0.5f, float(y + 0) + 0.5f).x; + const float xP1 = tex2D_float4(rcTex, float(x + 1) + 0.5f, float(y + 0) + 0.5f).x; + const float yM1 = tex2D_float4(rcTex, float(x + 0) + 0.5f, float(y - 1) + 0.5f).x; + const float yP1 = tex2D_float4(rcTex, float(x + 0) + 0.5f, float(y + 1) + 0.5f).x; + const float2 g = make_float2(xM1 - xP1, yM1 - yP1); // TODO: not divided by 2? 
+ const float grad = size(g); + + // write output + *get2DBufferAt(out_varianceMap_d, out_varianceMap_p, roiX, roiY) = grad; +} + +__global__ void optimize_getOptDeptMapFromOptDepthSimMap_kernel(float* out_tmpOptDepthMap_d, int out_tmpOptDepthMap_p, + const float2* in_optDepthSimMap_d, int in_optDepthSimMap_p, + const ROI roi) +{ + // roi and depth/sim map part coordinates + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + + if(roiX >= roi.width() || roiY >= roi.height()) + return; + + *get2DBufferAt(out_tmpOptDepthMap_d, out_tmpOptDepthMap_p, roiX, roiY) = get2DBufferAt(in_optDepthSimMap_d, in_optDepthSimMap_p, roiX, roiY)->x; // depth +} + +__global__ void optimize_depthSimMap_kernel(int rcDeviceCamId, + cudaTextureObject_t imgVarianceTex, + cudaTextureObject_t depthTex, + float2* out_optimizeDepthSimMap_d, int out_optimizeDepthSimMap_p, // output optimized depth/sim map + const float2* in_sgmDepthPixSizeMap_d, int in_sgmDepthPixSizeMap_p, // input upscaled rough depth/pixSize map + const float2* in_refineDepthSimMap_d, int in_refineDepthSimMap_p, // input fine depth/sim map + int iter, + const ROI roi) +{ + // roi and imgVarianceTex, depthTex coordinates + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + + if(roiX >= roi.width() || roiY >= roi.height()) + return; + + // SGM upscale (rough) depth/pixSize + const float2 sgmDepthPixSize = *get2DBufferAt(in_sgmDepthPixSizeMap_d, in_sgmDepthPixSizeMap_p, roiX, roiY); + const float sgmDepth = sgmDepthPixSize.x; + const float sgmPixSize = sgmDepthPixSize.y; + + // refined and fused (fine) depth/sim + const float2 refineDepthSim = *get2DBufferAt(in_refineDepthSimMap_d, in_refineDepthSimMap_p, roiX, roiY); + const float refineDepth = refineDepthSim.x; + const float refineSim = refineDepthSim.y; + + // output optimized depth/sim + float2* out_optDepthSimPtr = 
get2DBufferAt(out_optimizeDepthSimMap_d, out_optimizeDepthSimMap_p, roiX, roiY); + float2 out_optDepthSim = (iter == 0) ? make_float2(sgmDepth, refineSim) : *out_optDepthSimPtr; + const float depthOpt = out_optDepthSim.x; + + if (depthOpt > 0.0f) + { + const float2 depthSmoothStepEnergy = getCellSmoothStepEnergy(rcDeviceCamId, depthTex, {roiX, roiY}, {int(roi.x.begin), int(roi.y.begin)}); // (smoothStep, energy) + float stepToSmoothDepth = depthSmoothStepEnergy.x; + stepToSmoothDepth = copysignf(fminf(fabsf(stepToSmoothDepth), sgmPixSize / 10.0f), stepToSmoothDepth); + const float depthEnergy = depthSmoothStepEnergy.y; // max angle with neighbors + float stepToFineDM = refineDepth - depthOpt; // distance to refined/noisy input depth map + stepToFineDM = copysignf(fminf(fabsf(stepToFineDM), sgmPixSize / 10.0f), stepToFineDM); + + const float stepToRoughDM = sgmDepth - depthOpt; // distance to smooth/robust input depth map + const float imgColorVariance = tex2D(imgVarianceTex, float(roiX), float(roiY)); // do not use 0.5f offset because imgVarianceTex uses nearest neighbor interpolation + const float colorVarianceThresholdForSmoothing = 20.0f; + const float angleThresholdForSmoothing = 30.0f; // 30 + + // https://www.desmos.com/calculator/kob9lxs9qf + const float weightedColorVariance = sigmoid2(5.0f, angleThresholdForSmoothing, 40.0f, colorVarianceThresholdForSmoothing, imgColorVariance); + + // https://www.desmos.com/calculator/jwhpjq6ppj + const float fineSimWeight = sigmoid(0.0f, 1.0f, 0.7f, -0.7f, refineSim); + + // if geometry variation is bigger than color variation => the fineDM is considered noisy + + // if depthEnergy > weightedColorVariance => energyLowerThanVarianceWeight=0 => smooth + // else: => energyLowerThanVarianceWeight=1 => use fineDM + // weightedColorVariance max value is 30, so if depthEnergy > 30 (which means depthAngle < 150 degrees) energyLowerThanVarianceWeight will be 0 + // https://www.desmos.com/calculator/jzbweilb85 + const float 
energyLowerThanVarianceWeight = sigmoid(0.0f, 1.0f, 30.0f, weightedColorVariance, depthEnergy); // TODO: 30 => 60 + + // https://www.desmos.com/calculator/ilsk7pthvz + const float closeToRoughWeight = 1.0f - sigmoid(0.0f, 1.0f, 10.0f, 17.0f, fabsf(stepToRoughDM / sgmPixSize)); // TODO: 10 => 30 + + // f(z) = c1 * s1(z_rough - z)^2 + c2 * s2(z - z_fused)^2 + c3 * s3(z - z_smooth)^2 + + const float depthOptStep = closeToRoughWeight * stepToRoughDM + // distance to smooth/robust input depth map + (1.0f - closeToRoughWeight) * (energyLowerThanVarianceWeight * fineSimWeight * stepToFineDM + // distance to refined/noisy + (1.0f - energyLowerThanVarianceWeight) * stepToSmoothDepth); // max angle in current depthMap + + out_optDepthSim.x = depthOpt + depthOptStep; + + out_optDepthSim.y = (1.0f - closeToRoughWeight) * (energyLowerThanVarianceWeight * fineSimWeight * refineSim + (1.0f - energyLowerThanVarianceWeight) * (depthEnergy / 20.0f)); + } + + *out_optDepthSimPtr = out_optDepthSim; +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolume.cu b/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolume.cu new file mode 100644 index 0000000000..43b77f8cb2 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolume.cu @@ -0,0 +1,398 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "deviceSimilarityVolume.hpp" +#include "deviceSimilarityVolumeKernels.cuh" + +#include + +#include + +namespace aliceVision { +namespace depthMap { + +__host__ void cuda_volumeInitialize(CudaDeviceMemoryPitched& inout_volume_dmp, TSim value, cudaStream_t stream) +{ + const CudaSize<3>& volDim = inout_volume_dmp.getSize(); + const dim3 block(32, 4, 1); + const dim3 grid(divUp(volDim.x(), block.x), divUp(volDim.y(), block.y), volDim.z()); + + volume_init_kernel<<>>( + inout_volume_dmp.getBuffer(), + inout_volume_dmp.getBytesPaddedUpToDim(1), + inout_volume_dmp.getBytesPaddedUpToDim(0), + int(volDim.x()), + int(volDim.y()), + value); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_volumeInitialize(CudaDeviceMemoryPitched& inout_volume_dmp, TSimRefine value, cudaStream_t stream) +{ + const CudaSize<3>& volDim = inout_volume_dmp.getSize(); + const dim3 block(32, 4, 1); + const dim3 grid(divUp(volDim.x(), block.x), divUp(volDim.y(), block.y), volDim.z()); + + volume_init_kernel<<>>( + inout_volume_dmp.getBuffer(), + inout_volume_dmp.getBytesPaddedUpToDim(1), + inout_volume_dmp.getBytesPaddedUpToDim(0), + int(volDim.x()), + int(volDim.y()), + value); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_volumeAdd(CudaDeviceMemoryPitched& inout_volume_dmp, + const CudaDeviceMemoryPitched& in_volume_dmp, + cudaStream_t stream) +{ + const CudaSize<3>& volDim = inout_volume_dmp.getSize(); + const dim3 block(32, 4, 1); + const dim3 grid(divUp(volDim.x(), block.x), divUp(volDim.y(), block.y), volDim.z()); + + volume_add_kernel<<>>( + inout_volume_dmp.getBuffer(), + inout_volume_dmp.getBytesPaddedUpToDim(1), + inout_volume_dmp.getBytesPaddedUpToDim(0), + in_volume_dmp.getBuffer(), + in_volume_dmp.getBytesPaddedUpToDim(1), + in_volume_dmp.getBytesPaddedUpToDim(0), + int(volDim.x()), + int(volDim.y())); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_volumeUpdateUninitializedSimilarity(const CudaDeviceMemoryPitched& in_volBestSim_dmp, + CudaDeviceMemoryPitched& 
inout_volSecBestSim_dmp, + cudaStream_t stream) +{ + assert(in_volBestSim_dmp.getSize() == inout_volSecBestSim_dmp.getSize()); + + const CudaSize<3>& volDim = inout_volSecBestSim_dmp.getSize(); + + const dim3 block(32, 4, 1); + const dim3 grid(divUp(volDim.x(), block.x), divUp(volDim.y(), block.y), volDim.z()); + + volume_updateUninitialized_kernel<<>>( + inout_volSecBestSim_dmp.getBuffer(), + inout_volSecBestSim_dmp.getBytesPaddedUpToDim(1), + inout_volSecBestSim_dmp.getBytesPaddedUpToDim(0), + in_volBestSim_dmp.getBuffer(), + in_volBestSim_dmp.getBytesPaddedUpToDim(1), + in_volBestSim_dmp.getBytesPaddedUpToDim(0), + int(volDim.x()), + int(volDim.y())); + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_volumeComputeSimilarity(CudaDeviceMemoryPitched& out_volBestSim_dmp, + CudaDeviceMemoryPitched& out_volSecBestSim_dmp, + const CudaDeviceMemoryPitched& in_depths_dmp, + const DeviceCamera& rcDeviceCamera, + const DeviceCamera& tcDeviceCamera, + const SgmParams& sgmParams, + const Range& depthRange, + const ROI& roi, + cudaStream_t stream) +{ + const dim3 block(32, 1, 1); // minimal default settings + const dim3 grid(divUp(roi.width(), block.x), divUp(roi.height(), block.y), depthRange.size()); + + volume_slice_kernel<<>>( + rcDeviceCamera.getTextureObject(), + tcDeviceCamera.getTextureObject(), + rcDeviceCamera.getDeviceCamId(), + tcDeviceCamera.getDeviceCamId(), + rcDeviceCamera.getWidth(), + rcDeviceCamera.getHeight(), + tcDeviceCamera.getWidth(), + tcDeviceCamera.getHeight(), + float(sgmParams.gammaC), + float(sgmParams.gammaP), + sgmParams.wsh, + sgmParams.stepXY, + in_depths_dmp.getBuffer(), + in_depths_dmp.getBytesPaddedUpToDim(0), + out_volBestSim_dmp.getBuffer(), + out_volBestSim_dmp.getBytesPaddedUpToDim(1), + out_volBestSim_dmp.getBytesPaddedUpToDim(0), + out_volSecBestSim_dmp.getBuffer(), + out_volSecBestSim_dmp.getBytesPaddedUpToDim(1), + out_volSecBestSim_dmp.getBytesPaddedUpToDim(0), + depthRange, + roi); + + CHECK_CUDA_ERROR(); +} + +extern void 
cuda_volumeRefineSimilarity(CudaDeviceMemoryPitched& inout_volSim_dmp, + const CudaDeviceMemoryPitched& in_sgmDepthPixSizeMap_dmp, + const CudaDeviceMemoryPitched* in_sgmNormalMap_dmpPtr, + const DeviceCamera& rcDeviceCamera, + const DeviceCamera& tcDeviceCamera, + const RefineParams& refineParams, + const Range& depthRange, + const ROI& roi, + cudaStream_t stream) +{ + const dim3 block(32, 1, 1); // minimal default settings + const dim3 grid(divUp(roi.width(), block.x), divUp(roi.height(), block.y), depthRange.size()); + + volume_refine_kernel<<>>( + rcDeviceCamera.getTextureObject(), + tcDeviceCamera.getTextureObject(), + rcDeviceCamera.getDeviceCamId(), + tcDeviceCamera.getDeviceCamId(), + rcDeviceCamera.getWidth(), + rcDeviceCamera.getHeight(), + tcDeviceCamera.getWidth(), + tcDeviceCamera.getHeight(), + int(inout_volSim_dmp.getSize().z()), + refineParams.stepXY, + refineParams.wsh, + float(refineParams.gammaC), + float(refineParams.gammaP), + in_sgmDepthPixSizeMap_dmp.getBuffer(), + in_sgmDepthPixSizeMap_dmp.getBytesPaddedUpToDim(0), + (in_sgmNormalMap_dmpPtr == nullptr) ? nullptr : in_sgmNormalMap_dmpPtr->getBuffer(), + (in_sgmNormalMap_dmpPtr == nullptr) ? 
0 : in_sgmNormalMap_dmpPtr->getBytesPaddedUpToDim(0), + inout_volSim_dmp.getBuffer(), + inout_volSim_dmp.getBytesPaddedUpToDim(1), + inout_volSim_dmp.getBytesPaddedUpToDim(0), + depthRange, + roi); + + CHECK_CUDA_ERROR(); +} + + +__host__ void cuda_volumeAggregatePath(CudaDeviceMemoryPitched& out_volAgr_dmp, + CudaDeviceMemoryPitched& inout_volSliceAccA_dmp, + CudaDeviceMemoryPitched& inout_volSliceAccB_dmp, + CudaDeviceMemoryPitched& inout_volAxisAcc_dmp, + const CudaDeviceMemoryPitched& in_volSim_dmp, + const CudaSize<3>& axisT, + const DeviceCamera& rcDeviceCamera, + const SgmParams& sgmParams, + int lastDepthIndex, + int filteringIndex, + bool invY, + const ROI& roi, + cudaStream_t stream) +{ + CudaSize<3> volDim = in_volSim_dmp.getSize(); + volDim[2] = lastDepthIndex; // override volume depth, use rc depth list last index + + const size_t volDimX = volDim[axisT[0]]; + const size_t volDimY = volDim[axisT[1]]; + const size_t volDimZ = volDim[axisT[2]]; + + const int3 volDim_ = make_int3(volDim[0], volDim[1], volDim[2]); + const int3 axisT_ = make_int3(axisT[0], axisT[1], axisT[2]); + const int ySign = (invY ? 
-1 : 1); + + // setup block and grid + const int blockSize = 8; + const dim3 blockVolXZ(blockSize, blockSize, 1); + const dim3 gridVolXZ(divUp(volDimX, blockVolXZ.x), divUp(volDimZ, blockVolXZ.y), 1); + + const int blockSizeL = 64; + const dim3 blockColZ(blockSizeL, 1, 1); + const dim3 gridColZ(divUp(volDimX, blockColZ.x), 1, 1); + + const dim3 blockVolSlide(blockSizeL, 1, 1); + const dim3 gridVolSlide(divUp(volDimX, blockVolSlide.x), volDimZ, 1); + + CudaDeviceMemoryPitched* xzSliceForY_dmpPtr = &inout_volSliceAccA_dmp; // Y slice + CudaDeviceMemoryPitched* xzSliceForYm1_dmpPtr = &inout_volSliceAccB_dmp; // Y-1 slice + CudaDeviceMemoryPitched* bestSimInYm1_dmpPtr = &inout_volAxisAcc_dmp; // best sim score along the Y axis for each Z value + + // Copy the first XZ plane (at Y=0) from 'in_volSim_dmp' into 'xzSliceForYm1_dmpPtr' + volume_getVolumeXZSlice_kernel<<>>( + xzSliceForYm1_dmpPtr->getBuffer(), + xzSliceForYm1_dmpPtr->getPitch(), + in_volSim_dmp.getBuffer(), + in_volSim_dmp.getBytesPaddedUpToDim(1), + in_volSim_dmp.getBytesPaddedUpToDim(0), + volDim_, + axisT_, + 0 /* Y = 0 */ ); + + // Set the first Z plane from 'out_volAgr_dmp' to 255 + volume_initVolumeYSlice_kernel<<>>( + out_volAgr_dmp.getBuffer(), + out_volAgr_dmp.getBytesPaddedUpToDim(1), + out_volAgr_dmp.getBytesPaddedUpToDim(0), + volDim_, + axisT_, + 0, 255); + + for(int iy = 1; iy < volDimY; ++iy) + { + const int y = invY ? 
volDimY - 1 - iy : iy; + + // For each column: compute the best score + // Foreach x: + // bestSimInYm1[x] = min(d_xzSliceForY[1:height]) + volume_computeBestZInSlice_kernel<<>>( + xzSliceForYm1_dmpPtr->getBuffer(), + xzSliceForYm1_dmpPtr->getPitch(), + bestSimInYm1_dmpPtr->getBuffer(), + volDimX, volDimZ); + + // Copy the 'z' plane from 'in_volSim_dmp' into 'xzSliceForY' + volume_getVolumeXZSlice_kernel<<>>( + xzSliceForY_dmpPtr->getBuffer(), + xzSliceForY_dmpPtr->getPitch(), + in_volSim_dmp.getBuffer(), + in_volSim_dmp.getBytesPaddedUpToDim(1), + in_volSim_dmp.getBytesPaddedUpToDim(0), + volDim_, axisT_, y); + + volume_agregateCostVolumeAtXinSlices_kernel<<>>( + rcDeviceCamera.getTextureObject(), + xzSliceForY_dmpPtr->getBuffer(), // inout: xzSliceForY + xzSliceForY_dmpPtr->getPitch(), + xzSliceForYm1_dmpPtr->getBuffer(), // in: xzSliceForYm1 + xzSliceForYm1_dmpPtr->getPitch(), + bestSimInYm1_dmpPtr->getBuffer(), // in: bestSimInYm1 + out_volAgr_dmp.getBuffer(), + out_volAgr_dmp.getBytesPaddedUpToDim(1), + out_volAgr_dmp.getBytesPaddedUpToDim(0), + volDim_, axisT_, + sgmParams.stepXY, + y, + sgmParams.p1, + sgmParams.p2Weighting, + ySign, + filteringIndex, + roi); + + std::swap(xzSliceForYm1_dmpPtr, xzSliceForY_dmpPtr); + } + + CHECK_CUDA_ERROR(); +} + +__host__ void cuda_volumeOptimize(CudaDeviceMemoryPitched& out_volSimFiltered_dmp, + CudaDeviceMemoryPitched& inout_volSliceAccA_dmp, + CudaDeviceMemoryPitched& inout_volSliceAccB_dmp, + CudaDeviceMemoryPitched& inout_volAxisAcc_dmp, + const CudaDeviceMemoryPitched& in_volSim_dmp, + const DeviceCamera& rcDeviceCamera, + const SgmParams& sgmParams, + int lastDepthIndex, + const ROI& roi, + cudaStream_t stream) +{ + // update aggregation volume + int npaths = 0; + const auto updateAggrVolume = [&](const CudaSize<3>& axisT, bool invX) + { + cuda_volumeAggregatePath(out_volSimFiltered_dmp, + inout_volSliceAccA_dmp, + inout_volSliceAccB_dmp, + inout_volAxisAcc_dmp, + in_volSim_dmp, + axisT, + rcDeviceCamera, + 
sgmParams, + lastDepthIndex, + npaths, + invX, + roi, + stream); + npaths++; + }; + + // filtering is done on the last axis + const std::map> mapAxes = { + {'X', {1, 0, 2}}, // XYZ -> YXZ + {'Y', {0, 1, 2}}, // XYZ + }; + + for(char axis : sgmParams.filteringAxes) + { + const CudaSize<3>& axisT = mapAxes.at(axis); + updateAggrVolume(axisT, false); // forward traversal of the aggregation axis + updateAggrVolume(axisT, true); // reversed traversal of the aggregation axis (flag is forwarded as invY) + } +} + +__host__ void cuda_volumeRetrieveBestDepth(CudaDeviceMemoryPitched& out_sgmDepthSimMap_dmp, + const CudaDeviceMemoryPitched& in_depths_dmp, + const CudaDeviceMemoryPitched& in_volSim_dmp, + const DeviceCamera& rcDeviceCamera, + const SgmParams& sgmParams, + const Range& depthRange, + const ROI& roi, + cudaStream_t stream) +{ + const int scaleStep = sgmParams.scale * sgmParams.stepXY; + const int blockSize = 8; + const dim3 block(blockSize, blockSize, 1); + const dim3 grid(divUp(roi.width(), blockSize), divUp(roi.height(), blockSize), 1); + + volume_retrieveBestZ_kernel<<>>( + out_sgmDepthSimMap_dmp.getBuffer(), + out_sgmDepthSimMap_dmp.getBytesPaddedUpToDim(0), + in_depths_dmp.getBuffer(), + in_depths_dmp.getBytesPaddedUpToDim(0), + in_volSim_dmp.getBuffer(), + in_volSim_dmp.getBytesPaddedUpToDim(1), + in_volSim_dmp.getBytesPaddedUpToDim(0), + in_volSim_dmp.getSize().z(), + rcDeviceCamera.getDeviceCamId(), + scaleStep, + depthRange, + roi); + + CHECK_CUDA_ERROR(); +} + +extern void cuda_volumeRefineBestDepth(CudaDeviceMemoryPitched& out_refineDepthSimMap_dmp, + const CudaDeviceMemoryPitched& in_sgmDepthPixSizeMap_dmp, + const CudaDeviceMemoryPitched& in_volSim_dmp, + const DeviceCamera& rcDeviceCamera, + const RefineParams& refineParams, + const ROI& roi, + cudaStream_t stream) +{ + const int scaleStep = refineParams.scale * refineParams.stepXY; + const int halfNbSamples = refineParams.nbSubsamples * refineParams.halfNbDepths; + const float twoTimesSigmaPowerTwo = float(2.0 * refineParams.sigma * refineParams.sigma); + + 
const int blockSize = 8; + const dim3 block(blockSize, blockSize, 1); + const dim3 grid(divUp(roi.width(), blockSize), divUp(roi.height(), blockSize), 1); + + volume_refineBestZ_kernel<<>>( + out_refineDepthSimMap_dmp.getBuffer(), + out_refineDepthSimMap_dmp.getBytesPaddedUpToDim(0), + in_sgmDepthPixSizeMap_dmp.getBuffer(), + in_sgmDepthPixSizeMap_dmp.getBytesPaddedUpToDim(0), + in_volSim_dmp.getBuffer(), + in_volSim_dmp.getBytesPaddedUpToDim(1), + in_volSim_dmp.getBytesPaddedUpToDim(0), + int(in_volSim_dmp.getSize().z()), + rcDeviceCamera.getDeviceCamId(), + scaleStep, + refineParams.nbSubsamples, // number of samples between two depths + halfNbSamples, // number of samples (in front and behind mid depth) + refineParams.halfNbDepths, // number of depths (in front and behind mid depth) + twoTimesSigmaPowerTwo, + roi); + + CHECK_CUDA_ERROR(); +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolume.hpp b/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolume.hpp new file mode 100644 index 0000000000..a89f4e8316 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolume.hpp @@ -0,0 +1,163 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace aliceVision { +namespace depthMap { + +/** + * @brief Initialize all the given similarity volume in device memory to the given value. 
+ * @param[in,out] inout_volume_dmp the similarity volume in device memory + * @param[in] value the value to initialize with + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeInitialize(CudaDeviceMemoryPitched& inout_volume_dmp, TSim value, cudaStream_t stream); + +/** + * @brief Initialize all the given similarity volume in device memory to the given value. + * @param[in,out] inout_volume_dmp the similarity volume in device memory + * @param[in] value the value to initialize with + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeInitialize(CudaDeviceMemoryPitched& inout_volume_dmp, TSimRefine value, cudaStream_t stream); + +/** + * @brief Add similarity values from a given volume to another given volume. + * @param[in,out] inout_volume_dmp the input/output similarity volume in device memory + * @param[in] in_volume_dmp the input similarity volume in device memory + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeAdd(CudaDeviceMemoryPitched& inout_volume_dmp, + const CudaDeviceMemoryPitched& in_volume_dmp, + cudaStream_t stream); + +/** + * @brief Update second best similarity volume uninitialized values with first best volume values. + * @param[in] in_volBestSim_dmp the best similarity volume in device memory + * @param[in,out] inout_volSecBestSim_dmp the second best similarity volume in device memory + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeUpdateUninitializedSimilarity(const CudaDeviceMemoryPitched& in_volBestSim_dmp, + CudaDeviceMemoryPitched& inout_volSecBestSim_dmp, + cudaStream_t stream); + +/** + * @brief Compute the best / second best similarity volume for the given RC / TC. 
+ * @param[in,out] out_volBestSim_dmp the best similarity volume in device memory (must be initialized: stored values are compared against before being replaced) + * @param[in,out] out_volSecBestSim_dmp the second best similarity volume in device memory (must be initialized: stored values are compared against before being replaced) + * @param[in] in_depths_dmp the R camera depth list in device memory + * @param[in] rcDeviceCamera the R device camera + * @param[in] tcDeviceCamera the T device camera + * @param[in] sgmParams the Semi Global Matching parameters + * @param[in] depthRange the volume depth range to compute + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeComputeSimilarity(CudaDeviceMemoryPitched& out_volBestSim_dmp, + CudaDeviceMemoryPitched& out_volSecBestSim_dmp, + const CudaDeviceMemoryPitched& in_depths_dmp, + const DeviceCamera& rcDeviceCamera, + const DeviceCamera& tcDeviceCamera, + const SgmParams& sgmParams, + const Range& depthRange, + const ROI& roi, + cudaStream_t stream); + +/** + * @brief Refine the best similarity volume for the given RC / TC. + * @param[in,out] inout_volSim_dmp the similarity volume in device memory (new similarity values are added to the stored ones) + * @param[in] in_sgmDepthPixSizeMap_dmp the SGM upscaled depth/pixSize map (useful to get middle depth) in device memory + * @param[in] in_sgmNormalMap_dmpPtr (or nullptr) the SGM upscaled normal map in device memory + * @param[in] rcDeviceCamera the R device camera + * @param[in] tcDeviceCamera the T device camera + * @param[in] refineParams the Refine parameters + * @param[in] depthRange the volume depth range to compute + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeRefineSimilarity(CudaDeviceMemoryPitched& inout_volSim_dmp, + const CudaDeviceMemoryPitched& in_sgmDepthPixSizeMap_dmp, + const CudaDeviceMemoryPitched* in_sgmNormalMap_dmpPtr, + const DeviceCamera& rcDeviceCamera, + const DeviceCamera& tcDeviceCamera, + const RefineParams& refineParams, + const Range& depthRange, + const ROI& roi, + cudaStream_t stream); + +/** + * @brief 
Filter / Optimize the given similarity volume + * @param[out] out_volSimFiltered_dmp the output similarity volume in device memory + * @param[in,out] inout_volSliceAccA_dmp the volume slice first accumulation buffer in device memory + * @param[in,out] inout_volSliceAccB_dmp the volume slice second accumulation buffer in device memory + * @param[in,out] inout_volAxisAcc_dmp the volume axis accumulation buffer in device memory + * @param[in] in_volSim_dmp the input similarity volume in device memory + * @param[in] rcDeviceCamera the R device camera + * @param[in] sgmParams the Semi Global Matching parameters + * @param[in] lastDepthIndex the R camera last depth index + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeOptimize(CudaDeviceMemoryPitched& out_volSimFiltered_dmp, + CudaDeviceMemoryPitched& inout_volSliceAccA_dmp, + CudaDeviceMemoryPitched& inout_volSliceAccB_dmp, + CudaDeviceMemoryPitched& inout_volAxisAcc_dmp, + const CudaDeviceMemoryPitched& in_volSim_dmp, + const DeviceCamera& rcDeviceCamera, + const SgmParams& sgmParams, + int lastDepthIndex, + const ROI& roi, + cudaStream_t stream); + +/** + * @brief Retrieve the best depth/sim in the given similarity volume. 
+ * @param[out] out_sgmDepthSimMap_dmp the output best depth/sim map in device memory + * @param[in] in_depths_dmp the R camera depth list in device memory + * @param[in] in_volSim_dmp the input similarity volume in device memory + * @param[in] rcDeviceCamera the R device camera + * @param[in] sgmParams the Semi Global Matching parameters + * @param[in] depthRange the volume depth range to compute + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeRetrieveBestDepth(CudaDeviceMemoryPitched& out_sgmDepthSimMap_dmp, + const CudaDeviceMemoryPitched& in_depths_dmp, + const CudaDeviceMemoryPitched& in_volSim_dmp, + const DeviceCamera& rcDeviceCamera, + const SgmParams& sgmParams, + const Range& depthRange, + const ROI& roi, + cudaStream_t stream); + +/** + * @brief Retrieve the best depth/sim in the given refined similarity volume. + * @param[out] out_refineDepthSimMap_dmp the output refined and fused depth/sim map in device memory + * @param[in] in_sgmDepthPixSizeMap_dmp the SGM upscaled depth/pixSize map (useful to get middle depth) in device memory + * @param[in] in_volSim_dmp the similarity volume in device memory + * @param[in] rcDeviceCamera the R device camera + * @param[in] refineParams the Refine parameters + * @param[in] roi the 2d region of interest + * @param[in] stream the stream for gpu execution + */ +extern void cuda_volumeRefineBestDepth(CudaDeviceMemoryPitched& out_refineDepthSimMap_dmp, + const CudaDeviceMemoryPitched& in_sgmDepthPixSizeMap_dmp, + const CudaDeviceMemoryPitched& in_volSim_dmp, + const DeviceCamera& rcDeviceCamera, + const RefineParams& refineParams, + const ROI& roi, + cudaStream_t stream); + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolumeKernels.cuh 
b/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolumeKernels.cuh new file mode 100644 index 0000000000..b52ea5cc53 --- /dev/null +++ b/src/aliceVision/depthMap/cuda/planeSweeping/deviceSimilarityVolumeKernels.cuh @@ -0,0 +1,604 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2017 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include +#include + +namespace aliceVision { +namespace depthMap { + +inline __device__ void move3DPointByRcPixSize(int deviceCamId, float3& p, float rcPixSize) +{ + float3 rpv = p - constantCameraParametersArray_d[deviceCamId].C; + normalize(rpv); + p = p + rpv * rcPixSize; +} + +inline __device__ void volume_computePatch(int rcDeviceCamId, int tcDeviceCamId, Patch& ptch, const float fpPlaneDepth, const int2& pix) +{ + ptch.p = get3DPointForPixelAndFrontoParellePlaneRC(rcDeviceCamId, pix, fpPlaneDepth); // no texture use + ptch.d = computePixSize(rcDeviceCamId, ptch.p); // no texture use + computeRotCSEpip(rcDeviceCamId, tcDeviceCamId, ptch); // no texture use +} + +__device__ float depthPlaneToDepth(int deviceCamId, const float2& pix, float fpPlaneDepth) +{ + const DeviceCameraParams& deviceCamParams = constantCameraParametersArray_d[deviceCamId]; + float3 planen = M3x3mulV3(deviceCamParams.iR, make_float3(0.0f, 0.0f, 1.0f)); + normalize(planen); + float3 planep = deviceCamParams.C + planen * fpPlaneDepth; + float3 v = M3x3mulV2(deviceCamParams.iP, pix); + normalize(v); + float3 p = linePlaneIntersect(deviceCamParams.C, v, planep, planen); + float depth = size(deviceCamParams.C - p); + return depth; +} + +template +__global__ void volume_init_kernel(T* inout_volume_d, int inout_volume_s, int inout_volume_p, int volDimX, int volDimY, T value) +{ + const int vx = blockIdx.x * 
blockDim.x + threadIdx.x; + const int vy = blockIdx.y * blockDim.y + threadIdx.y; + const int vz = blockIdx.z; + + if(vx >= volDimX || vy >= volDimY) + return; + + *get3DBufferAt(inout_volume_d, inout_volume_s, inout_volume_p, vx, vy, vz) = value; +} + +__global__ void volume_add_kernel(TSimRefine* inout_volume_d, int inout_volume_s, int inout_volume_p, + const TSimRefine* in_volume_d, int in_volume_s, int in_volume_p, + int volDimX, int volDimY) +{ + const int vx = blockIdx.x * blockDim.x + threadIdx.x; + const int vy = blockIdx.y * blockDim.y + threadIdx.y; + const int vz = blockIdx.z; + + if(vx >= volDimX || vy >= volDimY) + return; + + TSimRefine* outSimPtr = get3DBufferAt(inout_volume_d, inout_volume_s, inout_volume_p, vx, vy, vz); + +#ifdef TSIM_REFINE_USE_HALF + // note: using built-in half addition can give bad results on some gpus + //*outSimPtr = __hadd(*outSimPtr, *get3DBufferAt(in_volume_d, in_volume_s, in_volume_p, vx, vy, vz)); + *outSimPtr = __float2half(__half2float(*outSimPtr) + __half2float(*get3DBufferAt(in_volume_d, in_volume_s, in_volume_p, vx, vy, vz))); // perform the addition in float +#else + *outSimPtr += *get3DBufferAt(in_volume_d, in_volume_s, in_volume_p, vx, vy, vz); +#endif +} + +__global__ void volume_updateUninitialized_kernel(TSim* inout_volume2nd_d, int inout_volume2nd_s, int inout_volume2nd_p, + const TSim* in_volume1st_d, int in_volume1st_s, int in_volume1st_p, + int volDimX, int volDimY) +{ + const int vx = blockIdx.x * blockDim.x + threadIdx.x; + const int vy = blockIdx.y * blockDim.y + threadIdx.y; + const int vz = blockIdx.z; + + if(vx >= volDimX || vy >= volDimY) + return; + + // input/output second best similarity value + TSim* inout_simPtr = get3DBufferAt(inout_volume2nd_d, inout_volume2nd_s, inout_volume2nd_p, vx, vy, vz); + + if(*inout_simPtr >= 255.f) // invalid or uninitialized similarity value + { + // update second best similarity value with first best similarity value + *inout_simPtr = 
*get3DBufferAt(in_volume1st_d, in_volume1st_s, in_volume1st_p, vx, vy, vz); + } +} + +__global__ void volume_slice_kernel(cudaTextureObject_t rcTex, + cudaTextureObject_t tcTex, + int rcDeviceCamId, + int tcDeviceCamId, + int rcWidth, int rcHeight, + int tcWidth, int tcHeight, + const float gammaC, + const float gammaP, + const int wsh, + const int stepXY, + const float* in_depths_d, int in_depths_p, + TSim* out_volume_1st_d, int out_volume1st_s, int out_volume1st_p, + TSim* out_volume_2nd_d, int out_volume2nd_s, int out_volume2nd_p, + const Range depthRange, + const ROI roi) +{ + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + const int roiZ = blockIdx.z; + + if(roiX >= roi.width() || roiY >= roi.height()) // no need to check roiZ + return; + + // corresponding volume coordinates + const int vx = roiX; + const int vy = roiY; + const int vz = depthRange.begin + roiZ; + + // corresponding device image coordinates + const int x = (roi.x.begin + vx) * stepXY; + const int y = (roi.y.begin + vy) * stepXY; + + // corresponding depth plane + const float depthPlane = *get2DBufferAt(in_depths_d, in_depths_p, vz, 0); + + // compute patch + Patch ptcho; + volume_computePatch(rcDeviceCamId, tcDeviceCamId, ptcho, depthPlane, make_int2(x, y)); // no texture use + + // compute patch similarity + float fsim = compNCCby3DptsYK(rcTex, tcTex, rcDeviceCamId, tcDeviceCamId, ptcho, rcWidth, rcHeight, tcWidth, tcHeight, wsh, gammaC, gammaP); + + if(fsim == CUDART_INF_F) // invalid similarity + { + fsim = 255.0f; // 255 is the invalid similarity value + } + else // valid similarity + { + // remap similarity value + constexpr const float fminVal = -1.0f; + constexpr const float fmaxVal = 1.0f; + constexpr const float fmultiplier = 1.0f / (fmaxVal - fminVal); + + fsim = (fsim - fminVal) * fmultiplier; + +#ifdef TSIM_USE_FLOAT + // no clamp +#else + fsim = fminf(1.0f, fmaxf(0.0f, fsim)); +#endif + // convert from (0, 1) to 
(0, 254) + // needed to store in the volume in uchar + // 255 is reserved for the similarity initialization, i.e. undefined values + fsim *= 254.0f; + } + + TSim* fsim_1st = get3DBufferAt(out_volume_1st_d, out_volume1st_s, out_volume1st_p, vx, vy, vz); + TSim* fsim_2nd = get3DBufferAt(out_volume_2nd_d, out_volume2nd_s, out_volume2nd_p, vx, vy, vz); + + if (fsim < *fsim_1st) + { + *fsim_2nd = *fsim_1st; + *fsim_1st = TSim(fsim); + } + else if (fsim < *fsim_2nd) + { + *fsim_2nd = TSim(fsim); + } +} + +__global__ void volume_refine_kernel(cudaTextureObject_t rcTex, + cudaTextureObject_t tcTex, + int rcDeviceCamId, + int tcDeviceCamId, + int rcWidth, int rcHeight, + int tcWidth, int tcHeight, + int volDimZ, + int stepXY, + int wsh, + float gammaC, + float gammaP, + const float2* in_sgmDepthPixSizeMap_d, int in_sgmDepthPixSizeMap_p, + const float3* in_sgmNormalMap_d, int in_sgmNormalMap_p, + TSimRefine* inout_volSim_d, int inout_volSim_s, int inout_volSim_p, + const Range depthRange, + const ROI roi) +{ + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + const int roiZ = blockIdx.z; + + if(roiX >= roi.width() || roiY >= roi.height()) // no need to check roiZ + return; + + // corresponding volume and depth/sim map coordinates + const int vx = roiX; + const int vy = roiY; + const int vz = depthRange.begin + roiZ; + + // corresponding device image coordinates + const int x = (roi.x.begin + vx) * stepXY; + const int y = (roi.y.begin + vy) * stepXY; + + // corresponding original plane depth + const float originalDepth = get2DBufferAt(in_sgmDepthPixSizeMap_d, in_sgmDepthPixSizeMap_p, vx, vy)->x; // input original middle depth + + // original depth invalid or masked, similarity value remain at 255 + if(originalDepth <= 0.0f) + return; + + // get rc 3d point at original depth (z center) + float3 p = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, make_int2(x, y), originalDepth); + + // move rc 3d point according 
to the relative depth + const int relativeDepthIndexOffset = vz - ((volDimZ - 1) / 2); + if(relativeDepthIndexOffset != 0) + { + const float pixSizeOffset = relativeDepthIndexOffset * computePixSize(rcDeviceCamId, p); + move3DPointByRcPixSize(rcDeviceCamId, p, pixSizeOffset); + } + + // compute patch + Patch ptch; + ptch.p = p; + ptch.d = computePixSize(rcDeviceCamId, p); + + // computeRotCSEpip + { + // Vector from the 3d point to the reference camera center + float3 v1 = constantCameraParametersArray_d[rcDeviceCamId].C - ptch.p; + // Vector from the 3d point to the target camera center + float3 v2 = constantCameraParametersArray_d[tcDeviceCamId].C - ptch.p; + normalize(v1); + normalize(v2); + + // y has to be orthogonal to the epipolar plane + // n has to be on the epipolar plane + // x has to be on the epipolar plane + + ptch.y = cross(v1, v2); + normalize(ptch.y); + + if(in_sgmNormalMap_d != nullptr) // initialize patch normal from input normal map + { + ptch.n = *get2DBufferAt(in_sgmNormalMap_d, in_sgmNormalMap_p, vx, vy); + } + else // initialize patch normal from v1 & v2 + { + ptch.n = (v1 + v2) / 2.0f; + normalize(ptch.n); + } + + ptch.x = cross(ptch.y, ptch.n); + normalize(ptch.x); + } + + // compute similarity + // TODO: this function should return a similarity value between -1 and 0 or 1 for infinite. + // in practice this function returns values between -1 and 1. 
+ float fsim = compNCCby3DptsYK(rcTex, tcTex, rcDeviceCamId, tcDeviceCamId, ptch, rcWidth, rcHeight, tcWidth, tcHeight, wsh, gammaC, gammaP); + + if(fsim == 1.f || fsim == CUDART_INF_F) // infinite or invalid similarity + { + fsim = 0.0f; // 0 is the worst similarity value at this point + } + + // invert and filter similarity between 0 and 1 + // apply sigmoid see: https://www.desmos.com/calculator/skmhf1gpyf + // best similarity value was -1, worst was 0 + // best similarity value is 1, worst is still 0 + const float fsimInvertedFiltered = sigmoid(0.0f, 1.0f, 0.7f, -0.7f, fsim); + + // get output similarity pointer + TSimRefine* outSimPtr = get3DBufferAt(inout_volSim_d, inout_volSim_s, inout_volSim_p, vx, vy, vz); + + // add the output similarity value +#ifdef TSIM_REFINE_USE_HALF + // note: using built-in half addition can give bad results on some gpus + //*outSimPtr = __hadd(*outSimPtr, TSimRefine(fsimInvertedFiltered)); + //*outSimPtr = __hadd(*outSimPtr, __float2half(fsimInvertedFiltered)); + *outSimPtr = __float2half(__half2float(*outSimPtr) + fsimInvertedFiltered); // perform the addition in float +#else + *outSimPtr += TSimRefine(fsimInvertedFiltered); +#endif +} + +__global__ void volume_retrieveBestZ_kernel(float2* out_sgmDepthSimMap_d, int out_sgmDepthSimMap_p, + const float* in_depths_d, int in_depths_p, + const TSim* in_volSim_d, int in_volSim_s, int in_volSim_p, + int volDimZ, // useful for depth/sim interpolation + int rcDeviceCamId, + int scaleStep, + const Range depthRange, + const ROI roi) +{ + const int vx = blockIdx.x * blockDim.x + threadIdx.x; + const int vy = blockIdx.y * blockDim.y + threadIdx.y; + + if(vx >= roi.width() || vy >= roi.height()) + return; + + // corresponding device image coordinates + const float2 pix{float((roi.x.begin + vx) * scaleStep), float((roi.y.begin + vy) * scaleStep)}; + + // corresponding output depth/sim pointer + float2* out_bestDepthSimPtr = get2DBufferAt(out_sgmDepthSimMap_d, out_sgmDepthSimMap_p, vx, vy); + + 
// find best depth + float bestSim = 255.0f; + int bestZIdx = -1; + for(int vz = depthRange.begin; vz < depthRange.end; ++vz) + { + const float simAtZ = *get3DBufferAt(in_volSim_d, in_volSim_s, in_volSim_p, vx, vy, vz); + if (simAtZ < bestSim) + { + bestSim = simAtZ; + bestZIdx = vz; + } + } + + // TODO: consider filtering out the values with a too bad score like (bestSim > 200.0f) + // to reduce the storage volume of the depth maps + if (bestZIdx == -1) + { + out_bestDepthSimPtr->x = -1.0f; // invalid depth + out_bestDepthSimPtr->y = 1.0f; // worst similarity value + return; + } + +#ifdef ALICEVISION_DEPTHMAP_RETRIEVE_BEST_Z_INTERPOLATION + // with depth/sim interpolation + // NOTE: disable by default + const int bestZIdx_m1 = max(0, bestZIdx - 1); + const int bestZIdx_p1 = min(volDimZ-1, bestZIdx + 1); + + float3 depths; + depths.x = *get2DBufferAt(in_depths_d, in_depths_p, bestZIdx_m1, 0); + depths.y = *get2DBufferAt(in_depths_d, in_depths_p, bestZIdx, 0); + depths.z = *get2DBufferAt(in_depths_d, in_depths_p, bestZIdx_p1, 0); + + float3 sims; + sims.x = *get3DBufferAt(in_volSim_d, in_volSim_s, in_volSim_p, vx, vy, bestZIdx_m1); + sims.y = bestSim; + sims.z = *get3DBufferAt(in_volSim_d, in_volSim_s, in_volSim_p, vx, vy, bestZIdx_p1); + + // convert sims from (0, 255) to (-1, +1) + sims.x = (sims.x / 255.0f) * 2.0f - 1.0f; + sims.y = (sims.y / 255.0f) * 2.0f - 1.0f; + sims.z = (sims.z / 255.0f) * 2.0f - 1.0f; + + // interpolation between the 3 depth planes candidates + const float refinedDepthPlane = refineDepthSubPixel(depths, sims); + + out_bestDepthSimPtr->x = depthPlaneToDepth(rcDeviceCamId, pix, refinedDepthPlane); + out_bestDepthSimPtr->y = sims.y; +#else + // without depth interpolation + const float bestDepthPlane = *get2DBufferAt(in_depths_d, in_depths_p, bestZIdx, 0); + out_bestDepthSimPtr->x = depthPlaneToDepth(rcDeviceCamId, pix, bestDepthPlane); + out_bestDepthSimPtr->y = (bestSim / 255.0f) * 2.0f - 1.0f; // convert from (0, 255) to (-1, +1) + return; 
+#endif +} + + +__global__ void volume_refineBestZ_kernel(float2* out_refineDepthSimMap_d, int out_refineDepthSimMap_p, + const float2* in_sgmDepthPixSizeMap_d, int in_sgmDepthPixSizeMap_p, + const TSimRefine* in_volSim_d, int in_volSim_s, int in_volSim_p, + int volDimZ, + int rcDeviceCamId, + int scaleStep, + int samplesPerPixSize, // number of subsamples (samples between two depths) + int halfNbSamples, // number of samples (in front and behind mid depth) + int halfNbDepths, // number of depths (in front and behind mid depth) should be equal to (volDimZ - 1) / 2 + float twoTimesSigmaPowerTwo, + const ROI roi) +{ + const int roiX = blockIdx.x * blockDim.x + threadIdx.x; + const int roiY = blockIdx.y * blockDim.y + threadIdx.y; + + if(roiX >= roi.width() || roiY >= roi.height()) + return; + + // corresponding volume / depth sim map coordinates + const int vx = roiX; + const int vy = roiY; + + // corresponding device image coordinates + const int x = (roi.x.begin + vx) * scaleStep; + const int y = (roi.y.begin + vy) * scaleStep; + + // corresponding original plane depth + const float originalDepth = get2DBufferAt(in_sgmDepthPixSizeMap_d, in_sgmDepthPixSizeMap_p, vx, vy)->x; // input original middle depth + + // corresponding output depth/sim pointer + float2* out_bestDepthSimPtr = get2DBufferAt(out_refineDepthSimMap_d, out_refineDepthSimMap_p, vx, vy); + + if(originalDepth <= 0.0f) // original depth invalid or masked + { + out_bestDepthSimPtr->x = originalDepth; // -1 (invalid) or -2 (masked) + out_bestDepthSimPtr->y = 1.0f; // similarity between (-1, +1) + return; + } + + // find best z sample per pixel + float bestSampleSim = 99999.f; + int bestSampleOffsetIndex = 0; + + // sliding gaussian window + for(int sample = -halfNbSamples; sample <= halfNbSamples; ++sample) + { + float sampleSim = 0.f; + + for(int vz = 0; vz < volDimZ; ++vz) + { + const int rz = (vz - halfNbDepths); // relative depth index offset + const int zs = rz * samplesPerPixSize; // relative sample 
offset + + // get the inversed similarity sum value + // best value is the HIGHEST + const float invSimSum = *get3DBufferAt(in_volSim_d, in_volSim_s, in_volSim_p, vx, vy, vz); + + // reverse the inversed similarity sum value + // best similarity value is the LOWEST + const float simSum = -invSimSum; + + // apply gaussian + // see: https://www.desmos.com/calculator/ribalnoawq + sampleSim += simSum * expf(-((zs - sample) * (zs - sample)) / twoTimesSigmaPowerTwo); + } + + if(sampleSim < bestSampleSim) + { + bestSampleOffsetIndex = sample; + bestSampleSim = sampleSim; + } + } + + // get rc 3d point at original depth (z center) + const float3 p = get3DPointForPixelAndDepthFromRC(rcDeviceCamId, make_int2(x, y), originalDepth); + const float sampleSize = computePixSize(rcDeviceCamId, p) / samplesPerPixSize; + const float sampleSizeOffset = bestSampleOffsetIndex * sampleSize; + const float bestDepth = originalDepth + sampleSizeOffset; + + out_bestDepthSimPtr->x = bestDepth; + out_bestDepthSimPtr->y = bestSampleSim; +} + +template +__global__ void volume_initVolumeYSlice_kernel(T* volume_d, int volume_s, int volume_p, const int3 volDim, const int3 axisT, int y, T cst) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int z = blockIdx.y * blockDim.y + threadIdx.y; + + int3 v; + (&v.x)[axisT.x] = x; + (&v.x)[axisT.y] = y; + (&v.x)[axisT.z] = z; + + if ((x >= 0) && (x < (&volDim.x)[axisT.x]) && (z >= 0) && (z < (&volDim.x)[axisT.z])) + { + T* volume_zyx = get3DBufferAt(volume_d, volume_s, volume_p, v.x, v.y, v.z); + *volume_zyx = cst; + } +} + +template +__global__ void volume_getVolumeXZSlice_kernel(T1* slice_d, int slice_p, + const T2* volume_d, int volume_s, int volume_p, + const int3 volDim, const int3 axisT, int y) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int z = blockIdx.y * blockDim.y + threadIdx.y; + + int3 v; + (&v.x)[axisT.x] = x; + (&v.x)[axisT.y] = y; + (&v.x)[axisT.z] = z; + + if (x >= (&volDim.x)[axisT.x] || z >= 
(&volDim.x)[axisT.z]) + return; + + const T2* volume_xyz = get3DBufferAt(volume_d, volume_s, volume_p, v); + T1* slice_xz = get2DBufferAt(slice_d, slice_p, x, z); + *slice_xz = (T1)(*volume_xyz); +} + +__global__ void volume_computeBestZInSlice_kernel(TSimAcc* xzSlice_d, int xzSlice_p, TSimAcc* ySliceBestInColCst_d, int volDimX, int volDimZ) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + + if(x >= volDimX) + return; + + TSimAcc bestCst = *get2DBufferAt(xzSlice_d, xzSlice_p, x, 0); + + for(int z = 1; z < volDimZ; ++z) + { + const TSimAcc cst = *get2DBufferAt(xzSlice_d, xzSlice_p, x, z); + bestCst = cst < bestCst ? cst : bestCst; // min(cst, bestCst); + } + ySliceBestInColCst_d[x] = bestCst; +} + +/** + * @param[inout] xySliceForZ input similarity plane + * @param[in] xySliceForZM1 + * @param[in] xSliceBestInColCst + * @param[out] volSimT output similarity volume + */ +__global__ void volume_agregateCostVolumeAtXinSlices_kernel( + cudaTextureObject_t rcTex, + TSimAcc* xzSliceForY_d, int xzSliceForY_p, + const TSimAcc* xzSliceForYm1_d, int xzSliceForYm1_p, + const TSimAcc* bestSimInYm1_d, + TSim* volAgr_d, int volAgr_s, int volAgr_p, + const int3 volDim, + const int3 axisT, + float step, + int y, float _P1, float _P2, + int ySign, int filteringIndex, + const ROI roi) +{ + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int z = blockIdx.y * blockDim.y + threadIdx.y; + + int3 v; + (&v.x)[axisT.x] = x; + (&v.x)[axisT.y] = y; + (&v.x)[axisT.z] = z; + + if (x >= (&volDim.x)[axisT.x] || z >= volDim.z) + return; + + // find texture offset + const int beginX = (axisT.x == 0) ? roi.x.begin : roi.y.begin; + const int beginY = (axisT.x == 0) ? roi.y.begin : roi.x.begin; + + TSimAcc* sim_xz = get2DBufferAt(xzSliceForY_d, xzSliceForY_p, x, z); + float pathCost = 255.0f; + + if((z >= 1) && (z < volDim.z - 1)) + { + float P2 = 0; + + if(_P2 < 0) + { + // _P2 convention: use negative value to skip the use of deltaC. 
+ P2 = std::abs(_P2); + } + else + { + const int imX0 = (beginX + v.x) * step; // current + const int imY0 = (beginY + v.y) * step; + + const int imX1 = imX0 - ySign * step * (axisT.y == 0); // M1 + const int imY1 = imY0 - ySign * step * (axisT.y == 1); + + const float4 gcr0 = tex2D_float4(rcTex, float(imX0) + 0.5f, float(imY0) + 0.5f); + const float4 gcr1 = tex2D_float4(rcTex, float(imX1) + 0.5f, float(imY1) + 0.5f); + const float deltaC = Euclidean3(gcr0, gcr1); + + // sigmoid f(x) = i + (a - i) * (1 / ( 1 + e^(10 * (x - P2) / w))) + // see: https://www.desmos.com/calculator/1qvampwbyx + // best values found from tests: i = 80, a = 255, w = 80, P2 = 100 + // historical values: i = 15, a = 255, w = 80, P2 = 20 + P2 = sigmoid(80.f, 255.f, 80.f, _P2, deltaC); + } + + const TSimAcc bestCostInColM1 = bestSimInYm1_d[x]; + const TSimAcc pathCostMDM1 = *get2DBufferAt(xzSliceForYm1_d, xzSliceForYm1_p, x, z - 1); // M1: minus 1 over depths + const TSimAcc pathCostMD = *get2DBufferAt(xzSliceForYm1_d, xzSliceForYm1_p, x, z); + const TSimAcc pathCostMDP1 = *get2DBufferAt(xzSliceForYm1_d, xzSliceForYm1_p, x, z + 1); // P1: plus 1 over depths + const float minCost = multi_fminf(pathCostMD, pathCostMDM1 + _P1, pathCostMDP1 + _P1, bestCostInColM1 + P2); + + // if 'pathCostMD' is the minimal value of the depth + pathCost = (*sim_xz) + minCost - bestCostInColM1; + } + + // fill the current slice with the new similarity score + *sim_xz = TSimAcc(pathCost); + +#ifndef TSIM_USE_FLOAT + // clamp if TSim = uchar (TSimAcc = unsigned int) + pathCost = fminf(255.0f, fmaxf(0.0f, pathCost)); +#endif + + // aggregate into the final output + TSim* volume_xyz = get3DBufferAt(volAgr_d, volAgr_s, volAgr_p, v.x, v.y, v.z); + const float val = (float(*volume_xyz) * float(filteringIndex) + pathCost) / float(filteringIndex + 1); + *volume_xyz = TSim(val); +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/device_code.cu 
b/src/aliceVision/depthMap/cuda/planeSweeping/device_code.cu deleted file mode 100644 index 3d0df3db02..0000000000 --- a/src/aliceVision/depthMap/cuda/planeSweeping/device_code.cu +++ /dev/null @@ -1,117 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#pragma once - -#include - -#include - -namespace aliceVision { -namespace depthMap { - -template -inline __device__ void swap( T& a, T& b ) -{ - T tmp = a; - a = b; - b = tmp; -} - -__device__ float computeGradientSizeOfL( cudaTextureObject_t rc_tex, int x, int y) -{ - float xM1 = tex2D_float4(rc_tex, (float)(x - 1) + 0.5f, (float)(y + 0) + 0.5f).x; - float xP1 = tex2D_float4(rc_tex, (float)(x + 1) + 0.5f, (float)(y + 0) + 0.5f).x; - float yM1 = tex2D_float4(rc_tex, (float)(x + 0) + 0.5f, (float)(y - 1) + 0.5f).x; - float yP1 = tex2D_float4(rc_tex, (float)(x + 0) + 0.5f, (float)(y + 1) + 0.5f).x; - - // not divided by 2? 
- float2 g = make_float2(xM1 - xP1, yM1 - yP1); - - return size(g); -} - -__global__ void compute_varLofLABtoW_kernel(cudaTextureObject_t rc_tex, - float* varianceMap, int varianceMap_p, - int partWidth, int partHeight, int yFrom) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x < partWidth && y < partHeight) - { - const float grad = computeGradientSizeOfL(rc_tex, x, y + yFrom); - float* val = get2DBufferAt(varianceMap, varianceMap_p, x, y); - *val = grad; - } -} - -__device__ void move3DPointByRcPixSize( int cam_cache_idx, - float3& p, float rcPixSize) -{ - float3 rpv = p - camsBasesDev[cam_cache_idx].C; - normalize(rpv); - p = p + rpv * rcPixSize; -} - -__device__ void move3DPointByTcPixStep( int rc_cam_cache_idx, - int tc_cam_cache_idx, - float3& p, float tcPixStep) -{ - float3 rpv = camsBasesDev[rc_cam_cache_idx].C - p; - float3 prp = p; - float3 prp1 = p + rpv / 2.0f; - - float2 rp; - getPixelFor3DPoint(rc_cam_cache_idx, rp, prp); - - float2 tpo; - getPixelFor3DPoint(tc_cam_cache_idx, tpo, prp); - - float2 tpv; - getPixelFor3DPoint(tc_cam_cache_idx, tpv, prp1); - - tpv = tpv - tpo; - normalize(tpv); - - float2 tpd = tpo + tpv * tcPixStep; - - p = triangulateMatchRef(rc_cam_cache_idx, tc_cam_cache_idx, rp, tpd); -} - -__device__ float move3DPointByTcOrRcPixStep(int rc_cam_cache_idx, - int tc_cam_cache_idx, - float3& p, float pixStep, bool moveByTcOrRc) -{ - if(moveByTcOrRc == true) - { - move3DPointByTcPixStep(rc_cam_cache_idx, tc_cam_cache_idx, p, pixStep); - return 0.0f; - } - else - { - float pixSize = pixStep * computePixSize(rc_cam_cache_idx, p); - move3DPointByRcPixSize(rc_cam_cache_idx, p, pixSize); - - return pixSize; - } -} - -__global__ void getSilhoueteMap_kernel(cudaTextureObject_t rc_tex, bool* out, int out_p, int step, int width, int height, const uchar4 maskColorLab) -{ - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - - 
if((x * step < width) && (y * step < height)) - { - uchar4 col = tex2D(rc_tex, x * step, y * step); - *get2DBufferAt(out, out_p, x, y) = ((maskColorLab.x == col.x) && (maskColorLab.y == col.y) && (maskColorLab.z == col.z)); - } -} - - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/device_code_fuse.cu b/src/aliceVision/depthMap/cuda/planeSweeping/device_code_fuse.cu deleted file mode 100644 index ca5ea880a7..0000000000 --- a/src/aliceVision/depthMap/cuda/planeSweeping/device_code_fuse.cu +++ /dev/null @@ -1,257 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -namespace aliceVision { -namespace depthMap { - -/** - * @param[in] s: iteration over nSamplesHalf - */ -__global__ void fuse_computeGaussianKernelVotingSampleMap_kernel(float* out_gsvSampleMap, int out_gsvSampleMap_p, - float2* depthSimMap, int depthSimMap_p, - float2* midDepthPixSizeMap, int midDepthPixSizeMap_p, - int width, int height, float s, int idCam, - float samplesPerPixSize, float twoTimesSigmaPowerTwo) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x >= width || y >= height) - return; - - const float2 midDepthPixSize = *get2DBufferAt(midDepthPixSizeMap, midDepthPixSizeMap_p, x, y); - const float2 depthSim = *get2DBufferAt(depthSimMap, depthSimMap_p, x, y); - float* out_gsvSample_ptr = get2DBufferAt(out_gsvSampleMap, out_gsvSampleMap_p, x, y); - float gsvSample = (idCam == 0) ? 
0.0f : *out_gsvSample_ptr; - - if((midDepthPixSize.x > 0.0f) && (depthSim.x > 0.0f)) - { - const float depthStep = midDepthPixSize.y / samplesPerPixSize; - const float i = (midDepthPixSize.x - depthSim.x) / depthStep; - const float sim = -sigmoid(0.0f, 1.0f, 0.7f, -0.7f, depthSim.y); - gsvSample += sim * expf(-((i - s) * (i - s)) / twoTimesSigmaPowerTwo); - } - *out_gsvSample_ptr = gsvSample; -} - - -__global__ void fuse_updateBestGaussianKernelVotingSampleMap_kernel(float2* bestGsvSampleMap, int bestGsvSampleMap_p, - float* gsvSampleMap, int gsvSampleMap_p, int width, - int height, float s, int id) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x >= width || y >= height) - return; - - const float gsvSampleX = *get2DBufferAt(gsvSampleMap, gsvSampleMap_p, x, y); - float2* bestGsvSample_ptr = get2DBufferAt(bestGsvSampleMap, bestGsvSampleMap_p, x, y); - - if(id == 0 || gsvSampleX < bestGsvSample_ptr->x) - { - *bestGsvSample_ptr = make_float2(gsvSampleX, s); - } -} - -__global__ void fuse_computeFusedDepthSimMapFromBestGaussianKernelVotingSampleMap_kernel( - float2* oDepthSimMap, int oDepthSimMap_p, float2* bestGsvSampleMap, int bestGsvSampleMap_p, - float2* midDepthPixSizeMap, int midDepthPixSizeMap_p, int width, int height, float samplesPerPixSize) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x >= width || y >= height) - return; - - const float2 bestGsvSample = *get2DBufferAt(bestGsvSampleMap, bestGsvSampleMap_p, x, y); - const float2 midDepthPixSize = *get2DBufferAt(midDepthPixSizeMap, midDepthPixSizeMap_p, x, y); - const float depthStep = midDepthPixSize.y / samplesPerPixSize; - - // normalize similarity to -1,0 - // figure; t = -5.0:0.01:0.0; plot(t,sigmoid(0.0,-1.0,6.0,-0.4,t,0)); - // bestGsvSample.x = sigmoid(0.0f, -1.0f, 6.0f, -0.4f, bestGsvSample.x); - float2* oDepthSim = get2DBufferAt(oDepthSimMap, 
oDepthSimMap_p, x, y); - - if(midDepthPixSize.x <= 0.0f) - { - *oDepthSim = make_float2(-1.0f, 1.0f); - } - else - { - *oDepthSim = make_float2(midDepthPixSize.x - bestGsvSample.y * depthStep, bestGsvSample.x); - } -} - -__global__ void fuse_getOptDeptMapFromOptDepthSimMap_kernel(float* optDepthMap, int optDepthMap_p, - float2* optDepthMapSimMap, int optDepthMapSimMap_p, - int partWidth, int partHeight) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x < partWidth && y < partHeight) - { - *get2DBufferAt(optDepthMap, optDepthMap_p, x, y) = get2DBufferAt(optDepthMapSimMap, optDepthMapSimMap_p, x, y)->x; - } -} - -/** - * @return (smoothStep, energy) - */ -__device__ float2 getCellSmoothStepEnergy( int rc_cam_cache_idx, cudaTextureObject_t depthTex, const int2& cell0, - int yFrom) -{ - float2 out = make_float2(0.0f, 180.0f); - - // Get pixel depth from the depth texture - // Note: we do not use 0.5f offset as we use nearest neighbor interpolation - float d0 = tex2D(depthTex, float(cell0.x), float(cell0.y - yFrom)); - - // Early exit: depth is <= 0 - if(d0 <= 0.0f) - return out; - - // Consider the neighbor pixels - const int2 cellL = cell0 + make_int2(0, -1); // Left - const int2 cellR = cell0 + make_int2(0, 1); // Right - const int2 cellU = cell0 + make_int2(-1, 0); // Up - const int2 cellB = cell0 + make_int2(1, 0); // Bottom - - // Get associated depths from depth texture - const float dL = tex2D(depthTex, float(cellL.x), float(cellL.y - yFrom)); - const float dR = tex2D(depthTex, float(cellR.x), float(cellR.y - yFrom)); - const float dU = tex2D(depthTex, float(cellU.x), float(cellU.y - yFrom)); - const float dB = tex2D(depthTex, float(cellB.x), float(cellB.y - yFrom)); - - // Get associated 3D points - const float3 p0 = get3DPointForPixelAndDepthFromRC(rc_cam_cache_idx, cell0, d0); - const float3 pL = get3DPointForPixelAndDepthFromRC(rc_cam_cache_idx, cellL, dL); - const float3 pR = 
get3DPointForPixelAndDepthFromRC(rc_cam_cache_idx, cellR, dR); - const float3 pU = get3DPointForPixelAndDepthFromRC(rc_cam_cache_idx, cellU, dU); - const float3 pB = get3DPointForPixelAndDepthFromRC(rc_cam_cache_idx, cellB, dB); - - // Compute the average point based on neighbors (cg) - float3 cg = make_float3(0.0f, 0.0f, 0.0f); - float n = 0.0f; - - if(dL > 0.0f) { cg = cg + pL; n++; } - if(dR > 0.0f) { cg = cg + pR; n++; } - if(dU > 0.0f) { cg = cg + pU; n++; } - if(dB > 0.0f) { cg = cg + pB; n++; } - - // If we have at least one valid depth - if(n > 1.0f) - { - cg = cg / n; // average of x, y, depth - float3 vcn = camsBasesDev[rc_cam_cache_idx].C - p0; - normalize(vcn); - // pS: projection of cg on the line from p0 to camera - const float3 pS = closestPointToLine3D(cg, p0, vcn); - // keep the depth difference between pS and p0 as the smoothing step - out.x = size(camsBasesDev[rc_cam_cache_idx].C - pS) - d0; - } - - float e = 0.0f; - n = 0.0f; - - if(dL > 0.0f && dR > 0.0f) - { - // Large angle between neighbors == flat area => low energy - // Small angle between neighbors == non-flat area => high energy - e = fmaxf(e, (180.0f - angleBetwABandAC(p0, pL, pR))); - n++; - } - if(dU > 0.0f && dB > 0.0f) - { - e = fmaxf(e, (180.0f - angleBetwABandAC(p0, pU, pB))); - n++; - } - // The higher the energy, the less flat the area - if(n > 0.0f) - out.y = e; - - return out; -} - -__global__ void fuse_optimizeDepthSimMap_kernel(cudaTextureObject_t rc_tex, - int rc_cam_cache_idx, - cudaTextureObject_t imgVarianceTex, - cudaTextureObject_t depthTex, - float2* out_optDepthSimMap, int optDepthSimMap_p, - const float2* roughDepthPixSizeMap, int roughDepthPixSizeMap_p, - const float2* fineDepthSimMap, int fineDepthSimMap_p, - int partWidth, int partHeight, int iter, float samplesPerPixSize, int yFrom) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x >= partWidth || y >= partHeight) - return; - - const int2 
pix = make_int2(x, y + yFrom); - - const float2 roughDepthPixSize = *get2DBufferAt(roughDepthPixSizeMap, roughDepthPixSizeMap_p, x, y); - const float roughDepth = roughDepthPixSize.x; - const float roughPixSize = roughDepthPixSize.y; - - const float2 fineDepthSim = *get2DBufferAt(fineDepthSimMap, fineDepthSimMap_p, x, y); - const float fineDepth = fineDepthSim.x; - const float fineSim = fineDepthSim.y; - - float2* out_optDepthSim_ptr = get2DBufferAt(out_optDepthSimMap, optDepthSimMap_p, x, y); - float2 out_optDepthSim = (iter == 0) ? make_float2(roughDepth, fineSim) : *out_optDepthSim_ptr; - - const float depthOpt = out_optDepthSim.x; - - if (depthOpt > 0.0f) - { - const float2 depthSmoothStepEnergy = getCellSmoothStepEnergy(rc_cam_cache_idx, depthTex, pix, yFrom); // (smoothStep, energy) - float stepToSmoothDepth = depthSmoothStepEnergy.x; - stepToSmoothDepth = copysignf(fminf(fabsf(stepToSmoothDepth), roughPixSize / 10.0f), stepToSmoothDepth); - const float depthEnergy = depthSmoothStepEnergy.y; // max angle with neighbors - float stepToFineDM = fineDepth - depthOpt; // distance to refined/noisy input depth map - stepToFineDM = copysignf(fminf(fabsf(stepToFineDM), roughPixSize / 10.0f), stepToFineDM); - - const float stepToRoughDM = roughDepth - depthOpt; // distance to smooth/robust input depth map - const float imgColorVariance = tex2D(imgVarianceTex, float(x) + 0.5f, float(y) + 0.5f); - const float colorVarianceThresholdForSmoothing = 20.0f; - const float angleThresholdForSmoothing = 30.0f; // 30 - - // https://www.desmos.com/calculator/kob9lxs9qf - const float weightedColorVariance = sigmoid2(5.0f, angleThresholdForSmoothing, 40.0f, colorVarianceThresholdForSmoothing, imgColorVariance); - - // https://www.desmos.com/calculator/jwhpjq6ppj - const float fineSimWeight = sigmoid(0.0f, 1.0f, 0.7f, -0.7f, fineSim); - - // if geometry variation is bigger than color variation => the fineDM is considered noisy - - // if depthEnergy > weightedColorVariance => 
energyLowerThanVarianceWeight=0 => smooth - // else: => energyLowerThanVarianceWeight=1 => use fineDM - // weightedColorVariance max value is 30, so if depthEnergy > 30 (which means depthAngle < 150�) energyLowerThanVarianceWeight will be 0 - // https://www.desmos.com/calculator/jzbweilb85 - const float energyLowerThanVarianceWeight = sigmoid(0.0f, 1.0f, 30.0f, weightedColorVariance, depthEnergy); // TODO: 30 => 60 - - // https://www.desmos.com/calculator/ilsk7pthvz - const float closeToRoughWeight = 1.0f - sigmoid(0.0f, 1.0f, 10.0f, 17.0f, fabsf(stepToRoughDM / roughPixSize)); // TODO: 10 => 30 - - // f(z) = c1 * s1(z_rought - z)^2 + c2 * s2(z-z_fused)^2 + coeff3 * s3*(z-z_smooth)^2 - - const float depthOptStep = closeToRoughWeight * stepToRoughDM + // distance to smooth/robust input depth map - (1.0f - closeToRoughWeight) * (energyLowerThanVarianceWeight * fineSimWeight * stepToFineDM + // distance to refined/noisy - (1.0f - energyLowerThanVarianceWeight) * stepToSmoothDepth); // max angle in current depthMap - - out_optDepthSim.x = depthOpt + depthOptStep; - - out_optDepthSim.y = (1.0f - closeToRoughWeight) * (energyLowerThanVarianceWeight * fineSimWeight * fineSim + - (1.0f - energyLowerThanVarianceWeight) * (depthEnergy / 20.0f)); - } - - *out_optDepthSim_ptr = out_optDepthSim; -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/device_code_refine.cu b/src/aliceVision/depthMap/cuda/planeSweeping/device_code_refine.cu deleted file mode 100644 index 4e4291f9b9..0000000000 --- a/src/aliceVision/depthMap/cuda/planeSweeping/device_code_refine.cu +++ /dev/null @@ -1,177 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. 
- -namespace aliceVision { -namespace depthMap { - -__global__ void refine_compUpdateYKNCCSimMapPatch_kernel(int rc_cam_cache_idx, - int tc_cam_cache_idx, - cudaTextureObject_t rc_tex, cudaTextureObject_t tc_tex, - float* osimMap, int osimMap_p, - float* odptMap, int odptMap_p, - const float* depthMap, int depthMap_p, int partWidth, int height, - int wsh, float gammaC, float gammaP, - float tcStep, - bool moveByTcOrRc, int xFrom, - int rcWidth, int rcHeight, - int tcWidth, int tcHeight) -{ - const int tile_x = blockIdx.x * blockDim.x + threadIdx.x; - const int tile_y = blockIdx.y * blockDim.y + threadIdx.y; - - if(tile_x >= partWidth || tile_y >= height) - return; - - const int2 pix = make_int2(tile_x + xFrom, tile_y); - - float odpt = *get2DBufferAt(depthMap, depthMap_p, tile_x, tile_y); - float osim = 1.0f; - - float* osim_ptr = get2DBufferAt(osimMap, osimMap_p, tile_x, tile_y); - float* odpt_ptr = get2DBufferAt(odptMap, odptMap_p, tile_x, tile_y); - - const float4 gcr = tex2D_float4(rc_tex, pix.x + 0.5f, pix.y + 0.5f); - if(odpt <= 0.0f || gcr.w == 0.0f) - { - *osim_ptr = osim; - *odpt_ptr = odpt; - return; - } - - { - float3 p = get3DPointForPixelAndDepthFromRC(rc_cam_cache_idx, pix, odpt); - move3DPointByTcOrRcPixStep(rc_cam_cache_idx, tc_cam_cache_idx, p, tcStep, moveByTcOrRc); - - odpt = size(p - camsBasesDev[rc_cam_cache_idx].C); - - Patch ptch; - ptch.p = p; - ptch.d = computePixSize(rc_cam_cache_idx, p); - // TODO: we could compute the orientation of the path from the input depth map instead of relying on the cameras orientations - computeRotCSEpip(rc_cam_cache_idx, tc_cam_cache_idx, ptch); - osim = compNCCby3DptsYK(rc_tex, tc_tex, rc_cam_cache_idx, tc_cam_cache_idx, ptch, wsh, rcWidth, rcHeight, tcWidth, tcHeight, gammaC, gammaP); - } - - if(tcStep == 0.0f) - { - // For the first iteration, we initialize the values - *osim_ptr = osim; - *odpt_ptr = odpt; - } - else - { - // Then we update the similarity value if it's better - float actsim = *osim_ptr; - 
if(osim < actsim) - { - *osim_ptr = osim; - *odpt_ptr = odpt; - } - } -} - -__global__ void refine_compYKNCCSimMapPatch_kernel(int rc_cam_cache_idx, - int tc_cam_cache_idx, - cudaTextureObject_t rc_tex, cudaTextureObject_t tc_tex, - float* osimMap, int osimMap_p, float* depthMap, int depthMap_p, - int partWidth, int height, int wsh, float gammaC, - float gammaP, float tcStep, - bool moveByTcOrRc, int xFrom, int rcWidth, int rcHeight, int tcWidth, int tcHeight) -{ - const int tile_x = blockIdx.x * blockDim.x + threadIdx.x; - const int tile_y = blockIdx.y * blockDim.y + threadIdx.y; - - if(tile_x >= partWidth || tile_y >= height) - return; - - const int2 pix = make_int2(tile_x + xFrom, tile_y); - - float depth = *get2DBufferAt(depthMap, depthMap_p, tile_x, tile_y); - float osim = 1.1f; - - if(depth > 0.0f) - { - float3 p = get3DPointForPixelAndDepthFromRC(rc_cam_cache_idx, pix, depth); - // move3DPointByTcPixStep(p, tcStep); - move3DPointByTcOrRcPixStep(rc_cam_cache_idx, tc_cam_cache_idx, p, tcStep, moveByTcOrRc); - - Patch ptch; - ptch.p = p; - ptch.d = computePixSize(rc_cam_cache_idx, p); - computeRotCSEpip(rc_cam_cache_idx, tc_cam_cache_idx, ptch); - osim = compNCCby3DptsYK(rc_tex, tc_tex, rc_cam_cache_idx, tc_cam_cache_idx, ptch, wsh, rcWidth, rcHeight, tcWidth, tcHeight, gammaC, gammaP); - } - *get2DBufferAt(osimMap, osimMap_p, tile_x, tile_y) = osim; -} - -__global__ void refine_setLastThreeSimsMap_kernel(float3* lastThreeSimsMap, int lastThreeSimsMap_p, float* simMap, - int simMap_p, int width, int height, int id) -{ - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x >= width || y >= height) - return; - - float sim = *get2DBufferAt(simMap, simMap_p, x, y); - float3* lastThreeSims_ptr = get2DBufferAt(lastThreeSimsMap, lastThreeSimsMap_p, x, y); - - if(id == 0) - { - lastThreeSims_ptr->x = sim; - } - if(id == 1) - { - lastThreeSims_ptr->y = sim; - } - if(id == 2) - { - lastThreeSims_ptr->z = sim; - } -} - 
-__global__ void refine_computeDepthSimMapFromLastThreeSimsMap_kernel(int rc_cam_cache_idx, - int tc_cam_cache_idx, - float* osimMap, int osimMap_p, float* iodepthMap, - int iodepthMap_p, float3* lastThreeSimsMap, - int lastThreeSimsMap_p, int partWidth, int height, - bool moveByTcOrRc, int xFrom) -{ - const int tile_x = blockIdx.x * blockDim.x + threadIdx.x; - const int tile_y = blockIdx.y * blockDim.y + threadIdx.y; - - if(tile_x >= partWidth || tile_y >= height) - return; - - const int2 pix = make_int2(tile_x + xFrom, tile_y); - - float midDepth = *get2DBufferAt(iodepthMap, iodepthMap_p, tile_x, tile_y); - float3 sims = *get2DBufferAt(lastThreeSimsMap, lastThreeSimsMap_p, tile_x, tile_y); - float outDepth = midDepth; - float outSim = sims.y; - - if(outDepth > 0.0f) - { - float3 pMid = get3DPointForPixelAndDepthFromRC(rc_cam_cache_idx, pix, midDepth); - float3 pm1 = pMid; - float3 pp1 = pMid; - move3DPointByTcOrRcPixStep(rc_cam_cache_idx, tc_cam_cache_idx, pm1, -1.0f, moveByTcOrRc); - move3DPointByTcOrRcPixStep(rc_cam_cache_idx, tc_cam_cache_idx, pp1, +1.0f, moveByTcOrRc); - - float3 depths; - depths.x = size(pm1 - camsBasesDev[rc_cam_cache_idx].C); - depths.y = midDepth; - depths.z = size(pp1 - camsBasesDev[rc_cam_cache_idx].C); - - outDepth = refineDepthSubPixel(depths, sims); - } - - *get2DBufferAt(osimMap, osimMap_p, tile_x, tile_y) = outSim; - *get2DBufferAt(iodepthMap, iodepthMap_p, tile_x, tile_y) = outDepth; -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/device_code_volume.cu b/src/aliceVision/depthMap/cuda/planeSweeping/device_code_volume.cu deleted file mode 100644 index 5eae7cc2bd..0000000000 --- a/src/aliceVision/depthMap/cuda/planeSweeping/device_code_volume.cu +++ /dev/null @@ -1,371 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. 
If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#pragma once - -#include - - -namespace aliceVision { -namespace depthMap { - -#ifdef TSIM_USE_FLOAT -using TSim = float; -using TSimAcc = float; -#else -using TSim = unsigned char; -using TSimAcc = unsigned int; // TSimAcc is the similarity accumulation type -#endif - - -inline __device__ void volume_computePatch( int rc_cam_cache_idx, - int tc_cam_cache_idx, - Patch& ptch, - const float fpPlaneDepth, const int2& pix ) -{ - ptch.p = get3DPointForPixelAndFrontoParellePlaneRC(rc_cam_cache_idx, pix, fpPlaneDepth); // no texture use - ptch.d = computePixSize(rc_cam_cache_idx, ptch.p); // no texture use - computeRotCSEpip(rc_cam_cache_idx, tc_cam_cache_idx, ptch); // no texture use -} - -__global__ void volume_init_kernel(TSim* volume, int volume_s, int volume_p, - int volDimX, int volDimY ) -{ - const int vx = blockIdx.x * blockDim.x + threadIdx.x; - const int vy = blockIdx.y * blockDim.y + threadIdx.y; - const int vz = blockIdx.z; // * blockDim.z + threadIdx.z; - - if(vx >= volDimX || vy >= volDimY) - return; - - *get3DBufferAt(volume, volume_s, volume_p, vx, vy, vz) = 255.0f; -} - -__global__ void volume_slice_kernel( - cudaTextureObject_t rc_tex, - cudaTextureObject_t tc_tex, - int rc_cam_cache_idx, - int tc_cam_cache_idx, - const float* depths_d, - const int startDepthIndex, - const int nbDepthsToSearch, - int rcWidth, int rcHeight, - int tcWidth, int tcHeight, - int wsh, - const float gammaC, const float gammaP, - TSim* volume_1st, int volume1st_s, int volume1st_p, - TSim* volume_2nd, int volume2nd_s, int volume2nd_p, - int volStepXY, - int volDimX, int volDimY) -{ - /* - * Note ! 
- * volDimX == width / volStepXY - * volDimY == height / volStepXY - * width and height are needed to compute transformations, - * volDimX and volDimY may be the number of samples, reducing memory or computation - */ - - const int vx = blockIdx.x * blockDim.x + threadIdx.x; - const int vy = blockIdx.y * blockDim.y + threadIdx.y; - const int vz = blockIdx.z; // * blockDim.z + threadIdx.z; - - if( vx >= volDimX || vy >= volDimY ) // || vz >= volDimZ - return; - // if (vz >= nbDepthsToSearch) - // return; - assert(vz < nbDepthsToSearch); - - const int x = vx * volStepXY; - const int y = vy * volStepXY; - - // if(x >= rcWidth || y >= rcHeight) - // return; - - const int zIndex = startDepthIndex + vz; - const float fpPlaneDepth = depths_d[zIndex]; - - Patch ptcho; - volume_computePatch( rc_cam_cache_idx, - tc_cam_cache_idx, - ptcho, fpPlaneDepth, make_int2(x, y)); // no texture use - - float fsim = compNCCby3DptsYK(rc_tex, tc_tex, - rc_cam_cache_idx, tc_cam_cache_idx, - ptcho, wsh, - rcWidth, rcHeight, - tcWidth, tcHeight, - gammaC, gammaP); - - constexpr const float fminVal = -1.0f; - constexpr const float fmaxVal = 1.0f; - constexpr const float fmultiplier = 1.0f / (fmaxVal - fminVal); - - if(fsim == CUDART_INF_F) // invalid similarity - { - fsim = 255.0f; - } - else // valid similarity - { - fsim = (fsim - fminVal) * fmultiplier; - -#ifdef TSIM_USE_FLOAT - // no clamp -#else - fsim = fminf(1.0f, fmaxf(0.0f, fsim)); -#endif - // convert from (0, 1) to (0, 254) - // needed to store in the volume in uchar - // 255 is reserved for the similarity initialization, i.e. 
undefined values - fsim *= 254.0f; - } - - TSim* fsim_1st = get3DBufferAt(volume_1st, volume1st_s, volume1st_p, vx, vy, zIndex); - TSim* fsim_2nd = get3DBufferAt(volume_2nd, volume2nd_s, volume2nd_p, vx, vy, zIndex); - - if (fsim < *fsim_1st) - { - *fsim_2nd = *fsim_1st; - *fsim_1st = TSim(fsim); - } - else if (fsim < *fsim_2nd) - { - *fsim_2nd = TSim(fsim); - } -} - -__device__ float depthPlaneToDepth( - int cam_cache_idx, - const float2& pix, - float fpPlaneDepth) -{ - const CameraStructBase& cam = camsBasesDev[cam_cache_idx]; - float3 planen = M3x3mulV3(cam.iR, make_float3(0.0f, 0.0f, 1.0f)); - normalize(planen); - float3 planep = cam.C + planen * fpPlaneDepth; - float3 v = M3x3mulV2(cam.iP, pix); - normalize(v); - float3 p = linePlaneIntersect(cam.C, v, planep, planen); - float depth = size(cam.C - p); - return depth; -} - - -__global__ void volume_retrieveBestZ_kernel( - int rcamCacheId, - float* bestDepthM, int bestDepthM_s, - float* bestSimM, int bestSimM_s, - const TSim* simVolume, int simVolume_s, int simVolume_p, - int volDimX, int volDimY, int volDimZ, - const float* depths_d, - int scaleStep, bool interpolate) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if(x >= volDimX || y >= volDimY) - return; - - float bestSim = 255.0f; - int bestZIdx = -1; - for (int z = 0; z < volDimZ; ++z) - { - const float simAtZ = *get3DBufferAt(simVolume, simVolume_s, simVolume_p, x, y, z); - if (simAtZ < bestSim) - { - bestSim = simAtZ; - bestZIdx = z; - } - } - - // TODO: consider filtering out the values with a too bad score like (bestSim > 200.0f) - // to reduce the storage volume of the depth maps - if (bestZIdx == -1) - { - *get2DBufferAt(bestDepthM, bestDepthM_s, x, y) = -1.0f; - *get2DBufferAt(bestSimM, bestSimM_s, x, y) = 1.0f; - return; - } - - const float2 pix{float(x * scaleStep), float(y * scaleStep)}; - // Without depth interpolation (for debug purpose only) - if(!interpolate) - { - 
*get2DBufferAt(bestDepthM, bestDepthM_s, x, y) = depthPlaneToDepth(rcamCacheId, pix, depths_d[bestZIdx]); - *get2DBufferAt(bestSimM, bestSimM_s, x, y) = (bestSim / 255.0f) * 2.0f - 1.0f; // convert from (0, 255) to (-1, +1) - return; - } - - // With depth/sim interpolation - const int bestZIdx_m1 = max(0, bestZIdx - 1); - const int bestZIdx_p1 = min(volDimZ-1, bestZIdx + 1); - - float3 depths; - depths.x = depths_d[bestZIdx_m1]; - depths.y = depths_d[bestZIdx]; - depths.z = depths_d[bestZIdx_p1]; - - float3 sims; - sims.x = *get3DBufferAt(simVolume, simVolume_s, simVolume_p, x, y, bestZIdx_m1); - sims.y = bestSim; - sims.z = *get3DBufferAt(simVolume, simVolume_s, simVolume_p, x, y, bestZIdx_p1); - - // Convert sims from (0, 255) to (-1, +1) - sims.x = (sims.x / 255.0f) * 2.0f - 1.0f; - sims.y = (sims.y / 255.0f) * 2.0f - 1.0f; - sims.z = (sims.z / 255.0f) * 2.0f - 1.0f; - - // Interpolation between the 3 depth planes candidates - const float refinedDepth = refineDepthSubPixel(depths, sims); - - *get2DBufferAt(bestDepthM, bestDepthM_s, x, y) = depthPlaneToDepth(rcamCacheId, pix, refinedDepth); - *get2DBufferAt(bestSimM, bestSimM_s, x, y) = sims.y; -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -template -__global__ void volume_initVolumeYSlice_kernel(T* volume, int volume_s, int volume_p, const int3 volDim, const int3 axisT, int y, T cst) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int z = blockIdx.y * blockDim.y + threadIdx.y; - - int3 v; - (&v.x)[axisT.x] = x; - (&v.x)[axisT.y] = y; - (&v.x)[axisT.z] = z; - - if ((x >= 0) && (x < (&volDim.x)[axisT.x]) && (z >= 0) && (z < (&volDim.x)[axisT.z])) - { - T* volume_zyx = get3DBufferAt(volume, volume_s, volume_p, v.x, v.y, v.z); - *volume_zyx = cst; - } -} - -template -__global__ void volume_getVolumeXZSlice_kernel(T1* slice, int slice_p, - const T2* volume, int volume_s, int volume_p, - const int3 volDim, const int3 
axisT, int y) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int z = blockIdx.y * blockDim.y + threadIdx.y; - - int3 v; - (&v.x)[axisT.x] = x; - (&v.x)[axisT.y] = y; - (&v.x)[axisT.z] = z; - - if (x >= (&volDim.x)[axisT.x] || z >= (&volDim.x)[axisT.z]) - return; - - const T2* volume_xyz = get3DBufferAt(volume, volume_s, volume_p, v); - T1* slice_xz = get2DBufferAt(slice, slice_p, x, z); - *slice_xz = (T1)(*volume_xyz); -} - -__global__ void volume_computeBestZInSlice_kernel(TSimAcc* xzSlice, int xzSlice_p, TSimAcc* ySliceBestInColCst, int volDimX, int volDimZ) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - - if(x >= volDimX) - return; - - TSimAcc bestCst = *get2DBufferAt(xzSlice, xzSlice_p, x, 0); - - for(int z = 1; z < volDimZ; ++z) - { - const TSimAcc cst = *get2DBufferAt(xzSlice, xzSlice_p, x, z); - bestCst = cst < bestCst ? cst : bestCst; // min(cst, bestCst); - } - ySliceBestInColCst[x] = bestCst; -} - -/** - * @param[inout] xySliceForZ input similarity plane - * @param[in] xySliceForZM1 - * @param[in] xSliceBestInColCst - * @param[out] volSimT output similarity volume - */ -__global__ void volume_agregateCostVolumeAtXinSlices_kernel( - cudaTextureObject_t rc_tex, - TSimAcc* xzSliceForY, int xzSliceForY_p, - const TSimAcc* xzSliceForYm1, int xzSliceForYm1_p, - const TSimAcc* bestSimInYm1, - TSim* volAgr, int volAgr_s, int volAgr_p, - const int3 volDim, - const int3 axisT, - float step, - int y, float _P1, float _P2, - int ySign, int filteringIndex) -{ - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int z = blockIdx.y * blockDim.y + threadIdx.y; - - int3 v; - (&v.x)[axisT.x] = x; - (&v.x)[axisT.y] = y; - (&v.x)[axisT.z] = z; - - if (x >= (&volDim.x)[axisT.x] || z >= volDim.z) - return; - - TSimAcc* sim_xz = get2DBufferAt(xzSliceForY, xzSliceForY_p, x, z); - float pathCost = 255.0f; - - if((z >= 1) && (z < volDim.z - 1)) - { - float P2 = 0; - - if(_P2 < 0) - { - // _P2 convention: use negative value to skip the use 
of deltaC. - P2 = std::abs(_P2); - } - else - { - const int imX0 = v.x * step; // current - const int imY0 = v.y * step; - - const int imX1 = imX0 - ySign * step * (axisT.y == 0); // M1 - const int imY1 = imY0 - ySign * step * (axisT.y == 1); - - const float4 gcr0 = tex2D_float4(rc_tex, float(imX0) + 0.5f, float(imY0) + 0.5f); - const float4 gcr1 = tex2D_float4(rc_tex, float(imX1) + 0.5f, float(imY1) + 0.5f); - const float deltaC = Euclidean3(gcr0, gcr1); - - // sigmoid f(x) = i + (a - i) * (1 / ( 1 + e^(10 * (x - P2) / w))) - // see: https://www.desmos.com/calculator/1qvampwbyx - // best values found from tests: i = 80, a = 255, w = 80, P2 = 100 - // historical values: i = 15, a = 255, w = 80, P2 = 20 - P2 = sigmoid(80.f, 255.f, 80.f, _P2, deltaC); - } - - const TSimAcc bestCostInColM1 = bestSimInYm1[x]; - const TSimAcc pathCostMDM1 = *get2DBufferAt(xzSliceForYm1, xzSliceForYm1_p, x, z - 1); // M1: minus 1 over depths - const TSimAcc pathCostMD = *get2DBufferAt(xzSliceForYm1, xzSliceForYm1_p, x, z); - const TSimAcc pathCostMDP1 = *get2DBufferAt(xzSliceForYm1, xzSliceForYm1_p, x, z + 1); // P1: plus 1 over depths - const float minCost = multi_fminf(pathCostMD, pathCostMDM1 + _P1, pathCostMDP1 + _P1, bestCostInColM1 + P2); - - // if 'pathCostMD' is the minimal value of the depth - pathCost = (*sim_xz) + minCost - bestCostInColM1; - } - - // fill the current slice with the new similarity score - *sim_xz = TSimAcc(pathCost); - -#ifndef TSIM_USE_FLOAT - // clamp if TSim = uchar (TSimAcc = unsigned int) - pathCost = fminf(255.0f, fmaxf(0.0f, pathCost)); -#endif - - // aggregate into the final output - TSim* volume_xyz = get3DBufferAt(volAgr, volAgr_s, volAgr_p, v.x, v.y, v.z); - const float val = (float(*volume_xyz) * float(filteringIndex) + pathCost) / float(filteringIndex + 1); - *volume_xyz = TSim(val); -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/plane_sweeping_cuda.cu 
b/src/aliceVision/depthMap/cuda/planeSweeping/plane_sweeping_cuda.cu deleted file mode 100644 index a40476b821..0000000000 --- a/src/aliceVision/depthMap/cuda/planeSweeping/plane_sweeping_cuda.cu +++ /dev/null @@ -1,940 +0,0 @@ -// This file is part of the AliceVision project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#include -#include -#include -// #include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -namespace aliceVision { -namespace depthMap { - -// Macro for checking cuda errors -#define CHECK_CUDA_ERROR() \ - if(cudaError_t err = cudaGetLastError()) \ - { \ - fprintf(stderr, "\n\nCUDAError: %s\n", cudaGetErrorString(err)); \ - fprintf(stderr, " file: %s\n", __FILE__); \ - fprintf(stderr, " function: %s\n", __FUNCTION__); \ - fprintf(stderr, " line: %d\n\n", __LINE__); \ - std::stringstream s; \ - s << "\n CUDA Error: " << cudaGetErrorString(err) \ - << "\n file: " << __FILE__ \ - << "\n function: " << __FUNCTION__ \ - << "\n line: " << __LINE__ << "\n"; \ - throw std::runtime_error(s.str()); \ - } - -#define ALICEVISION_CU_PRINT_DEBUG(a) \ - std::cerr << a << std::endl; - -#define ALICEVISION_CU_PRINT_ERROR(a) \ - std::cerr << a << std::endl; - -__host__ float3 ps_M3x3mulV3(const float* M3x3, const float3& V) -{ - return make_float3(M3x3[0] * V.x + M3x3[3] * V.y + M3x3[6] * V.z, M3x3[1] * V.x + M3x3[4] * V.y + M3x3[7] * V.z, - M3x3[2] * V.x + M3x3[5] * V.y + M3x3[8] * V.z); -} - -__host__ void ps_normalize(float3& a) -{ - float d = sqrt(a.x * a.x + a.y * a.y + a.z * a.z); - a.x /= d; - a.y /= d; - a.z /= d; -} - -void pr_printfDeviceMemoryInfo() -{ - size_t iavail; - size_t itotal; - cudaMemGetInfo(&iavail, &itotal); - size_t iused = itotal - iavail; - - double 
avail = double(iavail) / (1024.0 * 1024.0); - double total = double(itotal) / (1024.0 * 1024.0); - double used = double(iused) / (1024.0 * 1024.0); - - int CUDAdeviceNo; - cudaGetDevice(&CUDAdeviceNo); - - printf("Device %i memory - used: %f, free: %f, total: %f\n", CUDAdeviceNo, used, avail, total); -} - -__host__ void ps_initCameraMatrix( CameraStructBase& base ) -{ - float3 z; - z.x = 0.0f; - z.y = 0.0f; - z.z = 1.0f; - base.ZVect = ps_M3x3mulV3(base.iR, z); - ps_normalize(base.ZVect); - - float3 y; - y.x = 0.0f; - y.y = 1.0f; - y.z = 0.0f; - base.YVect = ps_M3x3mulV3(base.iR, y); - ps_normalize(base.YVect); - - float3 x; - x.x = 1.0f; - x.y = 0.0f; - x.z = 0.0f; - base.XVect = ps_M3x3mulV3(base.iR, x); - ps_normalize(base.XVect); -} - -int ps_listCUDADevices(bool verbose) -{ - int num_gpus = 0; // number of CUDA GPUs - - // determine the number of CUDA capable GPUs - cudaError_t err = cudaGetDeviceCount(&num_gpus); - CHECK_CUDA_ERROR(); - if(err != cudaSuccess) - { - printf("Error getting cuda device count"); - return 0; - } - - if(num_gpus < 1) - { - printf("ERROR: no CUDA capable devices detected"); - return 0; - } - - if(verbose == true) - { - // display CPU and GPU configuration - printf("number of CUDA devices:\t%d\n", num_gpus); - for(int i = 0; i < num_gpus; i++) - { - cudaDeviceProp dprop; - cudaGetDeviceProperties(&dprop, i); - printf(" %d: %s\n", i, dprop.name); - } - } - - return num_gpus; -} - -int ps_deviceAllocate(Pyramid& pyramid, int width, int height, int scales ) -{ - int bytesAllocated = 0; - - pyramid.resize(scales); - - for(int s = 0; s < scales; s++) - { - int w = width / (s + 1); - int h = height / (s + 1); - // printf("ps_deviceAllocate: CudaDeviceMemoryPitched: [c%i][s%i] %ix%i\n", c, s, w, h); - pyramid[s].arr = new CudaDeviceMemoryPitched(CudaSize<2>(w, h)); - bytesAllocated += pyramid[s].arr->getBytesPadded(); - - cudaTextureDesc tex_desc; - memset(&tex_desc, 0, sizeof(cudaTextureDesc)); - tex_desc.normalizedCoords = 0; // addressed 
(x,y) in [width,height] - tex_desc.addressMode[0] = cudaAddressModeClamp; - tex_desc.addressMode[1] = cudaAddressModeClamp; - tex_desc.addressMode[2] = cudaAddressModeClamp; -#if defined(ALICEVISION_DEPTHMAP_TEXTURE_USE_UCHAR) && defined(ALICEVISION_DEPTHMAP_TEXTURE_USE_INTERPOLATION) - tex_desc.readMode = cudaReadModeNormalizedFloat; // uchar to float [0:1], see tex2d_float4 function -#else - tex_desc.readMode = cudaReadModeElementType; -#endif -#ifdef ALICEVISION_DEPTHMAP_TEXTURE_USE_INTERPOLATION - // with subpixel interpolation (can have a large performance impact on some graphic cards) - // but could be critical for quality during SGM in small resolution - tex_desc.filterMode = cudaFilterModeLinear; -#else - // without interpolation - tex_desc.filterMode = cudaFilterModePoint; -#endif - - cudaResourceDesc res_desc; - res_desc.resType = cudaResourceTypePitch2D; - res_desc.res.pitch2D.desc = cudaCreateChannelDesc(); - res_desc.res.pitch2D.devPtr = pyramid[s].arr->getBuffer(); - res_desc.res.pitch2D.width = pyramid[s].arr->getSize()[0]; - res_desc.res.pitch2D.height = pyramid[s].arr->getSize()[1]; - res_desc.res.pitch2D.pitchInBytes = pyramid[s].arr->getPitch(); - - cudaError_t err = cudaCreateTextureObject( &pyramid[s].tex, &res_desc, &tex_desc, 0 ); - THROW_ON_CUDA_ERROR( err, "Failed to bind texture object to cam array" ); - } - - return bytesAllocated; -} - -void ps_deviceDeallocate( Pyramid& pyramid, int scales ) -{ - for( TexturedArray& entry : pyramid ) - { - delete entry.arr; - cudaDestroyTextureObject( entry.tex ); - } - pyramid.clear(); -} - -void ps_testCUDAdeviceNo(int CUDAdeviceNo) -{ - int myCUDAdeviceNo; - cudaGetDevice(&myCUDAdeviceNo); - if(myCUDAdeviceNo != CUDAdeviceNo) - { - printf("WARNING different device %i %i\n", myCUDAdeviceNo, CUDAdeviceNo); - } -} - -// void ps_device_updateCam( const CameraStruct& cam, int CUDAdeviceNo, -// int scales, int w, int h) -void ps_device_fillPyramidFromHostFrame( Pyramid& pyramid, - CudaHostMemoryHeap* 
host_frame, - int scales, int w, int h, - cudaStream_t stream ) -{ - ALICEVISION_CU_PRINT_DEBUG(std::endl - << "Calling " << __FUNCTION__ << std::endl - << " for " << scales << " scales" - << ", w: " << w << ", h: " << h - << std::endl); - - { - /* copy texture's data from host to device */ - pyramid[0].arr->copyFrom( *host_frame, stream ); - - const dim3 block(32, 2, 1); - const dim3 grid(divUp(w, block.x), divUp(h, block.y), 1); - ALICEVISION_CU_PRINT_DEBUG("rgb2lab_kernel: block=(" << block.x << ", " << block.y << ", " << block.z << "), grid=(" << grid.x << ", " << grid.y << ", " << grid.z << ")"); - - /* in-place color conversion into CIELAB */ - rgb2lab_kernel<<>>( - pyramid[0].arr->getBuffer(), pyramid[0].arr->getPitch(), - w, h); - CHECK_CUDA_ERROR(); - } - - /* For each scale, create a Gaussian-filtered and scaled version of the - * initial texture */ - for(int scale = 1; scale < scales; ++scale) - { - const int radius = scale + 1; - // const int sWidth = w / (scale + 1); - // const int sHeight = h / (scale + 1); - // ALICEVISION_CU_PRINT_DEBUG("Create downscaled image for camera id " << camId << " at scale " << scale << ": " << sWidth << "x" << sHeight); - - // const dim3 block(32, 2, 1); - // const dim3 grid(divUp(sWidth, block.x), divUp(sHeight, block.y), 1); - // ALICEVISION_CU_PRINT_DEBUG("ps_downscale_gauss: block=(" << block.x << ", " << block.y << ", " << block.z << "), grid=(" << grid.x << ", " << grid.y << ", " << grid.z << ")"); - - ps_downscale_gauss(pyramid, scale, w, h, radius, stream); - CHECK_CUDA_ERROR(); - } - - CHECK_CUDA_ERROR(); -} - - -/** - * @param[inout] d_volSimT similarity volume - */ -void ps_aggregatePathVolume( - CudaDeviceMemoryPitched& d_volAgr, - const CudaDeviceMemoryPitched& d_volSim, - const CudaSize<3>& volDim, - const CudaSize<3>& axisT, - cudaTextureObject_t rc_tex, - const SgmParams& sgmParams, - bool invY, int filteringIndex) -{ - const size_t volDimX = volDim[axisT[0]]; - const size_t volDimY = volDim[axisT[1]]; - 
const size_t volDimZ = volDim[axisT[2]]; - - const int3 volDim_ = make_int3(volDim[0], volDim[1], volDim[2]); - const int3 axisT_ = make_int3(axisT[0], axisT[1], axisT[2]); - const int ySign = (invY ? -1 : 1); - - // setup block and grid - const int blockSize = 8; - const dim3 blockVolXZ(blockSize, blockSize, 1); - const dim3 gridVolXZ(divUp(volDimX, blockVolXZ.x), divUp(volDimZ, blockVolXZ.y), 1); - - const int blockSizeL = 64; - const dim3 blockColZ(blockSizeL, 1, 1); - const dim3 gridColZ(divUp(volDimX, blockColZ.x), 1, 1); - - const dim3 blockVolSlide(blockSizeL, 1, 1); - const dim3 gridVolSlide(divUp(volDimX, blockVolSlide.x), volDimZ, 1); - - CudaDeviceMemoryPitched d_sliceBufferA(CudaSize<2>(volDimX, volDimZ)); - CudaDeviceMemoryPitched d_sliceBufferB(CudaSize<2>(volDimX, volDimZ)); - - CudaDeviceMemoryPitched* d_xzSliceForY = &d_sliceBufferA; // Y slice - CudaDeviceMemoryPitched* d_xzSliceForYm1 = &d_sliceBufferB; // Y-1 slice - - CudaDeviceMemoryPitched d_bestSimInYm1(CudaSize<2>(volDimX, 1)); // best sim score along the Y axis for each Z value - - // Copy the first XZ plane (at Y=0) from 'd_volSim' into 'd_xzSliceForYm1' - volume_getVolumeXZSlice_kernel<<>>( - d_xzSliceForYm1->getBuffer(), - d_xzSliceForYm1->getPitch(), - d_volSim.getBuffer(), - d_volSim.getBytesPaddedUpToDim(1), - d_volSim.getBytesPaddedUpToDim(0), - volDim_, axisT_, 0); // Y=0 - - // Set the first Z plane from 'd_volAgr' to 255 - volume_initVolumeYSlice_kernel<<>>( - d_volAgr.getBuffer(), - d_volAgr.getBytesPaddedUpToDim(1), - d_volAgr.getBytesPaddedUpToDim(0), - volDim_, axisT_, 0, 255); - - for(int iy = 1; iy < volDimY; ++iy) - { - const int y = invY ? 
volDimY - 1 - iy : iy; - - // For each column: compute the best score - // Foreach x: - // d_zBestSimInYm1[x] = min(d_xzSliceForY[1:height]) - volume_computeBestZInSlice_kernel<<>>( - d_xzSliceForYm1->getBuffer(), d_xzSliceForYm1->getPitch(), - d_bestSimInYm1.getBuffer(), - volDimX, volDimZ); - - // Copy the 'z' plane from 'd_volSimT' into 'd_xzSliceForY' - volume_getVolumeXZSlice_kernel<<>>( - d_xzSliceForY->getBuffer(), - d_xzSliceForY->getPitch(), - d_volSim.getBuffer(), - d_volSim.getBytesPaddedUpToDim(1), - d_volSim.getBytesPaddedUpToDim(0), - volDim_, axisT_, y); - - volume_agregateCostVolumeAtXinSlices_kernel<<>>( - rc_tex, - d_xzSliceForY->getBuffer(), d_xzSliceForY->getPitch(), // inout: xzSliceForY - d_xzSliceForYm1->getBuffer(), d_xzSliceForYm1->getPitch(), // in: xzSliceForYm1 - d_bestSimInYm1.getBuffer(), // in: bestSimInYm1 - d_volAgr.getBuffer(), d_volAgr.getBytesPaddedUpToDim(1), d_volAgr.getBytesPaddedUpToDim(0), // out: volAgr - volDim_, axisT_, - sgmParams.stepXY, - y, - sgmParams.p1, - sgmParams.p2Weighting, - ySign, filteringIndex); - - std::swap(d_xzSliceForYm1, d_xzSliceForY); - } - // CHECK_CUDA_ERROR(); -} - -void ps_SGMretrieveBestDepth(int rcamCacheId, - CudaDeviceMemoryPitched& bestDepth_dmp, - CudaDeviceMemoryPitched& bestSim_dmp, - const CudaDeviceMemoryPitched& volSim_dmp, - const CudaSize<3>& volDim, - const CudaDeviceMemory& depths_d, - int scaleStep, bool interpolate) -{ - const int block_size = 8; - const dim3 block(block_size, block_size, 1); - const dim3 grid(divUp(volDim.x(), block_size), divUp(volDim.y(), block_size), 1); - - volume_retrieveBestZ_kernel<<>>( - rcamCacheId, - bestDepth_dmp.getBuffer(), - bestDepth_dmp.getBytesPaddedUpToDim(0), - bestSim_dmp.getBuffer(), - bestSim_dmp.getBytesPaddedUpToDim(0), - volSim_dmp.getBuffer(), - volSim_dmp.getBytesPaddedUpToDim(1), volSim_dmp.getBytesPaddedUpToDim(0), - int(volDim.x()), - int(volDim.y()), - int(volDim.z()), - depths_d.getBuffer(), - scaleStep, - interpolate); -} - - - 
-namespace ps -{ -/* - * static private variables in this class - */ -bool SimilarityVolume::_configured = false; -dim3 SimilarityVolume::_block( 32, 1, 1 ); // minimal default settings - -SimilarityVolume::SimilarityVolume( const CudaSize<3>& volDim, - int volStepXY, - int scale, - const std::vector& depths_h) - : _dimX(int(volDim.x())) - , _dimY(int(volDim.y())) - , _dimZ(int(volDim.z())) - , _stepXY(volStepXY) - , _scale(scale) - , _depths_d(depths_h.data(), depths_h.size()) - , _stream_max( 2 ) -{ - configureGrid(); - - _sweep_stream.resize(_stream_max); - for( cudaStream_t& stream : _sweep_stream ) - { - cudaError_t err; - err = cudaStreamCreate( &stream ); - if( err != cudaSuccess ) - { - ALICEVISION_CU_PRINT_DEBUG("Failed to create a CUDA stream object for SimilarityVolume"); - stream = 0; - } - } -} - -SimilarityVolume::~SimilarityVolume( ) -{ - for( cudaStream_t& stream : _sweep_stream ) - { - cudaStreamSynchronize( stream ); - if( stream != 0 ) cudaStreamDestroy( stream ); - } -} - -void SimilarityVolume::initOutputVolumes( - CudaDeviceMemoryPitched& volBestSim_dmp, - CudaDeviceMemoryPitched& volSecBestSim_dmp, - const int streamIndex ) -{ - const dim3 block(32, 4, 1); - const dim3 grid(divUp(_dimX, block.x), divUp(_dimY, block.y), _dimZ); - - volume_init_kernel - <<>> - (volBestSim_dmp.getBuffer(), - volBestSim_dmp.getBytesPaddedUpToDim(1), - volBestSim_dmp.getBytesPaddedUpToDim(0), - _dimX, _dimY); - volume_init_kernel - <<>> - (volSecBestSim_dmp.getBuffer(), - volSecBestSim_dmp.getBytesPaddedUpToDim(1), - volSecBestSim_dmp.getBytesPaddedUpToDim(0), - _dimX, _dimY); -} - -void SimilarityVolume::compute( - CudaDeviceMemoryPitched& volBestSim_dmp, - CudaDeviceMemoryPitched& volSecBestSim_dmp, - const CameraStruct& rcam, int rcWidth, int rcHeight, - const CameraStruct& tcam, int tcWidth, int tcHeight, - const OneTC& cell, - const SgmParams& sgmParams, - const int streamIndex ) -{ - TSim* gpu_volume_1st = volBestSim_dmp.getBuffer(); - TSim* gpu_volume_2nd = 
volSecBestSim_dmp.getBuffer(); - - { - const int startDepthIndex = cell.getDepthToStart(); - const int nbDepthsToSearch = cell.getDepthsToSearch(); - - const dim3 grid(divUp(_dimX, _block.x), divUp(_dimY, _block.y), nbDepthsToSearch); - - ALICEVISION_CU_PRINT_DEBUG("===================="); - ALICEVISION_CU_PRINT_DEBUG("Volume slice kernel"); - ALICEVISION_CU_PRINT_DEBUG("RC: " << rcam.camId << ", TC: " << tcam.camId); - ALICEVISION_CU_PRINT_DEBUG("Cell TC index: " << cell.getTCIndex()); - ALICEVISION_CU_PRINT_DEBUG("grid: " << grid.x << ", " << grid.y << ", " << grid.z); - ALICEVISION_CU_PRINT_DEBUG("block: " << _block.x << ", " << _block.y << ", " << _block.z); - ALICEVISION_CU_PRINT_DEBUG("startDepthIndex: " << startDepthIndex); - ALICEVISION_CU_PRINT_DEBUG("nbDepthsToSearch: " << nbDepthsToSearch); - ALICEVISION_CU_PRINT_DEBUG("nb all depths: " << int(_depths_d.getUnitsTotal())); - ALICEVISION_CU_PRINT_DEBUG("startDepthIndex+nbDepthsToSearch: " << startDepthIndex+nbDepthsToSearch); - ALICEVISION_CU_PRINT_DEBUG("_dimX: " << _dimX << ", _dimY: " << _dimY); - ALICEVISION_CU_PRINT_DEBUG("scale-1: " << prevScale() ); - ALICEVISION_CU_PRINT_DEBUG("rcWH / scale: " << rcWidth / _scale << "x" << rcHeight / _scale); - ALICEVISION_CU_PRINT_DEBUG("tcWH / scale: " << tcWidth / _scale << "x" << tcHeight / _scale); - ALICEVISION_CU_PRINT_DEBUG("===================="); - - const Pyramid& rc_pyramid = *rcam.pyramid; - const Pyramid& tc_pyramid = *tcam.pyramid; - cudaTextureObject_t rc_tex = rc_pyramid[prevScale()].tex; - cudaTextureObject_t tc_tex = tc_pyramid[prevScale()].tex; - - volume_slice_kernel - <<>> - ( rc_tex, - tc_tex, - rcam.param_dev.i, - tcam.param_dev.i, - _depths_d.getBuffer(), - startDepthIndex, - nbDepthsToSearch, - rcWidth / _scale, rcHeight / _scale, - tcWidth / _scale, tcHeight / _scale, - sgmParams.wsh, - float(sgmParams.gammaC), - float(sgmParams.gammaP), - gpu_volume_1st, - volBestSim_dmp.getBytesPaddedUpToDim(1), - 
volBestSim_dmp.getBytesPaddedUpToDim(0), - gpu_volume_2nd, - volSecBestSim_dmp.getBytesPaddedUpToDim(1), - volSecBestSim_dmp.getBytesPaddedUpToDim(0), - _stepXY, - _dimX, _dimY); - - // cudaDeviceSynchronize(); - // CHECK_CUDA_ERROR(); - } - - // cudaDeviceSynchronize(); -} - -cudaStream_t SimilarityVolume::SweepStream( int streamIndex ) -{ - streamIndex %= _stream_max; - return _sweep_stream[streamIndex]; -} - -void SimilarityVolume::WaitSweepStream( const int streamIndex ) -{ - cudaStreamSynchronize( SweepStream(streamIndex) ); -} - -void SimilarityVolume::configureGrid( ) -{ - - if( _configured ) return; - _configured = true; - - int recommendedMinGridSize; - int recommendedBlockSize; - cudaError_t err; - err = cudaOccupancyMaxPotentialBlockSize( &recommendedMinGridSize, - &recommendedBlockSize, - volume_slice_kernel, - 0, // dynamic shared mem size: none used - 0 ); // no block size limit, 1 thread OK - if( err != cudaSuccess ) - { - ALICEVISION_CU_PRINT_DEBUG( "cudaOccupancyMaxPotentialBlockSize failed for kernel volume_slice_kernel, using defaults" ); - } - else - { - if( recommendedBlockSize > 32 ) - { - _block.x = 32; - _block.y = divUp( recommendedBlockSize, 32 ); - } - } -} -}; // namespace ps - -void ps_refineRcDepthMap(const CameraStruct& rcam, - const CameraStruct& tcam, - float* inout_depthMap_hmh, - float* out_simMap_hmh, - int rcWidth, int rcHeight, - int tcWidth, int tcHeight, - const RefineParams& refineParams, - int xFrom, int wPart, int CUDAdeviceNo) -{ - // setup block and grid - const dim3 block(16, 16, 1); - const dim3 grid(divUp(wPart, block.x), divUp(rcHeight, block.y), 1); - - const Pyramid& rcPyramid = *rcam.pyramid; - const Pyramid& tcPyramid = *tcam.pyramid; - const size_t pyramidScaleIndex = size_t(refineParams.scale) - 1; - - cudaTextureObject_t rc_tex = rcPyramid[pyramidScaleIndex].tex; - cudaTextureObject_t tc_tex = tcPyramid[pyramidScaleIndex].tex; - - CudaDeviceMemoryPitched rcDepthMap_dmp(CudaSize<2>(wPart, rcHeight)); - 
copy(rcDepthMap_dmp, inout_depthMap_hmh, wPart, rcHeight); - - CudaDeviceMemoryPitched bestSimMap_dmp(CudaSize<2>(wPart, rcHeight)); - CudaDeviceMemoryPitched bestDptMap_dmp(CudaSize<2>(wPart, rcHeight)); - - const int halfNSteps = ((refineParams.nDepthsToRefine - 1) / 2) + 1; // Default ntcsteps = 31 - - for(int i = 0; i < halfNSteps; ++i) - { - refine_compUpdateYKNCCSimMapPatch_kernel<<>>( - rcam.param_dev.i, - tcam.param_dev.i, - rc_tex, tc_tex, - bestSimMap_dmp.getBuffer(), bestSimMap_dmp.getPitch(), - bestDptMap_dmp.getBuffer(), bestDptMap_dmp.getPitch(), - rcDepthMap_dmp.getBuffer(), rcDepthMap_dmp.getPitch(), - wPart, rcHeight, - refineParams.wsh, - refineParams.gammaC, - refineParams.gammaP, - float(i), - refineParams.useTcOrRcPixSize, - xFrom, - rcWidth, rcHeight, - tcWidth, tcHeight); - } - - for(int i = 1; i < halfNSteps; ++i) - { - refine_compUpdateYKNCCSimMapPatch_kernel<<>>( - rcam.param_dev.i, - tcam.param_dev.i, - rc_tex, tc_tex, - bestSimMap_dmp.getBuffer(), bestSimMap_dmp.getPitch(), - bestDptMap_dmp.getBuffer(), bestDptMap_dmp.getPitch(), - rcDepthMap_dmp.getBuffer(), rcDepthMap_dmp.getPitch(), - wPart, rcHeight, - refineParams.wsh, - refineParams.gammaC, - refineParams.gammaP, - float(-i), - refineParams.useTcOrRcPixSize, - xFrom, - rcWidth, rcHeight, - tcWidth, tcHeight); - } - - /* - // Filter intermediate refined images does not improve - for (int i = 0; i < 5; ++i) - { - // Filter refined depth map - CudaTexture depthTex(bestDptMap_dmp); - float euclideanDelta = 1.0; - int radius = 3; - ps_bilateralFilter( - depthTex.textureObj, - bestDptMap_dmp, - euclideanDelta, - radius); - ps_medianFilter( - depthTex.textureObj, - bestDptMap_dmp, - radius); - } - */ - - CudaDeviceMemoryPitched lastThreeSimsMap_dmp(CudaSize<2>(wPart, rcHeight)); - CudaDeviceMemoryPitched simMap_dmp(CudaSize<2>(wPart, rcHeight)); - - { - // Set best sim map into lastThreeSimsMap_dmp.y - refine_setLastThreeSimsMap_kernel<<>>( - lastThreeSimsMap_dmp.getBuffer(), 
lastThreeSimsMap_dmp.getPitch(), - bestSimMap_dmp.getBuffer(), bestSimMap_dmp.getPitch(), - wPart, rcHeight, 1); - /* - // Compute NCC for depth-1 - refine_compYKNCCSimMapPatch_kernel<<>>( - rc_cam.param_dev.i, - tc_cam.param_dev.i, - rc_tex, tc_tex, - simMap_dmp.getBuffer(), simMap_dmp.getPitch(), - bestDptMap_dmp.getBuffer(), bestDptMap_dmp.getPitch(), - wPart, rcHeight, - refineParams.wsh, - refineParams.gammaC, - refineParams.gammaP, - 0.0f, - refineParams.useTcOrRcPixSize, - xFrom, - rcWidth, rcHeight, - tcWidth, tcHeight); - - // Set sim for depth-1 into lastThreeSimsMap_dmp.y - refine_setLastThreeSimsMap_kernel <<>>( - lastThreeSimsMap_dmp.getBuffer(), lastThreeSimsMap_dmp.getPitch(), - simMap_dmp.getBuffer(), simMap_dmp.getPitch(), - wPart, rcHeight, 1); - */ - } - - { - // Compute NCC for depth-1 - refine_compYKNCCSimMapPatch_kernel<<>>( - rcam.param_dev.i, - tcam.param_dev.i, - rc_tex, tc_tex, - simMap_dmp.getBuffer(), simMap_dmp.getPitch(), - bestDptMap_dmp.getBuffer(), bestDptMap_dmp.getPitch(), - wPart, rcHeight, - refineParams.wsh, - refineParams.gammaC, - refineParams.gammaP, - -1.0f, - refineParams.useTcOrRcPixSize, - xFrom, - rcWidth, rcHeight, - tcWidth, tcHeight); - - // Set sim for depth-1 into lastThreeSimsMap_dmp.x - refine_setLastThreeSimsMap_kernel<<>>( - lastThreeSimsMap_dmp.getBuffer(), lastThreeSimsMap_dmp.getPitch(), - simMap_dmp.getBuffer(), simMap_dmp.getPitch(), - wPart, rcHeight, 0); - } - - { - // Compute NCC for depth+1 - refine_compYKNCCSimMapPatch_kernel<<>>( - rcam.param_dev.i, - tcam.param_dev.i, - rc_tex, tc_tex, - simMap_dmp.getBuffer(), simMap_dmp.getPitch(), - bestDptMap_dmp.getBuffer(), bestDptMap_dmp.getPitch(), - wPart, rcHeight, - refineParams.wsh, - refineParams.gammaC, - refineParams.gammaP, - +1.0f, - refineParams.useTcOrRcPixSize, - xFrom, - rcWidth, rcHeight, - tcWidth, tcHeight); - - // Set sim for depth+1 into lastThreeSimsMap_dmp.z - refine_setLastThreeSimsMap_kernel<<>>( - lastThreeSimsMap_dmp.getBuffer(), 
lastThreeSimsMap_dmp.getPitch(), - simMap_dmp.getBuffer(), simMap_dmp.getPitch(), - wPart, rcHeight, 2); - } - - // Interpolation from the lastThreeSimsMap_dmp - refine_computeDepthSimMapFromLastThreeSimsMap_kernel<<>>( - rcam.param_dev.i, - tcam.param_dev.i, - bestSimMap_dmp.getBuffer(), bestSimMap_dmp.getPitch(), - bestDptMap_dmp.getBuffer(), bestDptMap_dmp.getPitch(), - lastThreeSimsMap_dmp.getBuffer(), lastThreeSimsMap_dmp.getPitch(), - wPart, rcHeight, - refineParams.useTcOrRcPixSize, - xFrom); - - copy(out_simMap_hmh, wPart, rcHeight, bestSimMap_dmp); - copy(inout_depthMap_hmh, wPart, rcHeight, bestDptMap_dmp); -} - -/** - * @brief ps_fuseDepthSimMapsGaussianKernelVoting - * @param ndepthSimMaps: number of Tc cameras - * @param nSamplesHalf (default value 150) - * @param nDepthsToRefine (default value 31) - */ -void ps_fuseDepthSimMapsGaussianKernelVoting(int width, int height, - CudaHostMemoryHeap* out_depthSimMap_hmh, - std::vector*>& depthSimMaps_hmh, - int ndepthSimMaps, - const RefineParams& refineParams) -{ - const float samplesPerPixSize = float(refineParams.nSamplesHalf / ((refineParams.nDepthsToRefine - 1) / 2)); - const float twoTimesSigmaPowerTwo = 2.0f * refineParams.sigma * refineParams.sigma; - - // setup block and grid - const int block_size = 16; - const dim3 block(block_size, block_size, 1); - const dim3 grid(divUp(width, block_size), divUp(height, block_size), 1); - - CudaDeviceMemoryPitched bestDepthSimMap_dmp(CudaSize<2>(width, height)); - CudaDeviceMemoryPitched bestGsvSampleMap_dmp(CudaSize<2>(width, height)); - CudaDeviceMemoryPitched gsvSampleMap_dmp(CudaSize<2>(width, height)); - std::vector*> depthSimMaps_dmp(ndepthSimMaps); - - for(int i = 0; i < ndepthSimMaps; i++) - { - depthSimMaps_dmp[i] = new CudaDeviceMemoryPitched(CudaSize<2>(width, height)); - copy((*depthSimMaps_dmp[i]), (*depthSimMaps_hmh[i])); - } - - for(int s = -refineParams.nSamplesHalf; s <= refineParams.nSamplesHalf; s++) // (-150, 150) - { - for(int c = 1; c < 
ndepthSimMaps; c++) // number of T cameras - { - fuse_computeGaussianKernelVotingSampleMap_kernel<<>>( - gsvSampleMap_dmp.getBuffer(), gsvSampleMap_dmp.getPitch(), - depthSimMaps_dmp[c]->getBuffer(), depthSimMaps_dmp[c]->getPitch(), - depthSimMaps_dmp[0]->getBuffer(), depthSimMaps_dmp[0]->getPitch(), - width, height, (float)s, c - 1, samplesPerPixSize, twoTimesSigmaPowerTwo); - } - fuse_updateBestGaussianKernelVotingSampleMap_kernel<<>>( - bestGsvSampleMap_dmp.getBuffer(), bestGsvSampleMap_dmp.getPitch(), - gsvSampleMap_dmp.getBuffer(), gsvSampleMap_dmp.getPitch(), - width, height, (float)s, s + refineParams.nSamplesHalf); - } - - fuse_computeFusedDepthSimMapFromBestGaussianKernelVotingSampleMap_kernel<<>>( - bestDepthSimMap_dmp.getBuffer(), bestDepthSimMap_dmp.getPitch(), - bestGsvSampleMap_dmp.getBuffer(), bestGsvSampleMap_dmp.getPitch(), - depthSimMaps_dmp[0]->getBuffer(), depthSimMaps_dmp[0]->getPitch(), - width, height, samplesPerPixSize); - - copy((*out_depthSimMap_hmh), bestDepthSimMap_dmp); - - for(int i = 0; i < ndepthSimMaps; i++) - { - delete depthSimMaps_dmp[i]; - } -} - -void ps_optimizeDepthSimMapGradientDescent(const CameraStruct& rcam, - CudaHostMemoryHeap& out_optimizedDepthSimMap_hmh, - const CudaHostMemoryHeap& sgmDepthPixSizeMap_hmh, - const CudaHostMemoryHeap& refinedDepthSimMap_hmh, - const CudaSize<2>& depthSimMapPartDim, - const RefineParams& refineParams, - int CUDAdeviceNo, int nbCamsAllocated, int yFrom) -{ - const int partWidth = depthSimMapPartDim.x(); - const int partHeight = depthSimMapPartDim.y(); - const float samplesPerPixSize = float(refineParams.nSamplesHalf / ((refineParams.nDepthsToRefine - 1) / 2)); - - // setup block and grid - const int block_size = 16; - const dim3 block(block_size, block_size, 1); - const dim3 grid(divUp(partWidth, block_size), divUp(partHeight, block_size), 1); - - const CudaDeviceMemoryPitched sgmDepthPixSizeMap_dmp(sgmDepthPixSizeMap_hmh); - const CudaDeviceMemoryPitched 
refinedDepthSimMap_dmp(refinedDepthSimMap_hmh); - - CudaDeviceMemoryPitched optDepthMap_dmp(depthSimMapPartDim); - CudaDeviceMemoryPitched optDepthSimMap_dmp(depthSimMapPartDim); - copy(optDepthSimMap_dmp, sgmDepthPixSizeMap_dmp); - - // get rc CUDA texture object - const size_t pyramidScaleIndex = size_t(refineParams.scale) - 1; - const Pyramid& rcPyramid = *rcam.pyramid; - cudaTextureObject_t rc_tex = rcPyramid[pyramidScaleIndex].tex; - - CudaDeviceMemoryPitched imgVariance_dmp(depthSimMapPartDim); - { - const dim3 lblock(32, 2, 1); - const dim3 lgrid(divUp(partWidth, lblock.x), divUp(partHeight, lblock.y), 1); - - compute_varLofLABtoW_kernel<<>>(rc_tex, - imgVariance_dmp.getBuffer(), - imgVariance_dmp.getPitch(), - partWidth, partHeight, yFrom); - } - CudaTexture imgVarianceTex(imgVariance_dmp); - - for(int iter = 0; iter < refineParams.nIters; ++iter) // nIters: 100 by default - { - // Copy depths values from optDepthSimMap to optDepthMap - fuse_getOptDeptMapFromOptDepthSimMap_kernel<<>>(optDepthMap_dmp.getBuffer(), optDepthMap_dmp.getPitch(), - optDepthSimMap_dmp.getBuffer(), optDepthSimMap_dmp.getPitch(), - partWidth, partHeight); - - CudaTexture depthTex(optDepthMap_dmp); - - // Adjust depth/sim by using previously computed depths - fuse_optimizeDepthSimMap_kernel<<>>(rc_tex, - rcam.param_dev.i, - imgVarianceTex.textureObj, - depthTex.textureObj, - optDepthSimMap_dmp.getBuffer(), optDepthSimMap_dmp.getPitch(), - sgmDepthPixSizeMap_dmp.getBuffer(), sgmDepthPixSizeMap_dmp.getPitch(), - refinedDepthSimMap_dmp.getBuffer(), refinedDepthSimMap_dmp.getPitch(), - partWidth, partHeight, iter, samplesPerPixSize, yFrom); - } - - copy(out_optimizedDepthSimMap_hmh, optDepthSimMap_dmp); -} - -// uchar4 with 0..255 components => float3 with 0..1 components -inline __device__ __host__ float3 uchar4_to_float3(const uchar4 c) -{ - return make_float3(float(c.x) / 255.0f, float(c.y) / 255.0f, float(c.z) / 255.0f); -} - -void ps_getSilhoueteMap(CudaHostMemoryHeap* omap_hmh, int 
width, - int height, int scale, - int step, - CameraStruct& cam, - uchar4 maskColorRgb, bool verbose) -{ - clock_t tall = tic(); - - uchar4 maskColorLab; - float3 flab = xyz2lab(h_rgb2xyz(uchar4_to_float3(maskColorRgb))); - maskColorLab.x = (unsigned char)(flab.x); - maskColorLab.y = (unsigned char)(flab.y); - maskColorLab.z = (unsigned char)(flab.z); - maskColorLab.w = 0; - - // setup block and grid - int block_size = 16; - dim3 block(block_size, block_size, 1); - dim3 grid(divUp(width / step, block_size), divUp(height / step, block_size), 1); - - Pyramid& pyramid = *cam.pyramid; - - CudaDeviceMemoryPitched map_dmp(CudaSize<2>(width / step, height / step)); - getSilhoueteMap_kernel<<>>( - pyramid[scale].tex, - map_dmp.getBuffer(), map_dmp.getPitch(), - step, width, height, maskColorLab); - CHECK_CUDA_ERROR(); - - copy((*omap_hmh), map_dmp); - - if(verbose) - printf("gpu elapsed time: %f ms \n", toc(tall)); -} - - -void ps_loadCameraStructs( const CameraStructBase* hst, - const CamCacheIdx& offset, - cudaStream_t stream ) -{ - cudaMemcpyKind kind = cudaMemcpyHostToDevice; - cudaError_t err; - if( stream == 0 ) - { - err = cudaMemcpyToSymbol( camsBasesDev, - &hst[offset.i], - sizeof(CameraStructBase), - offset.i*sizeof(CameraStructBase), - kind ); - } - else - { - err = cudaMemcpyToSymbolAsync( camsBasesDev, - &hst[offset.i], - sizeof(CameraStructBase), - offset.i*sizeof(CameraStructBase), - kind, - stream ); - } - THROW_ON_CUDA_ERROR( err, "Failed to copy CameraStructs from host to device in " << __FILE__ << ":" << __LINE__ << ": " << cudaGetErrorString(err) ); -} - -} // namespace depthMap -} // namespace aliceVision diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/plane_sweeping_cuda.hpp b/src/aliceVision/depthMap/cuda/planeSweeping/plane_sweeping_cuda.hpp deleted file mode 100644 index 7686bfb0dc..0000000000 --- a/src/aliceVision/depthMap/cuda/planeSweeping/plane_sweeping_cuda.hpp +++ /dev/null @@ -1,162 +0,0 @@ -// This file is part of the AliceVision 
project. -// Copyright (c) 2017 AliceVision contributors. -// This Source Code Form is subject to the terms of the Mozilla Public License, -// v. 2.0. If a copy of the MPL was not distributed with this file, -// You can obtain one at https://mozilla.org/MPL/2.0/. - -#pragma once - -#include -#include -#include -#include - -namespace aliceVision { -namespace depthMap { - -#ifdef TSIM_USE_FLOAT - using TSim = float; - using TSimAcc = float; -#else - using TSim = unsigned char; - using TSimAcc = unsigned int; // TSimAcc is the similarity accumulation type -#endif - - -void ps_initCameraMatrix( CameraStructBase& base ); - -void pr_printfDeviceMemoryInfo(); - - -namespace ps -{ -class SimilarityVolume -{ -public: - SimilarityVolume( const CudaSize<3>& volDim, - int volStepXY, - int scale, - const std::vector& depths_h); - ~SimilarityVolume( ); - - void initOutputVolumes( - CudaDeviceMemoryPitched& volBestSim_dmp, - CudaDeviceMemoryPitched& volSecBestSim_dmp, - const int streamIndex ); - - void compute( - CudaDeviceMemoryPitched& volBestSim_dmp, - CudaDeviceMemoryPitched& volSecBestSim_dmp, - const CameraStruct& rcam, int rcWidth, int rcHeight, - const CameraStruct& tcams, int tcWidth, int tcHeight, - const OneTC& cell, - const SgmParams& sgmParams, - int streamIndex ); - - inline int dimX() const { return _dimX; } - inline int dimY() const { return _dimY; } - inline int dimZ() const { return _dimZ; } - inline int stepXY() const { return _stepXY; } - inline int scale() const { return _scale; } - inline int prevScale() const { return _scale-1; } - - cudaStream_t SweepStream( int offset ); - void WaitSweepStream( int offset ); - -private: - const int _dimX; - const int _dimY; - const int _dimZ; - const int _stepXY; - const int _scale; - - const CudaDeviceMemory _depths_d; - - const int _stream_max; - std::vector _sweep_stream; - - /* CUDA can help us to find good block sizes for a kernel, depending - * on architecture. 
Call configure_* functions and use *_block - * afterwards. - */ - static bool _configured; - static dim3 _block; - - static void configureGrid( ); -}; -}; // namespace ps - -void ps_aggregatePathVolume(CudaDeviceMemoryPitched& d_volAgr, - const CudaDeviceMemoryPitched& d_volSim, - const CudaSize<3>& volDim, - const CudaSize<3>& axisT, cudaTextureObject_t rc_tex, - const SgmParams& sgmParams, - bool invY, int filteringIndex); - -void ps_SGMretrieveBestDepth(int rcamCacheId, - CudaDeviceMemoryPitched& bestDepth_dmp, - CudaDeviceMemoryPitched& bestSim_dmp, - const CudaDeviceMemoryPitched& volSim_dmp, - const CudaSize<3>& volDim, - const CudaDeviceMemory& depths_d, - int scaleStep, bool interpolate); - -int ps_listCUDADevices(bool verbose); - -int ps_deviceAllocate( - Pyramid& pyramid, - int width, - int height, - int scales ); - -void ps_deviceDeallocate( - Pyramid& pyramid, - int scales ); - -void ps_testCUDAdeviceNo(int CUDAdeviceNo); - -void ps_device_fillPyramidFromHostFrame( - Pyramid& pyramid, - CudaHostMemoryHeap* host_frame, - int scales, int w, int h, - cudaStream_t stream ); - -void ps_refineRcDepthMap(const CameraStruct& rcam, - const CameraStruct& tcam, - float* inout_depthMap_hmh, - float* out_simMap_hmh, - int rcWidth, int rcHeight, - int tcWidth, int tcHeight, - const RefineParams& refineParams, - int xFrom, int wPart, int CUDAdeviceNo); - -void ps_fuseDepthSimMapsGaussianKernelVoting(int width, int height, - CudaHostMemoryHeap* out_depthSimMap_hmh, - std::vector*>& depthSimMaps_hmh, - int ndepthSimMaps, - const RefineParams& refineParams); - -void ps_optimizeDepthSimMapGradientDescent(const CameraStruct& rcam, - CudaHostMemoryHeap& out_optimizedDepthSimMap_hmh, - const CudaHostMemoryHeap& sgmDepthPixSizeMap_hmh, - const CudaHostMemoryHeap& refinedDepthSimMap_hmh, - const CudaSize<2>& depthSimMapPartDim, - const RefineParams& refineParams, - int CUDAdeviceNo, int nbCamsAllocated, int yFrom); - -void ps_getSilhoueteMap( - CudaHostMemoryHeap* omap_hmh, - 
int width, int height, - int scale, - int step, - CameraStruct& cam, - uchar4 maskColorRgb, - bool verbose); - -void ps_loadCameraStructs( const CameraStructBase* hst, - const CamCacheIdx& offset, - cudaStream_t stream ); - -} // namespace depthMap -} // namespace aliceVision - diff --git a/src/aliceVision/depthMap/cuda/planeSweeping/similarity.hpp b/src/aliceVision/depthMap/cuda/planeSweeping/similarity.hpp new file mode 100644 index 0000000000..39df1ffa5b --- /dev/null +++ b/src/aliceVision/depthMap/cuda/planeSweeping/similarity.hpp @@ -0,0 +1,41 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#define TSIM_REFINE_USE_HALF + +#ifdef TSIM_REFINE_USE_HALF +#define CUDA_NO_HALF +#include +#endif + +namespace aliceVision { +namespace depthMap { + +/* + * @note TSim is the similarity type for volume in device memory. + * @note TSimAcc is the similarity accumulation type for volume in device memory. + * @note TSimRefine is the similarity type for volume refinement in device memory. 
+ */ + +#ifdef TSIM_USE_FLOAT + using TSim = float; + using TSimAcc = float; +#else + using TSim = unsigned char; + using TSimAcc = unsigned int; // TSimAcc is the similarity accumulation type +#endif + +#ifdef TSIM_REFINE_USE_HALF + using TSimRefine = __half; +#else + using TSimRefine = float; +#endif + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/depthMap.cpp b/src/aliceVision/depthMap/depthMap.cpp index 1cc2e7a2b0..e18f8535b5 100644 --- a/src/aliceVision/depthMap/depthMap.cpp +++ b/src/aliceVision/depthMap/depthMap.cpp @@ -7,14 +7,20 @@ #include "depthMap.hpp" #include -#include +#include #include -#include -#include -#include +#include +#include +#include +#include +#include #include -#include -#include +#include +#include +#include +#include +#include +#include #include @@ -23,100 +29,554 @@ namespace fs = boost::filesystem; namespace aliceVision { namespace depthMap { -void getSgmParams(const mvsUtils::MultiViewParams& mp, SgmParams& sgmParams) +int computeDownscale(const mvsUtils::MultiViewParams& mp, int scale, int maxWidth, int maxHeight) +{ + const int maxImageWidth = mp.getMaxImageWidth() / scale; + const int maxImageHeight = mp.getMaxImageHeight() / scale; + + int downscale = 1; + int downscaleWidth = mp.getMaxImageWidth() / scale; + int downscaleHeight = mp.getMaxImageHeight() / scale; + + while((downscaleWidth > maxWidth) || (downscaleHeight > maxHeight)) + { + downscale++; + downscaleWidth = maxImageWidth / downscale; + downscaleHeight = maxImageHeight / downscale; + } + + return downscale; +} + +bool computeScaleStepSgmParams(const mvsUtils::MultiViewParams& mp, SgmParams& sgmParams) { + if(sgmParams.scale != -1 && sgmParams.stepXY != -1) + return false; + + const int fileScale = 1; // input images scale (should be one) + const int maxSideXY = 700 / mp.getProcessDownscale(); // max side in order to fit in device memory + const int maxImageW = mp.getMaxImageWidth(); + const int maxImageH = 
mp.getMaxImageHeight(); + + int maxW = maxSideXY; + int maxH = maxSideXY * 0.8; + + if(maxImageW < maxImageH) + std::swap(maxW, maxH); + + if(sgmParams.scale == -1) + { + // compute the number of scales that will be used in the plane sweeping. + // the highest scale should have a resolution close to 700x550 (or less). + const int scaleTmp = computeDownscale(mp, fileScale, maxW, maxH); + sgmParams.scale = std::min(2, scaleTmp); + } + + if(sgmParams.stepXY == -1) + { + sgmParams.stepXY = computeDownscale(mp, fileScale * sgmParams.scale, maxW, maxH); + } + + return true; +} + +void updateDepthMapParamsForSingleTileComputation(const mvsUtils::MultiViewParams& mp, bool autoSgmScaleStep, DepthMapParams& depthMapParams) +{ + if(!depthMapParams.autoAdjustSmallImage) + { + // cannot adjust depth map parameters + return; + } + + // update SGM maxTCamsPerTile + if(depthMapParams.sgmParams.maxTCamsPerTile < depthMapParams.maxTCams) + { + ALICEVISION_LOG_WARNING("Single tile computation, override SGM maximum number of T cameras per tile (before: " + << depthMapParams.sgmParams.maxTCamsPerTile << ", now: " << depthMapParams.maxTCams << ")."); + depthMapParams.sgmParams.maxTCamsPerTile = depthMapParams.maxTCams; + } + + // update Refine maxTCamsPerTile + if(depthMapParams.refineParams.maxTCamsPerTile < depthMapParams.maxTCams) + { + ALICEVISION_LOG_WARNING("Single tile computation, override Refine maximum number of T cameras per tile (before: " + << depthMapParams.refineParams.maxTCamsPerTile << ", now: " << depthMapParams.maxTCams << ")."); + depthMapParams.refineParams.maxTCamsPerTile = depthMapParams.maxTCams; + } + + const int maxSgmBufferWidth = divideRoundUp(mp.getMaxImageWidth() , depthMapParams.sgmParams.scale * depthMapParams.sgmParams.stepXY); + const int maxSgmBufferHeight = divideRoundUp(mp.getMaxImageHeight(), depthMapParams.sgmParams.scale * depthMapParams.sgmParams.stepXY); + + // update SGM step XY + if(!autoSgmScaleStep && // user define SGM scale & stepXY + 
(depthMapParams.sgmParams.stepXY == 2) && // default stepXY + (maxSgmBufferWidth < depthMapParams.tileParams.bufferWidth * 0.5) && + (maxSgmBufferHeight < depthMapParams.tileParams.bufferHeight * 0.5)) + { + ALICEVISION_LOG_WARNING("Single tile computation, override SGM step XY (before: " << depthMapParams.sgmParams.stepXY << ", now: 1)."); + depthMapParams.sgmParams.stepXY = 1; + } +} + +int getNbStreams(const mvsUtils::MultiViewParams& mp, const DepthMapParams& depthMapParams, int nbTilesPerCamera) +{ + const int maxImageSize = mp.getMaxImageWidth() * mp.getMaxImageHeight(); // process downscale apply + + const double sgmFrameCostMB = ((maxImageSize / depthMapParams.sgmParams.scale) * sizeof(CudaRGBA)) / (1024.0 * 1024.0); // SGM RGBA + const double refineFrameCostMB = ((maxImageSize / depthMapParams.refineParams.scale) * sizeof(CudaRGBA)) / (1024.0 * 1024.0); // Refine RGBA + const double cameraFrameCostMB = sgmFrameCostMB + (depthMapParams.useRefine ? refineFrameCostMB : 0.0); // SGM + Refine single frame cost + + double sgmTileCostMB = 0.0; + double sgmTileCostUnpaddedMB = 0.0; + { + Sgm sgm(mp, depthMapParams.tileParams, depthMapParams.sgmParams, 0 /*stream*/); + sgmTileCostMB = sgm.getDeviceMemoryConsumption(); + sgmTileCostUnpaddedMB = sgm.getDeviceMemoryConsumptionUnpadded(); + } + + double refineTileCostMB = 0.0; + double refineTileCostUnpaddedMB = 0.0; + + if(depthMapParams.useRefine) + { + Refine refine(mp, depthMapParams.tileParams, depthMapParams.refineParams, 0 /*stream*/); + refineTileCostMB = refine.getDeviceMemoryConsumption(); + refineTileCostUnpaddedMB = refine.getDeviceMemoryConsumptionUnpadded(); + } + + const double tileCostMB = sgmTileCostMB + refineTileCostMB; + const double tileCostUnpaddedMB = sgmTileCostUnpaddedMB + refineTileCostUnpaddedMB; + + const double rcCamsCost = cameraFrameCostMB + depthMapParams.maxTCams * cameraFrameCostMB; + const double rcMinCostMB = rcCamsCost + tileCostMB; + const double rcMaxCostMB = rcCamsCost + 
nbTilesPerCamera * tileCostMB; + const int rcCamParams = (1 + depthMapParams.maxTCams) * 2; // number of camera parameters in device constant memory + + double deviceMemoryMB; + { + double availableMB, usedMB, totalMB; + getDeviceMemoryInfo(availableMB, usedMB, totalMB); + deviceMemoryMB = availableMB * 0.8; // available memory margin + } + + int nbAllowedSimultaneousRc = int(deviceMemoryMB / rcMaxCostMB); + int nbRemainingTiles = 0; + + { + const double remainingMemoryMB = deviceMemoryMB - (nbAllowedSimultaneousRc * rcMaxCostMB); + nbRemainingTiles = int(std::max(0.0, remainingMemoryMB - rcCamsCost) / tileCostMB); + } + + // check that we do not need more constant camera parameters than the ones in device constant memory + if(ALICEVISION_DEVICE_MAX_CONSTANT_CAMERA_PARAM_SETS < (nbAllowedSimultaneousRc * rcCamParams)) + { + nbAllowedSimultaneousRc = int(ALICEVISION_DEVICE_MAX_CONSTANT_CAMERA_PARAM_SETS / rcCamParams); + nbRemainingTiles = 0; + } + + const int out_nbAllowedStreams = nbAllowedSimultaneousRc * nbTilesPerCamera + nbRemainingTiles; + + ALICEVISION_LOG_INFO("Device memory:" << std::endl + << "\t- available: " << deviceMemoryMB << " MB" << std::endl + << "\t- requirement for the first tile: " << rcMinCostMB << " MB" << std::endl + << "\t- # computation buffers per tile: " << tileCostMB << " MB" << " (Sgm: " << sgmTileCostMB << " MB" << ", Refine: " << refineTileCostMB << " MB)" << std::endl + << "\t- # input images (R + " << depthMapParams.maxTCams << " Ts): " << rcCamsCost << " MB (single multi-res image size: " << cameraFrameCostMB << " MB)"); + + ALICEVISION_LOG_DEBUG( "Theoretical device memory cost for a tile without padding: " << tileCostUnpaddedMB << " MB" << " (Sgm: " << sgmTileCostUnpaddedMB << " MB" << ", Refine: " << refineTileCostUnpaddedMB << " MB)"); + + ALICEVISION_LOG_INFO("Parallelization:" << std::endl + << "\t- # tiles per image: " << nbTilesPerCamera << std::endl + << "\t- # simultaneous depth maps computation: " << ((nbRemainingTiles 
< 1) ? nbAllowedSimultaneousRc : (nbAllowedSimultaneousRc + 1)) << std::endl + << "\t- # streams: " << out_nbAllowedStreams); + + if(out_nbAllowedStreams < 1 || rcCamParams > ALICEVISION_DEVICE_MAX_CONSTANT_CAMERA_PARAM_SETS) + ALICEVISION_THROW_ERROR("Not enough GPU memory to compute a single tile."); + + return out_nbAllowedStreams; +} + +void getDepthMapParams(const mvsUtils::MultiViewParams& mp, DepthMapParams& depthMapParams) +{ + // get tile user parameters from MultiViewParams property_tree + + auto& tileParams = depthMapParams.tileParams; + tileParams.bufferWidth = mp.userParams.get("tile.bufferWidth", tileParams.bufferWidth); + tileParams.bufferHeight = mp.userParams.get("tile.bufferHeight", tileParams.bufferHeight); + tileParams.padding = mp.userParams.get("tile.padding", tileParams.padding); + // get SGM user parameters from MultiViewParams property_tree + auto& sgmParams = depthMapParams.sgmParams; sgmParams.scale = mp.userParams.get("sgm.scale", sgmParams.scale); sgmParams.stepXY = mp.userParams.get("sgm.stepXY", sgmParams.stepXY); sgmParams.stepZ = mp.userParams.get("sgm.stepZ", sgmParams.stepZ); sgmParams.wsh = mp.userParams.get("sgm.wsh", sgmParams.wsh); - sgmParams.maxTCams = mp.userParams.get("sgm.maxTCams", sgmParams.maxTCams); sgmParams.maxDepths = mp.userParams.get("sgm.maxDepths", sgmParams.maxDepths); - sgmParams.maxDepthsPerTc = mp.userParams.get("sgm.maxDepthsPerTc", sgmParams.maxDepthsPerTc); - sgmParams.maxSideXY = mp.userParams.get("sgm.maxSideXY", sgmParams.maxSideXY); + sgmParams.maxTCamsPerTile = mp.userParams.get("sgm.maxTCamsPerTile", sgmParams.maxTCamsPerTile); + sgmParams.seedsRangeInflate = mp.userParams.get("sgm.seedsRangeInflate", sgmParams.seedsRangeInflate); sgmParams.gammaC = mp.userParams.get("sgm.gammaC", sgmParams.gammaC); sgmParams.gammaP = mp.userParams.get("sgm.gammaP", sgmParams.gammaP); sgmParams.p1 = mp.userParams.get("sgm.p1", sgmParams.p1); sgmParams.p2Weighting = mp.userParams.get("sgm.p2Weighting", 
sgmParams.p2Weighting); sgmParams.filteringAxes = mp.userParams.get("sgm.filteringAxes", sgmParams.filteringAxes); sgmParams.useSfmSeeds = mp.userParams.get("sgm.useSfmSeeds", sgmParams.useSfmSeeds); - sgmParams.exportIntermediateResults = mp.userParams.get("sgm.exportIntermediateResults", sgmParams.exportIntermediateResults); -} + sgmParams.depthListPerTile = mp.userParams.get("sgm.depthListPerTile", sgmParams.depthListPerTile); + sgmParams.exportIntermediateDepthSimMaps = mp.userParams.get("sgm.exportIntermediateDepthSimMaps", sgmParams.exportIntermediateDepthSimMaps); + sgmParams.exportIntermediateVolumes = mp.userParams.get("sgm.exportIntermediateVolumes", sgmParams.exportIntermediateVolumes); + sgmParams.exportIntermediateCrossVolumes = mp.userParams.get("sgm.exportIntermediateCrossVolumes", sgmParams.exportIntermediateCrossVolumes); + sgmParams.exportIntermediateVolume9pCsv = mp.userParams.get("sgm.exportIntermediateVolume9pCsv", sgmParams.exportIntermediateVolume9pCsv); -void getRefineParams(const mvsUtils::MultiViewParams& mp, RefineParams& refineParams) -{ // get Refine user parameters from MultiViewParams property_tree + auto& refineParams = depthMapParams.refineParams; + refineParams.scale = mp.userParams.get("refine.scale", refineParams.scale); + refineParams.stepXY = mp.userParams.get("refine.stepXY", refineParams.stepXY); refineParams.wsh = mp.userParams.get("refine.wsh", refineParams.wsh); - refineParams.maxTCams = mp.userParams.get("refine.maxTCams", refineParams.maxTCams); - refineParams.nDepthsToRefine = mp.userParams.get("refine.nDepthsToRefine", refineParams.nDepthsToRefine); - refineParams.nSamplesHalf = mp.userParams.get("refine.nSamplesHalf", refineParams.nSamplesHalf); - refineParams.nIters = mp.userParams.get("refine.nIters", refineParams.nIters); + refineParams.halfNbDepths = mp.userParams.get("refine.halfNbDepths", refineParams.halfNbDepths); + refineParams.nbSubsamples = mp.userParams.get("refine.nbSubsamples", 
refineParams.nbSubsamples); + refineParams.maxTCamsPerTile = mp.userParams.get("refine.maxTCamsPerTile", refineParams.maxTCamsPerTile); + refineParams.optimizationNbIterations = mp.userParams.get("refine.optimizationNbIterations", refineParams.optimizationNbIterations); refineParams.sigma = mp.userParams.get("refine.sigma", refineParams.sigma); refineParams.gammaC = mp.userParams.get("refine.gammaC", refineParams.gammaC); refineParams.gammaP = mp.userParams.get("refine.gammaP", refineParams.gammaP); - refineParams.useTcOrRcPixSize = mp.userParams.get("refine.useTcOrRcPixSize", refineParams.useTcOrRcPixSize); - refineParams.exportIntermediateResults = mp.userParams.get("refine.exportIntermediateResults", refineParams.exportIntermediateResults); + refineParams.useRefineFuse = mp.userParams.get("refine.useRefineFuse", refineParams.useRefineFuse); + refineParams.useColorOptimization = mp.userParams.get("refine.useColorOptimization", refineParams.useColorOptimization); + refineParams.exportIntermediateDepthSimMaps = mp.userParams.get("refine.exportIntermediateDepthSimMaps", refineParams.exportIntermediateDepthSimMaps); + refineParams.exportIntermediateCrossVolumes = mp.userParams.get("refine.exportIntermediateCrossVolumes", refineParams.exportIntermediateCrossVolumes); + refineParams.exportIntermediateVolume9pCsv = mp.userParams.get("refine.exportIntermediateVolume9pCsv", refineParams.exportIntermediateVolume9pCsv); + + // get workflow user parameters from MultiViewParams property_tree + + depthMapParams.maxTCams = mp.userParams.get("depthMap.maxTCams", depthMapParams.maxTCams); + depthMapParams.chooseTCamsPerTile = mp.userParams.get("depthMap.chooseTCamsPerTile", depthMapParams.chooseTCamsPerTile); + depthMapParams.exportTilePattern = mp.userParams.get("depthMap.exportTilePattern", depthMapParams.exportTilePattern); + depthMapParams.autoAdjustSmallImage = mp.userParams.get("depthMap.autoAdjustSmallImage", depthMapParams.autoAdjustSmallImage); } -void 
estimateAndRefineDepthMaps(int cudaDeviceIndex, mvsUtils::MultiViewParams& mp, const std::vector& cams) +void estimateAndRefineDepthMaps(int cudaDeviceId, mvsUtils::MultiViewParams& mp, const std::vector& cams) { - SgmParams sgmParams; - RefineParams refineParams; + // set the device to use for GPU executions + // the CUDA runtime API is thread-safe, it maintains per-thread state about the current device + setCudaDeviceId(cudaDeviceId); + + // initialize RAM image cache + mvsUtils::ImagesCache> ic(mp, image::EImageColorSpace::LINEAR); // get user parameters from MultiViewParams property_tree - getSgmParams(mp, sgmParams); - getRefineParams(mp, refineParams); + DepthMapParams depthMapParams; + getDepthMapParams(mp, depthMapParams); - // compute scale and step - computeScaleStepSgmParams(mp, sgmParams); + // compute SGM scale and step (set to -1) + const bool autoSgmScaleStep = computeScaleStepSgmParams(mp, depthMapParams.sgmParams); - // load images from files into RAM - mvsUtils::ImagesCache> ic(mp, image::EImageColorSpace::LINEAR); + // single tile case, update parameters + if(hasOnlyOneTile(depthMapParams.tileParams, mp.getMaxImageWidth(), mp.getMaxImageHeight())) + updateDepthMapParamsForSingleTileComputation(mp, autoSgmScaleStep, depthMapParams); - // load stuff on GPU memory and creates multi-level images and computes gradients - PlaneSweepingCuda cps(cudaDeviceIndex, ic, mp, sgmParams.scale); + // compute the maximum downscale factor + const int maxDownscale = std::max(depthMapParams.sgmParams.scale * depthMapParams.sgmParams.stepXY, + depthMapParams.refineParams.scale * depthMapParams.refineParams.stepXY); - for(const int rc : cams) + if(depthMapParams.tileParams.padding % maxDownscale != 0) + { + const int padding = divideRoundUp(depthMapParams.tileParams.padding, maxDownscale) * maxDownscale; + ALICEVISION_LOG_WARNING("Override tiling padding parameter (before: " << depthMapParams.tileParams.padding << ", now: " << padding << ")."); + 
depthMapParams.tileParams.padding = padding; + } + + // compute tile ROI list + std::vector tileRoiList; + getTileRoiList(depthMapParams.tileParams, mp.getMaxImageWidth(), mp.getMaxImageHeight(), maxDownscale, tileRoiList); + const int nbTilesPerCamera = tileRoiList.size(); + + // log tiling information and ROI list + logTileRoiList(depthMapParams.tileParams, mp.getMaxImageWidth(), mp.getMaxImageHeight(), maxDownscale, tileRoiList); + + // log SGM downscale & stepXY + ALICEVISION_LOG_INFO("SGM parameters:" << std::endl + << "\t- scale: " << depthMapParams.sgmParams.scale << std::endl + << "\t- stepXY: " << depthMapParams.sgmParams.stepXY); + + // log Refine downscale & stepXY + ALICEVISION_LOG_INFO("Refine parameters:" << std::endl + << "\t- scale: " << depthMapParams.refineParams.scale << std::endl + << "\t- stepXY: " << depthMapParams.refineParams.stepXY); + + // get maximum number of stream (simultaneous tiles) + const int nbStreams = getNbStreams(mp, depthMapParams, nbTilesPerCamera); + DeviceStreamManager deviceStreamManager(nbStreams); + + // build device cache + const int nbRcPerBatch = divideRoundUp(nbStreams, nbTilesPerCamera); // number of R cameras in the same batch + const int nbTilesPerBatch = nbRcPerBatch * nbTilesPerCamera; // number of tiles in the same batch + const bool hasRcWithoutDownscale = depthMapParams.sgmParams.scale == 1 || (depthMapParams.useRefine && depthMapParams.refineParams.scale == 1); + const int nbCamerasPerSgm = (1 + depthMapParams.maxTCams) + (hasRcWithoutDownscale ? 0 : 1); // number of Sgm cameras per R camera + const int nbCamerasPerRefine = depthMapParams.useRefine ? 
(1 + depthMapParams.maxTCams) : 0; // number of Refine cameras per R camera + const int nbCamerasPerBatch = nbRcPerBatch * (nbCamerasPerSgm + nbCamerasPerRefine); // number of cameras in the same batch + + DeviceCache& deviceCache = DeviceCache::getInstance(); + deviceCache.buildCache(nbCamerasPerBatch); + + // build tile list + // order by R camera + std::vector tiles; + tiles.reserve(cams.size() * tileRoiList.size()); + + for(int rc : cams) + { + // compute T cameras list per R camera + const std::vector tCams = mp.findNearestCamsFromLandmarks(rc, depthMapParams.maxTCams).getDataWritable(); + const ROI rcImageRoi(Range(0, mp.getWidth(rc)), Range(0, mp.getHeight(rc))); + + for(std::size_t ti = 0; ti < tileRoiList.size(); ++ti) + { + Tile t; + + t.id = ti; + t.nbTiles = nbTilesPerCamera; + t.rc = rc; + t.roi = intersect(tileRoiList.at(ti), rcImageRoi); + + if(t.roi.isEmpty()) + { + // do nothing, this ROI cannot intersect the R camera ROI. + } + else if(depthMapParams.chooseTCamsPerTile) + { + // find nearest T cameras per tile + t.sgmTCams = mp.findTileNearestCams(rc, depthMapParams.sgmParams.maxTCamsPerTile, tCams, t.roi); + + if(depthMapParams.useRefine) + t.refineTCams = mp.findTileNearestCams(rc, depthMapParams.refineParams.maxTCamsPerTile, tCams, t.roi); + } + else + { + // use previously selected T cameras from the entire image + t.sgmTCams = tCams; + t.refineTCams = tCams; + } + + tiles.push_back(t); + } + } + + // allocate Sgm and Refine per stream in device memory + std::vector sgmPerStream; + std::vector refinePerStream; + + sgmPerStream.reserve(nbStreams); + refinePerStream.reserve(depthMapParams.useRefine ? 
nbStreams : 0); + + // initialize Sgm objects + for(int i = 0; i < nbStreams; ++i) + sgmPerStream.emplace_back(mp, depthMapParams.tileParams, depthMapParams.sgmParams, deviceStreamManager.getStream(i)); + + // initialize Refine objects + if(depthMapParams.useRefine) + for(int i = 0; i < nbStreams; ++i) + refinePerStream.emplace_back(mp, depthMapParams.tileParams, depthMapParams.refineParams, deviceStreamManager.getStream(i)); + + // allocate final deth/similarity map tile list in host memory + std::vector>> depthSimMapTilePerCam(nbRcPerBatch); + std::vector>> depthMinMaxTilePerCam(nbRcPerBatch); + + for(int i = 0; i < nbRcPerBatch; ++i) { - Sgm sgm(sgmParams, mp, cps, rc); - Refine refine(refineParams, mp, cps, rc); + auto& depthSimMapTiles = depthSimMapTilePerCam.at(i); + auto& depthMinMaxTiles = depthMinMaxTilePerCam.at(i); + + depthSimMapTiles.resize(nbTilesPerCamera); + depthMinMaxTiles.resize(nbTilesPerCamera); + + for(int j = 0; j < nbTilesPerCamera; ++j) + { + if(depthMapParams.useRefine) + depthSimMapTiles.at(j).allocate(refinePerStream.front().getDeviceDepthSimMap().getSize()); + else // final depth/similarity map is SGM only + depthSimMapTiles.at(j).allocate(sgmPerStream.front().getDeviceDepthSimMap().getSize()); + } + } + + // log device memory information + logDeviceMemoryInfo(); + + // compute number of batches + const int nbBatches = divideRoundUp(int(tiles.size()), nbTilesPerBatch); + + // compute each batch of R cameras + for(int b = 0; b < nbBatches; ++b) + { + // find first/last tile to compute + const int firstTileIndex = b * nbTilesPerBatch; + const int lastTileIndex = std::min((b + 1) * nbTilesPerBatch, int(tiles.size())); - // preload sgmTcams async + // load tile R and corresponding T cameras in device cache + for(int i = firstTileIndex; i < lastTileIndex; ++i) + { + const Tile& tile = tiles.at(i); + + // add Sgm R camera to Device cache + deviceCache.addCamera(tile.rc, depthMapParams.sgmParams.scale, ic, mp); + + // add Sgm T cameras to 
Device cache + for(const int tc : tile.sgmTCams) + deviceCache.addCamera(tc, depthMapParams.sgmParams.scale, ic, mp); + + if(depthMapParams.useRefine) + { + // add Refine R camera to Device cache + deviceCache.addCamera(tile.rc, depthMapParams.refineParams.scale, ic, mp); + + // add Refine T cameras to Device cache + for(const int tc : tile.refineTCams) + deviceCache.addCamera(tc, depthMapParams.refineParams.scale, ic, mp); + } + + if(depthMapParams.sgmParams.scale != 1 && (!depthMapParams.useRefine || depthMapParams.refineParams.scale != 1)) + { + // add SGM R camera at scale 1 to Device cache. + // R camera parameters at scale 1 are required for SGM retrieve best depth + // TODO: Add only camera parameters to Device cache + deviceCache.addCamera(tile.rc, 1, ic, mp); + } + } + + // wait for camera loading in device cache + cudaDeviceSynchronize(); + + // compute each batch tile + for(int i = firstTileIndex; i < lastTileIndex; ++i) { - const auto startTime = std::chrono::high_resolution_clock::now(); - cps._ic.refreshImages_async(sgm.getTCams().getData()); - ALICEVISION_LOG_INFO("Preload T cameras done in: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - startTime).count() << " ms."); + Tile& tile = tiles.at(i); + const int batchCamIndex = tile.rc % nbRcPerBatch; + const int streamIndex = tile.id % nbStreams; + + // do not compute empty ROI + // some images in the dataset may be smaller than others + if(tile.roi.isEmpty()) + continue; + + // get tile result depth/similarity map in host memory + CudaHostMemoryHeap& tileDepthSimMap_hmh = depthSimMapTilePerCam.at(batchCamIndex).at(tile.id); + + // check T cameras + if(tile.sgmTCams.empty() || (depthMapParams.useRefine && tile.refineTCams.empty())) // no T camera found + { + resetDepthSimMap(tileDepthSimMap_hmh); + continue; + } + + // build tile SGM depth list + SgmDepthList sgmDepthList(mp, depthMapParams.sgmParams, tile); + + // compute the R camera depth list + 
sgmDepthList.computeListRc(); + + // check number of depths + if(sgmDepthList.getDepths().empty()) // no depth found + { + resetDepthSimMap(tileDepthSimMap_hmh); + depthMinMaxTilePerCam.at(batchCamIndex).at(tile.id) = {0.f, 0.f}; + continue; + } + + // remove T cameras with no depth found. + sgmDepthList.removeTcWithNoDepth(tile); + + // store min/max depth + depthMinMaxTilePerCam.at(batchCamIndex).at(tile.id) = sgmDepthList.getMinMaxDepths(); + + // log debug camera / depth information + sgmDepthList.logRcTcDepthInformation(); + + // check if starting and stopping depth are valid + sgmDepthList.checkStartingAndStoppingDepth(); + + // compute Semi-Global Matching + Sgm& sgm = sgmPerStream.at(streamIndex); + sgm.sgmRc(tile, sgmDepthList); + + // compute Refine + if(depthMapParams.useRefine) + { + Refine& refine = refinePerStream.at(streamIndex); + refine.refineRc(tile, sgm.getDeviceDepthSimMap(), sgm.getDeviceNormalMap()); + + // copy Refine depth/similarity map from device to host + tileDepthSimMap_hmh.copyFrom(refine.getDeviceDepthSimMap(), deviceStreamManager.getStream(streamIndex)); + } + else + { + // copy Sgm depth/similarity map from device to host + tileDepthSimMap_hmh.copyFrom(sgm.getDeviceDepthSimMap(), deviceStreamManager.getStream(streamIndex)); + } } - sgm.sgmRc(); + // wait for tiles batch computation + cudaDeviceSynchronize(); + + // find first and last tile R camera + const int firstRc = tiles.at(firstTileIndex).rc; + int lastRc = tiles.at(lastTileIndex - 1).rc; + + // check if last tile depth map is finished + if(lastTileIndex < tiles.size() && (tiles.at(lastTileIndex).rc == lastRc)) + --lastRc; - // rc has no tcam - if(refine.getTCams().empty() || sgm.getDepths().empty()) + // write depth/sim map result + for(int c = firstRc; c <= lastRc; ++c) { - ALICEVISION_LOG_INFO("No T cameras for camera rc: " << rc << ", generate default depth and sim maps."); - refine.getDepthSimMap().save(); // generate default depthSimMap - continue; + const int 
batchCamIndex = c % nbRcPerBatch; + + if(depthMapParams.useRefine) + writeDepthSimMapFromTileList(c, mp, depthMapParams.tileParams, tileRoiList, depthSimMapTilePerCam.at(batchCamIndex), depthMapParams.refineParams.scale, depthMapParams.refineParams.stepXY); + else + writeDepthSimMapFromTileList(c, mp, depthMapParams.tileParams, tileRoiList, depthSimMapTilePerCam.at(batchCamIndex), depthMapParams.sgmParams.scale, depthMapParams.sgmParams.stepXY); + + if(depthMapParams.exportTilePattern) + exportDepthSimMapTilePatternObj(c, mp, tileRoiList, depthMinMaxTilePerCam.at(batchCamIndex)); } + } - refine.refineRc(sgm.getDepthSimMap()); + // merge intermediate results tiles if needed and desired + if(tiles.size() > cams.size()) + { + // merge tiles if needed and desired + for(int rc : cams) + { + if(depthMapParams.sgmParams.exportIntermediateDepthSimMaps) + { + mergeDepthSimMapTiles(rc, mp, depthMapParams.sgmParams.scale, depthMapParams.sgmParams.stepXY, "_sgm"); + } - // write results - refine.getDepthSimMap().save(); + if(depthMapParams.useRefine && depthMapParams.refineParams.exportIntermediateDepthSimMaps) + { + mergeDepthSimMapTiles(rc, mp, depthMapParams.refineParams.scale, depthMapParams.refineParams.stepXY, "_sgmUpscaled"); + mergeDepthSimMapTiles(rc, mp, depthMapParams.refineParams.scale, depthMapParams.refineParams.stepXY, "_refinedFused"); + } + } } + + // some objects countains CUDA objects + // this objects should be destroyed before the end of the program (i.e. 
the end of the CUDA context) + DeviceCache::getInstance().clear(); + sgmPerStream.clear(); + refinePerStream.clear(); } -void computeNormalMaps(int cudaDeviceIndex, mvsUtils::MultiViewParams& mp, const std::vector& cams) +void computeNormalMaps(int cudaDeviceId, mvsUtils::MultiViewParams& mp, const std::vector& cams) { + // set the device to use for GPU executions + // the CUDA runtime API is thread-safe, it maintains per-thread state about the current device + setCudaDeviceId(cudaDeviceId); + const float gammaC = 1.0f; const float gammaP = 1.0f; const int wsh = 3; mvsUtils::ImagesCache> ic(mp, image::EImageColorSpace::LINEAR); - PlaneSweepingCuda cps(cudaDeviceIndex, ic, mp, 1); - NormalMapping* mapping = cps.createNormalMapping(); + DeviceNormalMapper normalMapper; for(const int rc : cams) { @@ -124,19 +584,52 @@ void computeNormalMaps(int cudaDeviceIndex, mvsUtils::MultiViewParams& mp, const if (!fs::exists(normalMapFilepath)) { + const int scale = 1; + image::Image depthMap; - readImage(getFileNameFromIndex(mp, rc, mvsUtils::EFileType::depthMap, 0), depthMap, - image::EImageColorSpace::NO_CONVERSION); + readImage(getFileNameFromIndex(mp, rc, mvsUtils::EFileType::depthMap, 0), depthMap, image::EImageColorSpace::NO_CONVERSION); image::Image normalMap(mp.getWidth(rc), mp.getHeight(rc)); - cps.computeNormalMap(mapping, depthMap, normalMap, rc, 1, gammaC, gammaP, wsh); + const int w = mp.getWidth(rc) / scale; + const int h = mp.getHeight(rc) / scale; + + const system::Timer timer; + ALICEVISION_LOG_INFO("Compute normal map (rc: " << rc << ")"); + + // Fill Camera Struct + + fillHostCameraParameters(*(normalMapper.cameraParameters_h), rc, scale, mp); + normalMapper.loadCameraParameters(); + normalMapper.allocHostMaps(w, h); + normalMapper.copyDepthMap(depthMap.data(), depthMap.size()); + + cuda_computeNormalMap(&normalMapper, w, h, wsh, gammaC, gammaP); + + float3* normalMapPtr = normalMapper.getNormalMapHst(); + + constexpr bool q = (sizeof(image::RGBfColor[2]) == 
sizeof(float3[2])); + if(q == true) + { + memcpy(normalMap.data(), normalMapper.getNormalMapHst(), w * h * sizeof(float3)); + } + else + { + for(int i = 0; i < w * h; i++) + { + normalMap(i).r() = normalMapPtr[i].x; + normalMap(i).g() = normalMapPtr[i].y; + normalMap(i).b() = normalMapPtr[i].z; + } + } + image::writeImage(normalMapFilepath, normalMap, image::ImageWriteOptions().toColorSpace(image::EImageColorSpace::LINEAR) .storageDataType(image::EStorageDataType::Float)); + + ALICEVISION_LOG_INFO("Compute normal map (rc: " << rc << ") done in: " << timer.elapsedMs() << " ms."); } } - cps.deleteNormalMapping(mapping); } } // namespace depthMap diff --git a/src/aliceVision/depthMap/depthMap.hpp b/src/aliceVision/depthMap/depthMap.hpp index 6ea8da5c0f..be0b5c6118 100644 --- a/src/aliceVision/depthMap/depthMap.hpp +++ b/src/aliceVision/depthMap/depthMap.hpp @@ -15,8 +15,8 @@ namespace mvsUtils { class MultiViewParams; } namespace depthMap { -void estimateAndRefineDepthMaps(int cudaDeviceIndex, mvsUtils::MultiViewParams& mp, const std::vector& cams); -void computeNormalMaps(int cudaDeviceIndex, mvsUtils::MultiViewParams& mp, const std::vector& cams); +void estimateAndRefineDepthMaps(int cudaDeviceId, mvsUtils::MultiViewParams& mp, const std::vector& cams); +void computeNormalMaps(int cudaDeviceId, mvsUtils::MultiViewParams& mp, const std::vector& cams); } // namespace depthMap } // namespace aliceVision diff --git a/src/aliceVision/depthMap/depthMapUtils.cpp b/src/aliceVision/depthMap/depthMapUtils.cpp new file mode 100644 index 0000000000..d77d55b82a --- /dev/null +++ b/src/aliceVision/depthMap/depthMapUtils.cpp @@ -0,0 +1,370 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#include "depthMapUtils.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace aliceVision { +namespace depthMap { + +void writeDeviceImage(const CudaDeviceMemoryPitched& in_img_dmp, const std::string& path) +{ + const CudaSize<2>& imgSize = in_img_dmp.getSize(); + + // copy image from device pitched memory to host memory + CudaHostMemoryHeap img_hmh(imgSize); + img_hmh.copyFrom(in_img_dmp); + + // copy image from host memory to an Image + image::Image img(imgSize.x(), imgSize.y(), true, {0.f,0.f,0.f}); + + for(size_t x = 0; x < imgSize.x(); ++x) + { + for(size_t y = 0; y < imgSize.y(); ++y) + { + const CudaRGBA& rgba_hmh = img_hmh(x, y); + image::RGBfColor& rgb = img(int(y), int(x)); + rgb.r() = rgba_hmh.x; + rgb.g() = rgba_hmh.y; + rgb.b() = rgba_hmh.z; + } + } + + // write the vector buffer + image::writeImage(path, img, image::ImageWriteOptions().toColorSpace(image::EImageColorSpace::NO_CONVERSION).storageDataType(image::EStorageDataType::Float)); +} + +void writeDeviceImage(const CudaDeviceMemoryPitched& in_img_dmp, const std::string& path) +{ + const CudaSize<2>& imgSize = in_img_dmp.getSize(); + + // copy image from device pitched memory to host memory + CudaHostMemoryHeap img_hmh(imgSize); + img_hmh.copyFrom(in_img_dmp); + + // copy image from host memory to an Image + image::Image img(imgSize.x(), imgSize.y(), true, {0.f, 0.f, 0.f}); + + for(size_t x = 0; x < imgSize.x(); ++x) + { + for(size_t y = 0; y < imgSize.y(); ++y) + { + const float3& rgba_hmh = img_hmh(x, y); + image::RGBfColor& rgb = img(int(y), int(x)); + rgb.r() = rgba_hmh.x; + rgb.g() = rgba_hmh.y; + rgb.b() = rgba_hmh.z; + } + } + + // write the vector buffer + image::writeImage(path, img, image::ImageWriteOptions().toColorSpace(image::EImageColorSpace::NO_CONVERSION).storageDataType(image::EStorageDataType::Float)); +} + +void resetDepthSimMap(CudaHostMemoryHeap& inout_depthSimMap_hmh, float depth, float sim) +{ + const CudaSize<2>& 
depthSimMapSize = inout_depthSimMap_hmh.getSize(); + + for(size_t x = 0; x < depthSimMapSize.x(); ++x) + { + for(size_t y = 0; y < depthSimMapSize.y(); ++y) + { + float2& depthSim_hmh = inout_depthSimMap_hmh(x, y); + depthSim_hmh.x = depth; + depthSim_hmh.y = sim; + } + } +} + +void copyDepthSimMap(image::Image& out_depthMap, image::Image& out_simMap, const CudaHostMemoryHeap& in_depthSimMap_hmh, const ROI& roi, int downscale) +{ + const ROI downscaledROI = downscaleROI(roi, downscale); + const int width = int(downscaledROI.width()); + const int height = int(downscaledROI.height()); + + // resize output vectors + out_depthMap.resize(width, height); + out_simMap.resize(width, height); + + // copy image from host memory to output vectors + for(int x = 0; x < width; ++x) + { + for(int y = 0; y < height; ++y) + { + const float2& depthSim = in_depthSimMap_hmh(x, y); + out_depthMap(y, x) = depthSim.x; + out_simMap(y, x) = depthSim.y; + } + } +} + +void copyDepthSimMap(image::Image& out_depthMap, image::Image& out_simMap, const CudaDeviceMemoryPitched& in_depthSimMap_dmp, const ROI& roi, int downscale) +{ + // copy depth/sim maps from device pitched memory to host memory + CudaHostMemoryHeap depthSimMap_hmh(in_depthSimMap_dmp.getSize()); + depthSimMap_hmh.copyFrom(in_depthSimMap_dmp); + + copyDepthSimMap(out_depthMap, out_simMap, depthSimMap_hmh, roi, downscale); +} + +void writeDepthSimMap(int rc, + const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const ROI& roi, + const CudaHostMemoryHeap& in_depthSimMap_hmh, + int scale, + int step, + const std::string& customSuffix) +{ + const int scaleStep = scale * step; + + image::Image depthMap; + image::Image simMap; + + copyDepthSimMap(depthMap, simMap, in_depthSimMap_hmh, roi, scaleStep); + + mvsUtils::writeDepthSimMap(rc, mp, tileParams, roi, depthMap, simMap, scale, step, customSuffix); +} + +void writeDepthSimMap(int rc, + const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& 
tileParams, + const ROI& roi, + const CudaDeviceMemoryPitched& in_depthSimMap_dmp, + int scale, + int step, + const std::string& customSuffix) +{ + const int scaleStep = scale * step; + + image::Image depthMap; + image::Image simMap; + + copyDepthSimMap(depthMap, simMap, in_depthSimMap_dmp, roi, scaleStep); + + mvsUtils::writeDepthSimMap(rc, mp, tileParams, roi, depthMap, simMap, scale, step, customSuffix); +} + +void writeDepthSimMapFromTileList(int rc, + const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const std::vector& tileRoiList, + const std::vector>& in_depthSimMapTiles_hmh, + int scale, + int step, + const std::string& customSuffix) +{ + ALICEVISION_LOG_TRACE("Merge and write depth/similarity map tiles (rc: " << rc << ", view id: " << mp.getViewId(rc) << ")."); + + const ROI imageRoi(Range(0, mp.getWidth(rc)), Range(0, mp.getHeight(rc))); + + const int scaleStep = scale * step; + const int width = divideRoundUp(mp.getWidth(rc), scaleStep); + const int height = divideRoundUp(mp.getHeight(rc), scaleStep); + + image::Image depthMap(width, height, true, 0.0f); // map should be initialize, additive process + image::Image simMap(width, height, true, 0.0f); // map should be initialize, additive process + + for(size_t i = 0; i < tileRoiList.size(); ++i) + { + const ROI roi = intersect(tileRoiList.at(i), imageRoi); + + if(roi.isEmpty()) + continue; + + image::Image tileDepthMap; + image::Image tileSimMap; + + // copy tile depth/sim map from host memory + copyDepthSimMap(tileDepthMap, tileSimMap, in_depthSimMapTiles_hmh.at(i), roi, scaleStep); + + // add tile maps to the full-size maps with weighting + addTileMapWeighted(rc, mp, tileParams, roi, scaleStep, tileDepthMap, depthMap); + addTileMapWeighted(rc, mp, tileParams, roi, scaleStep, tileSimMap, simMap); + } + + // write full-size maps on disk + mvsUtils::writeDepthSimMap(rc, mp, depthMap, simMap, scale, step, customSuffix); +} + +void mergeDepthSimMapTiles(int rc, + const 
mvsUtils::MultiViewParams& mp, + int scale, + int step, + const std::string& customSuffix) +{ + image::Image depthMap; + image::Image simMap; + + mvsUtils::readDepthSimMap(rc, mp, depthMap, simMap, scale, step, customSuffix); // read and merge tiles + mvsUtils::writeDepthSimMap(rc, mp, depthMap, simMap, scale, step, customSuffix); // write the merged depth/sim maps + mvsUtils::deleteDepthSimMapTiles(rc, mp, scale, step, customSuffix); // delete tile files +} + +void exportDepthSimMapTilePatternObj(int rc, + const mvsUtils::MultiViewParams& mp, + const std::vector& tileRoiList, + const std::vector>& tileMinMaxDepthsList) +{ + const std::string filepath = mvsUtils::getFileNameFromIndex(mp, rc, mvsUtils::EFileType::tilePattern, 1); + + const int nbRoiCornerVertices = 6; // 6 vertices per ROI corner + const int nbRoiCornerFaces = 4; // 4 faces per ROI corner + const int nbRoiVertices = nbRoiCornerVertices * 4; // 24 vertices per ROI + const int nbRoiFaces = nbRoiCornerFaces * 4 + 2; // 18 faces per ROI (16 for corners + 2 for first/last depth) + + std::vector vertices(nbRoiVertices * tileRoiList.size()); + std::vector> faces(nbRoiFaces * tileRoiList.size()); + + const double cornerPixSize = tileRoiList.front().x.size() / 5; // corner bevel size in image pixel + + // 2 points offset from corner (to draw a bevel) + const std::vector> roiCornerOffsets = { + {{ cornerPixSize, 0.0},{0.0, cornerPixSize}}, // corner (roi.x.begin, roi.y.begin) + {{ cornerPixSize, 0.0},{0.0, -cornerPixSize}}, // corner (roi.x.begin, roi.y.end ) + {{-cornerPixSize, 0.0},{0.0, cornerPixSize}}, // corner (roi.x.end, roi.y.begin) + {{-cornerPixSize, 0.0},{0.0, -cornerPixSize}} // corner (roi.x.end, roi.y.end ) + }; + + // vertex color sets + const std::vector roiColors = { + {1, 0, 0, 0}, + {0, 1, 0, 0}, + {0, 0, 1, 0}, + {1, 1, 0, 0}, + {0, 1, 1, 0}, + {1, 0, 1, 0}, + }; + + // build vertices and faces for each ROI + for(std::size_t ri = 0; ri < tileRoiList.size(); ++ri) + { + const ROI& roi = 
tileRoiList.at(ri); + + const auto& minMaxDepth = tileMinMaxDepthsList.at(ri); + const Point3d planeN = (mp.iRArr[rc] * Point3d(0.0f, 0.0f, 1.0f)).normalize(); // plane normal + const Point3d firstPlaneP = mp.CArr[rc] + planeN * minMaxDepth.first; // first depth plane point + const Point3d lastPlaneP = mp.CArr[rc] + planeN * minMaxDepth.second; // last depth plane point + + const std::vector roiCorners = { + {double(roi.x.begin), double(roi.y.begin)}, + {double(roi.x.begin), double(roi.y.end) }, + {double(roi.x.end), double(roi.y.begin)}, + {double(roi.x.end), double(roi.y.end) } + }; + + // build vertices and faces for each ROI corner + for(std::size_t ci = 0; ci < roiCorners.size(); ++ci) + { + const std::size_t vStartIdx = ri * nbRoiVertices + ci * nbRoiCornerVertices; + const std::size_t fStartIdx = ri * nbRoiFaces + ci * nbRoiCornerFaces; + + const auto& corner = roiCorners.at(ci); // corner 2d point + const auto& cornerOffsets = roiCornerOffsets.at(ci); + + const Point2d cornerX = corner + cornerOffsets.first; // corner 2d point X offsetted + const Point2d cornerY = corner + cornerOffsets.second; // corner 2d point Y offsetted + + vertices[vStartIdx ] = linePlaneIntersect(mp.CArr[rc], (mp.iCamArr[rc] * corner ).normalize(), firstPlaneP, planeN); + vertices[vStartIdx + 1] = linePlaneIntersect(mp.CArr[rc], (mp.iCamArr[rc] * corner ).normalize(), lastPlaneP , planeN); + vertices[vStartIdx + 2] = linePlaneIntersect(mp.CArr[rc], (mp.iCamArr[rc] * cornerX).normalize(), firstPlaneP, planeN); + vertices[vStartIdx + 3] = linePlaneIntersect(mp.CArr[rc], (mp.iCamArr[rc] * cornerX).normalize(), lastPlaneP , planeN); + vertices[vStartIdx + 4] = linePlaneIntersect(mp.CArr[rc], (mp.iCamArr[rc] * cornerY).normalize(), firstPlaneP, planeN); + vertices[vStartIdx + 5] = linePlaneIntersect(mp.CArr[rc], (mp.iCamArr[rc] * cornerY).normalize(), lastPlaneP , planeN); + + faces[fStartIdx ] = {vStartIdx , vStartIdx + 1, vStartIdx + 2}; + faces[fStartIdx + 1] = {vStartIdx + 1, 
vStartIdx + 2, vStartIdx + 3}; + faces[fStartIdx + 2] = {vStartIdx , vStartIdx + 1, vStartIdx + 4}; + faces[fStartIdx + 3] = {vStartIdx + 1, vStartIdx + 4, vStartIdx + 5}; + } + + // build first/last depth faces + { + const std::size_t vStartIdx = ri * nbRoiVertices; + const std::size_t fStartIdx = ri * nbRoiFaces + roiCorners.size() * nbRoiCornerFaces; + + // first depth + faces[fStartIdx ] = {vStartIdx, + vStartIdx + 1 * nbRoiCornerVertices, + vStartIdx + 2 * nbRoiCornerVertices}; + + // last depth + faces[fStartIdx + 1] = {vStartIdx + 1 * nbRoiCornerVertices + 1, + vStartIdx + 2 * nbRoiCornerVertices + 1, + vStartIdx + 3 * nbRoiCornerVertices + 1}; + } + } + + aiScene scene; + + scene.mRootNode = new aiNode; + + scene.mMeshes = new aiMesh*[1]; + scene.mNumMeshes = 1; + scene.mRootNode->mMeshes = new unsigned int[1]; + scene.mRootNode->mNumMeshes = 1; + + scene.mMaterials = new aiMaterial*[1]; + scene.mNumMaterials = 1; + scene.mMaterials[0] = new aiMaterial; + + scene.mRootNode->mMeshes[0] = 0; + scene.mMeshes[0] = new aiMesh; + aiMesh* aimesh = scene.mMeshes[0]; + aimesh->mMaterialIndex = 0; + + aimesh->mNumVertices = vertices.size(); + aimesh->mVertices = new aiVector3D[vertices.size()]; + + for(std::size_t i = 0; i < vertices.size(); ++i) + { + const auto& vertex = vertices[i]; + aimesh->mVertices[i].x = vertex.x; + aimesh->mVertices[i].y = -vertex.y; // openGL display + aimesh->mVertices[i].z = -vertex.z; // openGL display + } + + aimesh->mColors[0] = new aiColor4D[vertices.size()]; + + for(std::size_t i = 0; i < vertices.size(); ++i) + { + aimesh->mColors[0][i] = roiColors[(i/nbRoiVertices) % roiColors.size()]; + } + + aimesh->mNumFaces = faces.size(); + aimesh->mFaces = new aiFace[faces.size()]; + + for(std::size_t i = 0; i < faces.size(); ++i) + { + const auto& face = faces[i]; + aimesh->mFaces[i].mNumIndices = 3; + aimesh->mFaces[i].mIndices = new unsigned int[3]; + aimesh->mFaces[i].mIndices[0] = std::get<0>(face); + aimesh->mFaces[i].mIndices[1] = 
std::get<1>(face); + aimesh->mFaces[i].mIndices[2] = std::get<2>(face); + } + + const std::string formatId = "objnomtl"; + const unsigned int pPreprocessing = 0u; + + Assimp::Exporter exporter; + exporter.Export(&scene, formatId, filepath, pPreprocessing); + + ALICEVISION_LOG_INFO("Save debug tiles pattern obj (rc: " << rc << ", view id: " << mp.getViewId(rc) << ") done."); +} + +} // namespace depthMap +} // namespace aliceVision diff --git a/src/aliceVision/depthMap/depthMapUtils.hpp b/src/aliceVision/depthMap/depthMapUtils.hpp new file mode 100644 index 0000000000..c318120d8c --- /dev/null +++ b/src/aliceVision/depthMap/depthMapUtils.hpp @@ -0,0 +1,161 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace aliceVision { +namespace depthMap { + +/** + * @brief Copy an image from device memory to host memory and write on disk. + * @note This function can be useful for code analysis and debugging. + * @param[in] in_img_dmp the image in device memory + * @param[in] path the path of the output image on disk + */ +void writeDeviceImage(const CudaDeviceMemoryPitched& in_img_dmp, const std::string& path); + +/** + * @brief Copy an image from device memory to host memory and write on disk. + * @note This function can be useful for code analysis and debugging. + * @param[in] in_img_dmp the image in device memory + * @param[in] path the path of the output image on disk + */ +void writeDeviceImage(const CudaDeviceMemoryPitched& in_img_dmp, const std::string& path); + +/** + * @brief Reset a depth/similarity map in host memory to the given default depth and similarity. 
+ * @param[in,out] inout_depthSimMap_hmh the depth/similarity map in host memory + * @param[in] depth the depth reset value + * @param[in] sim the sim reset value + */ +void resetDepthSimMap(CudaHostMemoryHeap& inout_depthSimMap_hmh, float depth = -1.f, float sim = 1.f); + +/** + * @brief Copy a depth/similarity map from host memory to 2 images. + * @param[out] out_depthMap the output depth image + * @param[out] out_simMap the output similarity image + * @param[in] in_depthSimMap_hmh the depth/similarity map in host memory + * @param[in] roi the 2d region of interest without any downscale apply + * @param[in] downscale the depth/similarity map downscale factor + */ +void copyDepthSimMap(image::Image& out_depthMap, + image::Image& out_simMap, + const CudaHostMemoryHeap& in_depthSimMap_hmh, + const ROI& roi, + int downscale); +/** + * @brief Copy a depth/similarity map from device memory to 2 images. + * @param[out] out_depthMap the output depth image + * @param[out] out_simMap the output similarity image + * @param[in] in_depthSimMap_dmp the depth/similarity map in device memory + * @param[in] roi the 2d region of interest without any downscale apply + * @param[in] downscale the depth/similarity map downscale factor + */ +void copyDepthSimMap(image::Image& out_depthMap, + image::Image& out_simMap, + const CudaDeviceMemoryPitched& in_depthSimMap_dmp, + const ROI& roi, + int downscale); + +/** + * @brief Write a depth/similarity map on disk from host memory. 
+ * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] tileParams tile workflow parameters + * @param[in] roi the 2d region of interest without any downscale apply + * @param[in] in_depthSimMap_hmh the depth/similarity map in host memory + * @param[in] scale the depth/similarity map downscale factor + * @param[in] step the depth/similarity map step factor + * @param[in] customSuffix the filename custom suffix + */ +void writeDepthSimMap(int rc, + const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const ROI& roi, + const CudaHostMemoryHeap& in_depthSimMap_hmh, + int scale, + int step, + const std::string& customSuffix = ""); + +/** + * @brief Write a depth/similarity map on disk from device memory. + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] tileParams tile workflow parameters + * @param[in] roi the 2d region of interest without any downscale apply + * @param[in] in_depthSimMap_dmp the depth/similarity map in device memory + * @param[in] scale the depth/similarity map downscale factor + * @param[in] step the depth/similarity map step factor + * @param[in] customSuffix the filename custom suffix + */ +void writeDepthSimMap(int rc, + const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const ROI& roi, + const CudaDeviceMemoryPitched& in_depthSimMap_dmp, + int scale, + int step, + const std::string& customSuffix = ""); + +/** + * @brief Write a depth/similarity map on disk from a tile list in host memory. 
+ * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] tileParams tile workflow parameters + * @param[in] tileRoiList the 2d region of interest of each tile + * @param[in] in_depthSimMapTiles_hmh the depth/similarity map tile list in host memory + * @param[in] scale the depth/similarity map downscale factor + * @param[in] step the depth/similarity map step factor + * @param[in] customSuffix the filename custom suffix + */ +void writeDepthSimMapFromTileList(int rc, + const mvsUtils::MultiViewParams& mp, + const mvsUtils::TileParams& tileParams, + const std::vector& tileRoiList, + const std::vector>& in_depthSimMapTiles_hmh, + int scale, + int step, + const std::string& customSuffix = ""); + +/** + * @brief Merge depth/similarity map tiles on disk. + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] scale the depth/similarity map downscale factor + * @param[in] step the depth/similarity map step factor + * @param[in] customSuffix the filename custom suffix + */ +void mergeDepthSimMapTiles(int rc, + const mvsUtils::MultiViewParams& mp, + int scale, + int step, + const std::string& customSuffix = ""); + +/** + * @brief Build and write a debug OBJ file with all tiles areas + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] tileRoiList tile region-of-interest list + * @param[in] tileMinMaxDepthsList tile min/max depth list + */ +void exportDepthSimMapTilePatternObj(int rc, + const mvsUtils::MultiViewParams& mp, + const std::vector& tileRoiList, + const std::vector>& tileMinMaxDepthsList); + +} // namespace depthMap +} // namespace aliceVision + diff --git a/src/aliceVision/depthMap/volumeIO.cpp b/src/aliceVision/depthMap/volumeIO.cpp index 436bf30f13..35db85c478 100644 --- a/src/aliceVision/depthMap/volumeIO.cpp +++ b/src/aliceVision/depthMap/volumeIO.cpp @@ -7,6 +7,7 @@ #include "volumeIO.hpp" #include +#include 
#include #include #include @@ -14,11 +15,8 @@ #include #include #include -#include -#include -#include - #include +#include #include #include @@ -27,34 +25,147 @@ namespace aliceVision { namespace depthMap { +void exportSimilaritySamplesCSV(const CudaHostMemoryHeap& in_volumeSim_hmh, + const std::vector& in_depths, + int camIndex, + const std::string& name, + const std::string& filepath) +{ + const auto volDim = in_volumeSim_hmh.getSize(); + const size_t spitch = in_volumeSim_hmh.getBytesPaddedUpToDim(1); + const size_t pitch = in_volumeSim_hmh.getBytesPaddedUpToDim(0); + + const int sampleSize = 3; + + const int xOffset = std::floor(volDim[0] / (sampleSize + 1.0f)); + const int yOffset = std::floor(volDim[1] / (sampleSize + 1.0f)); + + std::vector> ptsDepths(sampleSize*sampleSize); -void exportSimilarityVolume(const CudaHostMemoryHeap& volumeSim, const StaticVector& depths, const mvsUtils::MultiViewParams& mp, int camIndex, int scale, int step, const std::string& filepath) + for (int iy = 0; iy < sampleSize; ++iy) + { + for (int ix = 0; ix < sampleSize; ++ix) + { + const int x = (ix + 1) * xOffset; + const int y = (iy + 1) * yOffset; + + std::vector& pDepths = ptsDepths.at(iy * sampleSize + ix); + + for(int iz = 0; iz < in_depths.size(); ++iz) + { + float simValue = *get3DBufferAt_h(in_volumeSim_hmh.getBuffer(), spitch, pitch, x, y, iz); + pDepths.push_back(simValue); + } + } + } + + std::stringstream ss; + { + ss << name << "\n"; + int ptId = 1; + for (const std::vector& pDepths : ptsDepths) + { + ss << "p" << ptId << ";"; + for (const float depth : pDepths) + ss << depth << ";"; + ss << "\n"; + ++ptId; + } + } + + std::ofstream file; + file.open(filepath, std::ios_base::app); + if (file.is_open()) + file << ss.str(); +} + +void exportSimilaritySamplesCSV(const CudaHostMemoryHeap& in_volumeSim_hmh, + int camIndex, + const std::string& name, + const std::string& filepath) +{ + const auto volDim = in_volumeSim_hmh.getSize(); + const size_t spitch = 
in_volumeSim_hmh.getBytesPaddedUpToDim(1); + const size_t pitch = in_volumeSim_hmh.getBytesPaddedUpToDim(0); + + const int sampleSize = 3; + + const int xOffset = std::floor(volDim.x() / (sampleSize + 1.0f)); + const int yOffset = std::floor(volDim.y() / (sampleSize + 1.0f)); + + std::vector> simPerDepthsPerPts(sampleSize * sampleSize); + + for(int iy = 0; iy < sampleSize; ++iy) + { + for(int ix = 0; ix < sampleSize; ++ix) + { + const int x = (ix + 1) * xOffset; + const int y = (iy + 1) * yOffset; + + std::vector& simPerDepths = simPerDepthsPerPts.at(iy * sampleSize + ix); + simPerDepths.reserve(volDim.z()); + + for(int iz = 0; iz < volDim.z(); ++iz) + { + float sim = float(*get3DBufferAt_h(in_volumeSim_hmh.getBuffer(), spitch, pitch, x, y, iz)); + simPerDepths.push_back(sim); + } + } + } + + std::stringstream ss; + { + ss << name << "\n"; + int ptId = 1; + for(const std::vector& simPerDepths : simPerDepthsPerPts) + { + ss << "p" << ptId << ";"; + for(const float sim : simPerDepths) + ss << sim << ";"; + ss << "\n"; + ++ptId; + } + } + + std::ofstream file; + file.open(filepath, std::ios_base::app); + if(file.is_open()) + file << ss.str(); +} +void exportSimilarityVolume(const CudaHostMemoryHeap& in_volumeSim_hmh, + const std::vector& in_depths, + const mvsUtils::MultiViewParams& mp, + int camIndex, + const SgmParams& sgmParams, + const std::string& filepath, + const ROI& roi) { sfmData::SfMData pointCloud; const int xyStep = 10; IndexT landmarkId; - const auto volDim = volumeSim.getSize(); - const size_t spitch = volumeSim.getBytesPaddedUpToDim(1); - const size_t pitch = volumeSim.getBytesPaddedUpToDim(0); + const auto volDim = in_volumeSim_hmh.getSize(); + const size_t spitch = in_volumeSim_hmh.getBytesPaddedUpToDim(1); + const size_t pitch = in_volumeSim_hmh.getBytesPaddedUpToDim(0); - ALICEVISION_LOG_DEBUG("DepthMap exportSimilarityVolume: " << volDim[0] << " x " << volDim[1] << " x " << volDim[2] << ", xyStep=" << xyStep << "."); - - for (int z = 0; z < 
volDim[2]; ++z) + for (int vy = 0; vy < volDim[1]; vy += xyStep) { - for (int y = 0; y < volDim[1]; y += xyStep) + for (int vx = 0; vx < volDim[0]; vx += xyStep) { - for (int x = 0; x < volDim[0]; x += xyStep) + const double x = roi.x.begin + (vx * sgmParams.scale * sgmParams.stepXY); + const double y = roi.y.begin + (vy * sgmParams.scale * sgmParams.stepXY); + + for(int vz = 0; vz < in_depths.size(); ++vz) { - const double planeDepth = depths[z]; + const double planeDepth = in_depths[vz]; const Point3d planen = (mp.iRArr[camIndex] * Point3d(0.0f, 0.0f, 1.0f)).normalize(); const Point3d planep = mp.CArr[camIndex] + planen * planeDepth; - const Point3d v = (mp.iCamArr[camIndex] * Point2d(x * scale * step, y * scale * step)).normalize(); + const Point3d v = (mp.iCamArr[camIndex] * Point2d(x,y)).normalize(); const Point3d p = linePlaneIntersect(mp.CArr[camIndex], v, planep, planen); const float maxValue = 80.f; - float simValue = *get3DBufferAt_h(volumeSim.getBuffer(), spitch, pitch, x, y, z); + float simValue = *get3DBufferAt_h(in_volumeSim_hmh.getBuffer(), spitch, pitch, vx, vy, vz); if (simValue > maxValue) continue; const rgb c = getRGBFromJetColorMap(simValue / maxValue); @@ -68,46 +179,47 @@ void exportSimilarityVolume(const CudaHostMemoryHeap& volumeSim, const sfmDataIO::Save(pointCloud, filepath, sfmDataIO::ESfMData::STRUCTURE); } -inline unsigned char float_to_uchar(float v) -{ - float vv = std::max(0.f, v); - vv = std::min(255.f, vv); - unsigned char out = vv; - return out; -} - -inline rgb float4_to_rgb(const float4& v) -{ - return { float_to_uchar(v.x), float_to_uchar(v.y), float_to_uchar(v.z) }; -} - -void exportColorVolume(const CudaHostMemoryHeap& volumeSim, const std::vector& depths, int startDepth, int nbDepths, const mvsUtils::MultiViewParams& mp, int camIndex, int scale, int step, const std::string& filepath) +void exportSimilarityVolumeCross(const CudaHostMemoryHeap& in_volumeSim_hmh, + const std::vector& in_depths, + const 
mvsUtils::MultiViewParams& mp, + int camIndex, + const SgmParams& sgmParams, + const std::string& filepath, + const ROI& roi) { sfmData::SfMData pointCloud; - int xyStep = 10; IndexT landmarkId; - auto volDim = volumeSim.getSize(); - size_t spitch = volumeSim.getBytesPaddedUpToDim(1); - size_t pitch = volumeSim.getBytesPaddedUpToDim(0); + const auto volDim = in_volumeSim_hmh.getSize(); + const size_t spitch = in_volumeSim_hmh.getBytesPaddedUpToDim(1); + const size_t pitch = in_volumeSim_hmh.getBytesPaddedUpToDim(0); - ALICEVISION_LOG_DEBUG("DepthMap exportColorVolume: " << volDim[0] << " x " << volDim[1] << " x " << nbDepths << ", volDim[2]=" << volDim[2] << ", xyStep=" << xyStep << "."); - - for (int z = 0; z < nbDepths; ++z) + for(int vz = 0; vz < in_depths.size(); ++vz) { - for (int y = 0; y < volDim[1]; y += xyStep) + for(int vy = 0; vy < volDim[1]; ++vy) { - for (int x = 0; x < volDim[0]; x += xyStep) + const bool vyCenter = (vy >= volDim[1]/2) && ((vy-1)< volDim[1]/2); + const int xIdxStart = (vyCenter ? 0 : (volDim[0] / 2)); + const int xIdxStop = (vyCenter ? 
volDim[0] : (xIdxStart + 1)); + + for(int vx = xIdxStart; vx < xIdxStop; ++vx) { - const double planeDepth = depths[startDepth + z]; + const double x = roi.x.begin + (vx * sgmParams.scale * sgmParams.stepXY); + const double y = roi.y.begin + (vy * sgmParams.scale * sgmParams.stepXY); + const double planeDepth = in_depths[vz]; const Point3d planen = (mp.iRArr[camIndex] * Point3d(0.0f, 0.0f, 1.0f)).normalize(); const Point3d planep = mp.CArr[camIndex] + planen * planeDepth; - const Point3d v = (mp.iCamArr[camIndex] * Point2d(x * scale * step, y * scale * step)).normalize(); + const Point3d v = (mp.iCamArr[camIndex] * Point2d(x,y)).normalize(); const Point3d p = linePlaneIntersect(mp.CArr[camIndex], v, planep, planen); - float4 colorValue = *get3DBufferAt_h(volumeSim.getBuffer(), spitch, pitch, x, y, z); - const rgb c = float4_to_rgb(colorValue); // TODO: convert Lab color into sRGB color + const float maxValue = 80.f; + float simValue = *get3DBufferAt_h(in_volumeSim_hmh.getBuffer(), spitch, pitch, vx, vy, vz); + + if(simValue > maxValue) + continue; + + const rgb c = getRGBFromJetColorMap(simValue / maxValue); pointCloud.getLandmarks()[landmarkId] = sfmData::Landmark(Vec3(p.x, p.y, p.z), feature::EImageDescriberType::UNKNOWN, sfmData::Observations(), image::RGBColor(c.r, c.g, c.b)); ++landmarkId; @@ -118,56 +230,128 @@ void exportColorVolume(const CudaHostMemoryHeap& volumeSim, const std sfmDataIO::Save(pointCloud, filepath, sfmDataIO::ESfMData::STRUCTURE); } -void exportSimilaritySamplesCSV(const CudaHostMemoryHeap& volumeSim, const StaticVector& depths, int camIndex, int scale, int step, const std::string& name, const std::string& filepath) +void exportSimilarityVolumeCross(const CudaHostMemoryHeap& in_volumeSim_hmh, + const CudaHostMemoryHeap& in_depthSimMapSgmUpscale_hmh, + const mvsUtils::MultiViewParams& mp, + int camIndex, + const RefineParams& refineParams, + const std::string& filepath, + const ROI& roi) { - const auto volDim = volumeSim.getSize(); - const 
size_t spitch = volumeSim.getBytesPaddedUpToDim(1); - const size_t pitch = volumeSim.getBytesPaddedUpToDim(0); + sfmData::SfMData pointCloud; - const int sampleSize = 3; + const auto volDim = in_volumeSim_hmh.getSize(); + const size_t spitch = in_volumeSim_hmh.getBytesPaddedUpToDim(1); + const size_t pitch = in_volumeSim_hmh.getBytesPaddedUpToDim(0); - const int xOffset = std::floor(volDim[0] / (sampleSize + 1.0f)); - const int yOffset = std::floor(volDim[1] / (sampleSize + 1.0f)); + IndexT landmarkId = 0; - std::vector> ptsDepths(sampleSize*sampleSize); - - for (int iy = 0; iy < sampleSize; ++iy) + for(int vy = 0; vy < volDim[1]; ++vy) { - for (int ix = 0; ix < sampleSize; ++ix) + const bool vyCenter = ((vy*2) == volDim[1]); + const int xIdxStart = (vyCenter ? 0 : (volDim[0] / 2)); + const int xIdxStop = (vyCenter ? volDim[0] : (xIdxStart + 1)); + + for(int vx = xIdxStart; vx < xIdxStop; ++vx) { - const int x = (ix + 1) * xOffset; - const int y = (iy + 1) * yOffset; + const int x = roi.x.begin + (double(vx) * refineParams.scale * refineParams.stepXY); + const int y = roi.y.begin + (double(vy) * refineParams.scale * refineParams.stepXY); + const Point2d pix(x, y); - std::vector& pDepths = ptsDepths.at(iy * sampleSize + ix); + const double orignalDepth = in_depthSimMapSgmUpscale_hmh(vx, vy).x; + + if(orignalDepth < 0.0f) // original depth invalid or masked + continue; - for (int iz = 0; iz < volDim[2]; ++iz) + const Point3d originalP = mp.CArr[camIndex] + (mp.iCamArr[camIndex] * pix).normalize() * orignalDepth; + const double pixSize = mp.getCamPixelSize(originalP, camIndex); + + for(int vz = 0; vz < volDim[2]; ++vz) { - float simValue = *get3DBufferAt_h(volumeSim.getBuffer(), spitch, pitch, x, y, iz); - pDepths.push_back(simValue); + const float simValue = float(*get3DBufferAt_h(in_volumeSim_hmh.getBuffer(), spitch, pitch, vx, vy, vz)); + + const float maxValue = 10.f; // sum of similarity between 0 and 1 + if(simValue > maxValue) + continue; + + const int 
relativeDepthIndexOffset = vz - refineParams.halfNbDepths; + const double depth = orignalDepth + (relativeDepthIndexOffset * pixSize); // original depth + z based pixSize offset + + const Point3d p = mp.CArr[camIndex] + (mp.iCamArr[camIndex] * pix).normalize() * depth; + + const rgb c = getRGBFromJetColorMap(simValue / maxValue); + pointCloud.getLandmarks()[landmarkId] = sfmData::Landmark(Vec3(p.x, p.y, p.z), feature::EImageDescriberType::UNKNOWN, sfmData::Observations(), image::RGBColor(c.r, c.g, c.b)); + + ++landmarkId; } } } - std::stringstream ss; + sfmDataIO::Save(pointCloud, filepath, sfmDataIO::ESfMData::STRUCTURE); +} + +inline unsigned char float_to_uchar(float v) +{ + float vv = std::max(0.f, v); + vv = std::min(255.f, vv); + unsigned char out = vv; + return out; +} + +inline rgb float4_to_rgb(const float4& v) +{ + return { float_to_uchar(v.x), float_to_uchar(v.y), float_to_uchar(v.z) }; +} + +void exportColorVolume(const CudaHostMemoryHeap& in_volumeSim_hmh, + const std::vector& in_depths, + int startDepth, + int nbDepths, + const mvsUtils::MultiViewParams& mp, + int camIndex, + int scale, + int step, + const std::string& filepath, + const ROI& roi) +{ + sfmData::SfMData pointCloud; + int xyStep = 10; + + IndexT landmarkId; + + auto volDim = in_volumeSim_hmh.getSize(); + size_t spitch = in_volumeSim_hmh.getBytesPaddedUpToDim(1); + size_t pitch = in_volumeSim_hmh.getBytesPaddedUpToDim(0); + + ALICEVISION_LOG_DEBUG("DepthMap exportColorVolume: " << volDim[0] << " x " << volDim[1] << " x " << nbDepths << ", volDim[2]=" << volDim[2] << ", xyStep=" << xyStep << "."); + + + for (int vy = 0; vy < volDim[1]; vy += xyStep) { - ss << name << "\n"; - int ptId = 1; - for (const std::vector& pDepths : ptsDepths) + for (int vx = 0; vx < volDim[0]; vx += xyStep) { - ss << "p" << ptId << ";"; - for (const float depth : pDepths) - ss << depth << ";"; - ss << "\n"; - ++ptId; + const double x = roi.x.begin + (vx * scale * step); + const double y = roi.y.begin + (vy * scale 
* step); + + for(int vz = 0; vz < nbDepths; ++vz) + { + const double planeDepth = in_depths[startDepth + vz]; + const Point3d planen = (mp.iRArr[camIndex] * Point3d(0.0f, 0.0f, 1.0f)).normalize(); + const Point3d planep = mp.CArr[camIndex] + planen * planeDepth; + const Point3d v = (mp.iCamArr[camIndex] * Point2d(x, y)).normalize(); + const Point3d p = linePlaneIntersect(mp.CArr[camIndex], v, planep, planen); + + float4 colorValue = *get3DBufferAt_h(in_volumeSim_hmh.getBuffer(), spitch, pitch, vx, vy, vz); + const rgb c = float4_to_rgb(colorValue); // TODO: convert Lab color into sRGB color + pointCloud.getLandmarks()[landmarkId] = sfmData::Landmark(Vec3(p.x, p.y, p.z), feature::EImageDescriberType::UNKNOWN, sfmData::Observations(), image::RGBColor(c.r, c.g, c.b)); + + ++landmarkId; + } } } - std::ofstream file; - file.open(filepath, std::ios_base::app); - if (file.is_open()) - file << ss.str(); + sfmDataIO::Save(pointCloud, filepath, sfmDataIO::ESfMData::STRUCTURE); } - } // namespace depthMap } // namespace aliceVision diff --git a/src/aliceVision/depthMap/volumeIO.hpp b/src/aliceVision/depthMap/volumeIO.hpp index cb3da858ef..1444792780 100644 --- a/src/aliceVision/depthMap/volumeIO.hpp +++ b/src/aliceVision/depthMap/volumeIO.hpp @@ -4,26 +4,112 @@ // v. 2.0. If a copy of the MPL was not distributed with this file, // You can obtain one at https://mozilla.org/MPL/2.0/. - -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include - +#include namespace aliceVision { namespace depthMap { -void exportSimilarityVolume(const CudaHostMemoryHeap& volumeSim, const StaticVector& depths, const mvsUtils::MultiViewParams& mp, int camIndex, int scale, int step, const std::string& filepath); +/** + * @brief Export 9 similarity values over the entire depth in a CSV file. 
+ * @param[in] in_volumeSim_hmh the similarity in host memory + * @param[in] in_depths the SGM depth list + * @param[in] camIndex the R cam global index + * @param[in] name the export name + * @param[in] filepath the export filepath + */ +void exportSimilaritySamplesCSV(const CudaHostMemoryHeap& in_volumeSim_hmh, + const std::vector& in_depths, + int camIndex, + const std::string& name, + const std::string& filepath); + +/** + * @brief Export 9 similarity values over the entire depth in a CSV file. + * @param[in] in_volumeSim_hmh the similarity in host memory + * @param[in] camIndex the R cam global index + * @param[in] name the export name + * @param[in] filepath the export filepath + */ +void exportSimilaritySamplesCSV(const CudaHostMemoryHeap& in_volumeSim_hmh, + int camIndex, + const std::string& name, + const std::string& filepath); + +/** + * @brief Export the given similarity volume to an Alembic file. + * @param[in] in_volumeSim_hmh the similarity in host memory + * @param[in] in_depths the SGM depth list + * @param[in] mp the multi-view parameters + * @param[in] camIndex the R cam global index + * @param[in] sgmParams the Semi Global Matching parameters + * @param[in] filepath the export filepath + * @param[in] roi the 2d region of interest + */ +void exportSimilarityVolume(const CudaHostMemoryHeap& in_volumeSim_hmh, + const std::vector& in_depths, + const mvsUtils::MultiViewParams& mp, + int camIndex, + const SgmParams& sgmParams, + const std::string& filepath, + const ROI& roi); -void exportColorVolume(const CudaHostMemoryHeap& volumeSim, const std::vector& depths, int startDepth, int nbDepths, const mvsUtils::MultiViewParams& mp, int camIndex, int scale, int step, const std::string& filepath); +/** + * @brief Export a cross of the given similarity volume to an Alembic file. 
+ * @param[in] in_volumeSim_hmh the similarity in host memory + * @param[in] in_depths the SGM depth list + * @param[in] mp the multi-view parameters + * @param[in] camIndex the R cam global index + * @param[in] sgmParams the Semi Global Matching parameters + * @param[in] filepath the export filepath + * @param[in] roi the 2d region of interest + */ +void exportSimilarityVolumeCross(const CudaHostMemoryHeap& in_volumeSim_hmh, + const std::vector& in_depths, + const mvsUtils::MultiViewParams& mp, + int camIndex, + const SgmParams& sgmParams, + const std::string& filepath, + const ROI& roi); -void exportSimilaritySamplesCSV(const CudaHostMemoryHeap& volumeSim, const StaticVector& depths, int camIndex, int scale, int step, const std::string& name, const std::string& filepath); +/** + * @brief Export a cross of the given similarity volume to an Alembic file. + * @param[in] in_volumeSim_hmh the similarity in host memory + * @param[in] in_depthSimMapSgmUpscale_hmh the upscaled SGM depth/sim map + * @param[in] mp the multi-view parameters + * @param[in] camIndex the R cam global index + * @param[in] refineParams the Refine parameters + * @param[in] filepath the export filepath + * @param[in] roi the 2d region of interest + */ +void exportSimilarityVolumeCross(const CudaHostMemoryHeap& in_volumeSim_hmh, + const CudaHostMemoryHeap& in_depthSimMapSgmUpscale_hmh, + const mvsUtils::MultiViewParams& mp, + int camIndex, + const RefineParams& refineParams, + const std::string& filepath, + const ROI& roi); +/** + * @brief Export the given color volume to an Alembic file. 
+ */ +void exportColorVolume(const CudaHostMemoryHeap& in_volumeSim_hmh, + const std::vector& in_depths, + int startDepth, + int nbDepths, + const mvsUtils::MultiViewParams& mp, + int camIndex, + int scale, + int step, + const std::string& filepath, + const ROI& roi); } // namespace depthMap } // namespace aliceVision diff --git a/src/aliceVision/fuseCut/DelaunayGraphCut.cpp b/src/aliceVision/fuseCut/DelaunayGraphCut.cpp index 4c0937e704..1b49bc9537 100644 --- a/src/aliceVision/fuseCut/DelaunayGraphCut.cpp +++ b/src/aliceVision/fuseCut/DelaunayGraphCut.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -297,38 +297,32 @@ void createVerticesWithVisibilities(const StaticVector& cams, std::vector

depthMap; image::Image simMap; + const int width = mp.getWidth(c); + const int height = mp.getHeight(c); + + // read depth map + mvsUtils::readDepthMap(c, mp, depthMap, 0); + + if(depthMap.size() <= 0) { - const std::string depthMapFilepath = getFileNameFromIndex(mp, c, mvsUtils::EFileType::depthMap, 0); - image::readImage(depthMapFilepath, depthMap, image::EImageColorSpace::NO_CONVERSION); - if (depthMap.size() == 0) - { - ALICEVISION_LOG_WARNING("Empty depth map: " << depthMapFilepath); - continue; - } - int wTmp, hTmp; - const std::string simMapFilepath = getFileNameFromIndex(mp, c, mvsUtils::EFileType::simMap, 0); - // If we have a simMap in input use it, - // else init with a constant value. - if(boost::filesystem::exists(simMapFilepath)) - { - image::readImage(simMapFilepath, simMap, image::EImageColorSpace::NO_CONVERSION); - if (simMap.Width() != depthMap.Width() || simMap.Height() != depthMap.Height()) - throw std::runtime_error("Similarity map size doesn't match the depth map size: " + simMapFilepath + - ", " + depthMapFilepath); - { - image::Image simMapTmp; - imageAlgo::convolveImage(simMap, simMapTmp, - "gaussian", simGaussianSize, simGaussianSize); - simMap.swap(simMapTmp); - } - } - else - { - ALICEVISION_LOG_WARNING("simMap file can't be found."); - simMap.resize(depthMap.Width(), depthMap.Height(), true, -1); - } + ALICEVISION_LOG_WARNING("Empty depth map (cam id: " << c << ")"); + continue; + } + // read similarity map + try + { + mvsUtils::readSimMap(c, mp, simMap, 0); + image::Image simMapTmp(simMap.Width(), simMap.Height()); + imageAlgo::convolveImage(simMap, simMapTmp, "gaussian", simGaussianSize, simGaussianSize); + simMap.swap(simMapTmp); + } + catch(const std::exception& e) + { + ALICEVISION_LOG_WARNING("Cannot find similarity map file."); + simMap.resize(width * height, -1); } + // Add visibility #pragma omp parallel for for (int y = 0; y < depthMap.Height(); ++y) @@ -946,20 +940,19 @@ void DelaunayGraphCut::addMaskHelperPoints(const 
Point3d voxel[8], const StaticV for(int c = 0; c < cams.size(); c++) { image::Image depthMap; + + mvsUtils::readDepthMap(c, _mp, depthMap, 0); + + if(depthMap.size() <= 0) { - const std::string depthMapFilepath = getFileNameFromIndex(_mp, c, mvsUtils::EFileType::depthMap, 0); - image::readImage(depthMapFilepath, depthMap, image::EImageColorSpace::NO_CONVERSION); - if(depthMap.size() == 0) - { - ALICEVISION_LOG_WARNING("Empty depth map: " << depthMapFilepath); - continue; - } + ALICEVISION_LOG_WARNING("Empty depth map (cam id: " << c << ")"); + continue; } const int width = depthMap.Width(); const int height = depthMap.Height(); - int syMax = divideRoundUp(height, step); - int sxMax = divideRoundUp(width, step); + const int syMax = divideRoundUp(height, step); + const int sxMax = divideRoundUp(width, step); for(int sy = 0; sy < syMax; ++sy) { @@ -1089,41 +1082,38 @@ void DelaunayGraphCut::fuseFromDepthMaps(const StaticVector& cams, const Po image::Image depthMap; image::Image simMap; image::Image numOfModalsMap; - int width, height; + + const int width = _mp.getWidth(c); + const int height = _mp.getHeight(c); + { - const std::string depthMapFilepath = getFileNameFromIndex(_mp, c, mvsUtils::EFileType::depthMap, 0); - image::readImage(depthMapFilepath, depthMap, image::EImageColorSpace::NO_CONVERSION); - if (depthMap.size() == 0) + // read depth map + mvsUtils::readDepthMap(c, _mp, depthMap, 0); + + if(depthMap.size() <= 0) { - ALICEVISION_LOG_WARNING("Empty depth map: " << depthMapFilepath); + ALICEVISION_LOG_WARNING("Empty depth map (cam id: " << c << ")"); continue; } - width = depthMap.Width(); - height = depthMap.Height(); - int wTmp, hTmp; - const std::string simMapFilepath = getFileNameFromIndex(_mp, c, mvsUtils::EFileType::simMap, 0); - // If we have a simMap in input use it, - // else init with a constant value. 
- if(boost::filesystem::exists(simMapFilepath)) + // read similarity map + try { - image::readImage(simMapFilepath, simMap, image::EImageColorSpace::NO_CONVERSION); - if (simMap.Width() != width || simMap.Height() != height) - throw std::runtime_error("Wrong sim map dimensions: " + simMapFilepath); - { + mvsUtils::readSimMap(c, _mp, simMap, 0); image::Image simMapTmp; imageAlgo::convolveImage(simMap, simMapTmp, "gaussian", params.simGaussianSizeInit, params.simGaussianSizeInit); simMap.swap(simMapTmp); - } } - else + catch(const std::exception& e) { ALICEVISION_LOG_WARNING("simMap file can't be found."); simMap.resize(width, height, true, -1); } + // read nmod map + int wTmp, hTmp; const std::string nmodMapFilepath = getFileNameFromIndex(_mp, c, mvsUtils::EFileType::nmodMap, 0); // If we have an nModMap in input (from depthmapfilter) use it, // else init with a constant value. diff --git a/src/aliceVision/fuseCut/Fuser.cpp b/src/aliceVision/fuseCut/Fuser.cpp index 5035aabba6..e12d3c4285 100644 --- a/src/aliceVision/fuseCut/Fuser.cpp +++ b/src/aliceVision/fuseCut/Fuser.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -35,24 +36,7 @@ unsigned long computeNumberOfAllPoints(const mvsUtils::MultiViewParams& mp, int #pragma omp parallel for reduction(+:npts) for(int rc = 0; rc < mp.ncams; rc++) { - const std::string filename = mvsUtils::getFileNameFromIndex(mp, rc, mvsUtils::EFileType::depthMap, scale); - const auto metadata = image::readImageMetadata(filename); - int nbDepthValues = metadata.get_int("AliceVision:nbDepthValues", -1); - - if(nbDepthValues < 0) - { - image::Image depthMap; - nbDepthValues = 0; - - ALICEVISION_LOG_WARNING("Can't find or invalid 'nbDepthValues' metadata in '" << filename << "'. 
Recompute the number of valid values."); - - image::readImage(mvsUtils::getFileNameFromIndex(mp, rc, mvsUtils::EFileType::depthMap, scale), - depthMap, image::EImageColorSpace::NO_CONVERSION); - // no need to transpose for this operation - for(int i = 0; i < depthMap.size(); ++i) - nbDepthValues += static_cast(depthMap(i) > 0.0f); - } - + const unsigned long nbDepthValues = mvsUtils::getNbDepthValuesFromDepthMap(rc, mp, scale); npts += nbDepthValues; } return npts; @@ -162,10 +146,7 @@ bool Fuser::filterGroupsRC(int rc, float pixToleranceFactor, int pixSizeBall, in image::Image depthMap; image::Image simMap; - image::readImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::depthMap, 1), - depthMap, image::EImageColorSpace::NO_CONVERSION); - image::readImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::simMap, 1), - simMap, image::EImageColorSpace::NO_CONVERSION); + mvsUtils::readDepthSimMap(rc, _mp, depthMap, simMap, 1); image::Image numOfModalsMap(w, h, true, 0); @@ -190,16 +171,16 @@ bool Fuser::filterGroupsRC(int rc, float pixToleranceFactor, int pixSizeBall, in image::Image tcdepthMap; - image::readImage(getFileNameFromIndex(_mp, tc, mvsUtils::EFileType::depthMap, 1), - tcdepthMap, image::EImageColorSpace::NO_CONVERSION); + mvsUtils::readDepthMap(tc, _mp, tcdepthMap, 1); if (tcdepthMap.Height() > 0 && tcdepthMap.Width() > 0) { - for(int y = 0; y < h; ++y) + for(int y = 0; y < tcdepthMap.Height(); ++y) { - for(int x = 0; x < w; ++x) + for(int x = 0; x < tcdepthMap.Width(); ++x) { float depth = tcdepthMap(y, x); + if(depth > 0.0f) { Point3d p = _mp.CArr[tc] + (_mp.iCamArr[tc] * Point2d((float)x, (float)y)).normalize() * depth; @@ -255,13 +236,10 @@ bool Fuser::filterDepthMapsRC(int rc, int minNumOfModals, int minNumOfModalsWSP2 image::Image simMap; image::Image numOfModalsMap; + mvsUtils::readDepthSimMap(rc, _mp, depthMap, simMap); // scale 1 + { int width, height; - - image::readImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::depthMap, 1), - 
depthMap, image::EImageColorSpace::NO_CONVERSION); - image::readImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::simMap, 1), - simMap, image::EImageColorSpace::NO_CONVERSION); image::readImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::nmodMap), numOfModalsMap, image::EImageColorSpace::NO_CONVERSION); } @@ -272,8 +250,6 @@ bool Fuser::filterDepthMapsRC(int rc, int minNumOfModals, int minNumOfModalsWSP2 throw std::invalid_argument("depthMap, simMap and numOfModalsMap must have same size"); } - int nbDepthValues = 0; - for(int i = 0; i < depthMap.size(); i++) { // if the point is part of a mask (alpha) skip @@ -301,34 +277,9 @@ bool Fuser::filterDepthMapsRC(int rc, int minNumOfModals, int minNumOfModalsWSP2 depthMap(i) = -1.0f; simMap(i) = 1.0f; } - - if(depthMap(i) > 0.0f) - ++nbDepthValues; } - auto metadata = image::getMetadataFromMap(_mp.getMetadata(rc)); - metadata.push_back(oiio::ParamValue("AliceVision:nbDepthValues", oiio::TypeDesc::INT32, 1, &nbDepthValues)); - metadata.push_back(oiio::ParamValue("AliceVision:downscale", _mp.getDownscaleFactor(rc))); - metadata.push_back(oiio::ParamValue("AliceVision:CArr", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, oiio::TypeDesc::VEC3), 1, _mp.CArr[rc].m)); - metadata.push_back(oiio::ParamValue("AliceVision:iCamArr", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, oiio::TypeDesc::MATRIX33), 1, _mp.iCamArr[rc].m)); - { - float minDepth, maxDepth, midDepth; - size_t nbDepths; - _mp.getMinMaxMidNbDepth(rc, minDepth, maxDepth, midDepth, nbDepths); - metadata.push_back(oiio::ParamValue("AliceVision:maxDepth", maxDepth)); - metadata.push_back(oiio::ParamValue("AliceVision:minDepth", minDepth)); - } - { - std::vector matrixP = _mp.getOriginalP(rc); - metadata.push_back(oiio::ParamValue("AliceVision:P", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, oiio::TypeDesc::MATRIX44), 1, matrixP.data())); - } - - image::writeImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::depthMap, 0), depthMap, - 
image::ImageWriteOptions().toColorSpace(image::EImageColorSpace::LINEAR) - .storageDataType(image::EStorageDataType::Float), metadata); - image::writeImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::simMap, 0), simMap, - image::ImageWriteOptions().toColorSpace(image::EImageColorSpace::LINEAR) - .storageDataType(image::EStorageDataType::Half), metadata); + mvsUtils::writeDepthSimMap(rc, _mp, depthMap, simMap, 0); ALICEVISION_LOG_DEBUG(rc << " solved."); mvsUtils::printfElapsedTime(t1); @@ -353,8 +304,7 @@ float Fuser::computeAveragePixelSizeInHexahedron(Point3d* hexah, int step, int s int w = _mp.getWidth(rc) / scaleuse; image::Image rcdepthMap; - image::readImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::depthMap, scale), - rcdepthMap, image::EImageColorSpace::NO_CONVERSION); + mvsUtils::readDepthMap(rc, _mp, rcdepthMap, scale); if (rcdepthMap.size() < w * h) throw std::runtime_error("Invalid image size"); @@ -451,8 +401,8 @@ void Fuser::divideSpaceFromDepthMaps(Point3d* hexah, float& minPixSize) int w = _mp.getWidth(rc); image::Image depthMap; - image::readImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::depthMap, scale), - depthMap, image::EImageColorSpace::NO_CONVERSION); + + mvsUtils::readDepthMap(rc, _mp, depthMap, scale); for(int i = 0; i < depthMap.size(); i += stepPts) { @@ -493,8 +443,7 @@ void Fuser::divideSpaceFromDepthMaps(Point3d* hexah, float& minPixSize) image::Image depthMap; - image::readImage(getFileNameFromIndex(_mp, rc, mvsUtils::EFileType::depthMap, scale), - depthMap, image::EImageColorSpace::NO_CONVERSION); + mvsUtils::readDepthMap(rc, _mp, depthMap, scale); for(int i = 0; i < depthMap.size(); i += stepPts) { @@ -765,10 +714,7 @@ std::string generateTempPtsSimsFiles(const std::string& tmpDir, mvsUtils::MultiV image::Image depthMap; image::Image simMap; - image::readImage(getFileNameFromIndex(mp, rc, mvsUtils::EFileType::depthMap, scale), - depthMap, image::EImageColorSpace::NO_CONVERSION); - 
image::readImage(getFileNameFromIndex(mp, rc, mvsUtils::EFileType::simMap, scale), - simMap, image::EImageColorSpace::NO_CONVERSION); + mvsUtils::readDepthSimMap(rc, mp, depthMap, simMap, scale); if (depthMap.size() != (w * h) || simMap.size() != (w * h)) { diff --git a/src/aliceVision/gpu/gpu.cpp b/src/aliceVision/gpu/gpu.cpp index 7096d2a596..7a654c57b3 100644 --- a/src/aliceVision/gpu/gpu.cpp +++ b/src/aliceVision/gpu/gpu.cpp @@ -96,13 +96,18 @@ std::string gpuInformationCUDA() std::size_t avail; std::size_t total; - if(cudaMemGetInfo(&avail, &total) != cudaSuccess) + cudaError_t memInfoErr = cudaMemGetInfo(&avail, &total); + if(memInfoErr != cudaSuccess) { // if the card does not provide this information. avail = 0; total = 0; - ALICEVISION_LOG_WARNING("Cannot get available memory information for CUDA gpu device " << i << "."); + ALICEVISION_LOG_WARNING("Cannot get available memory information for CUDA gpu device " << i << ":" << std::endl + << "\t (error code: " << memInfoErr << ") " << cudaGetErrorName(memInfoErr)); + + cudaError_t err = cudaGetLastError(); // clear error } + std::stringstream deviceSS; deviceSS << "Device information:" << std::endl @@ -153,14 +158,16 @@ std::string gpuInformationCUDA() } else { - information = "No CUDA-Enabled GPU."; + information = "No CUDA-Enabled GPU.\n"; } + std::stringstream ss; + ss << "CUDA build version: " << CUDART_VERSION/1000 << "." 
<< CUDART_VERSION/10%100; + information += ss.str(); #else information = "AliceVision built without CUDA support."; #endif return information; } - } // namespace gpu -} // namespace aliceVision \ No newline at end of file +} // namespace aliceVision diff --git a/src/aliceVision/image/io.cpp b/src/aliceVision/image/io.cpp index f2c85c0d15..1ed1183a36 100644 --- a/src/aliceVision/image/io.cpp +++ b/src/aliceVision/image/io.cpp @@ -704,7 +704,8 @@ void writeImage(const std::string& path, const Image& image, const ImageWriteOptions& options, const oiio::ParamValueList& metadata = oiio::ParamValueList(), - const oiio::ROI& roi = oiio::ROI()) + const oiio::ROI& displayRoi = oiio::ROI(), + const oiio::ROI& pixelRoi = oiio::ROI()) { const fs::path bPath = fs::path(path); const std::string extension = boost::to_lower_copy(bPath.extension().string()); @@ -733,12 +734,19 @@ void writeImage(const std::string& path, oiio::ImageSpec imageSpec(image.Width(), image.Height(), nchannels, typeDesc); imageSpec.extra_attribs = metadata; // add custom metadata - imageSpec.attribute("jpeg:subsampling", "4:4:4"); // if possible, always subsampling 4:4:4 for jpeg - imageSpec.attribute("compression", isEXR ? "zips" : "none"); // if possible, set compression (zips for EXR, none for the other) - if(roi.defined() && isEXR) - { - imageSpec.set_roi_full(roi); - } + imageSpec.attribute("jpeg:subsampling", "4:4:4"); // if possible, always subsampling 4:4:4 for jpeg + imageSpec.attribute("compression", isEXR ? 
"zips" : "none"); // if possible, set compression (zips for EXR, none for the other) + + if(displayRoi.defined() && isEXR) + { + imageSpec.set_roi_full(displayRoi); + } + + if(pixelRoi.defined() && isEXR) + { + imageSpec.set_roi(pixelRoi); + } + imageSpec.attribute("AliceVision:ColorSpace", (toColorSpace == EImageColorSpace::NO_CONVERSION) @@ -931,18 +939,24 @@ void writeImage(const std::string& path, const Image& image, writeImageNoFloat(path, oiio::TypeDesc::UINT32, image, options, metadata); } -void writeImage(const std::string& path, const Image& image, - const ImageWriteOptions& options, const oiio::ParamValueList& metadata, - const oiio::ROI& roi) +void writeImage(const std::string& path, + const Image& image, + const ImageWriteOptions& options, + const oiio::ParamValueList& metadata, + const oiio::ROI& displayRoi, + const oiio::ROI& pixelRoi) { - writeImage(path, oiio::TypeDesc::FLOAT, 1, image, options, metadata,roi); + writeImage(path, oiio::TypeDesc::FLOAT, 1, image, options, metadata, displayRoi, pixelRoi); } -void writeImage(const std::string& path, const Image& image, - const ImageWriteOptions& options, const oiio::ParamValueList& metadata, - const oiio::ROI& roi) +void writeImage(const std::string& path, + const Image& image, + const ImageWriteOptions& options, + const oiio::ParamValueList& metadata, + const oiio::ROI& displayRoi, + const oiio::ROI& pixelRoi) { - writeImage(path, oiio::TypeDesc::FLOAT, 4, image, options, metadata,roi); + writeImage(path, oiio::TypeDesc::FLOAT, 4, image, options, metadata, displayRoi, pixelRoi); } void writeImage(const std::string& path, const Image& image, @@ -951,11 +965,14 @@ void writeImage(const std::string& path, const Image& image, writeImage(path, oiio::TypeDesc::UINT8, 4, image, options, metadata); } -void writeImage(const std::string& path, const Image& image, - const ImageWriteOptions& options, const oiio::ParamValueList& metadata, - const oiio::ROI& roi) +void writeImage(const std::string& path, + const 
Image& image, + const ImageWriteOptions& options, + const oiio::ParamValueList& metadata, + const oiio::ROI& displayRoi, + const oiio::ROI& pixelRoi) { - writeImage(path, oiio::TypeDesc::FLOAT, 3, image, options, metadata, roi); + writeImage(path, oiio::TypeDesc::FLOAT, 3, image, options, metadata, displayRoi, pixelRoi); } void writeImage(const std::string& path, const Image& image, diff --git a/src/aliceVision/image/io.hpp b/src/aliceVision/image/io.hpp index 8a647a51dc..78d75ac3c2 100644 --- a/src/aliceVision/image/io.hpp +++ b/src/aliceVision/image/io.hpp @@ -337,20 +337,29 @@ void writeImage(const std::string& path, const Image& image, const ImageWriteOptions& options, const oiio::ParamValueList& metadata = oiio::ParamValueList()); -void writeImage(const std::string& path, const Image& image, const ImageWriteOptions& options, +void writeImage(const std::string& path, + const Image& image, + const ImageWriteOptions& options, const oiio::ParamValueList& metadata = oiio::ParamValueList(), - const oiio::ROI& roi = oiio::ROI()); + const oiio::ROI& displayRoi = oiio::ROI(), + const oiio::ROI& pixelRoi = oiio::ROI()); -void writeImage(const std::string& path, const Image& image, const ImageWriteOptions& options, +void writeImage(const std::string& path, + const Image& image, + const ImageWriteOptions& options, const oiio::ParamValueList& metadata = oiio::ParamValueList(), - const oiio::ROI& roi = oiio::ROI()); + const oiio::ROI& displayRoi = oiio::ROI(), + const oiio::ROI& pixelRoi = oiio::ROI()); void writeImage(const std::string& path, const Image& image, const ImageWriteOptions& options, const oiio::ParamValueList& metadata = oiio::ParamValueList()); -void writeImage(const std::string& path, const Image& image, const ImageWriteOptions& options, +void writeImage(const std::string& path, + const Image& image, + const ImageWriteOptions& options, const oiio::ParamValueList& metadata = oiio::ParamValueList(), - const oiio::ROI& roi = oiio::ROI()); + const oiio::ROI& 
displayRoi = oiio::ROI(), + const oiio::ROI& pixelRoi = oiio::ROI()); void writeImage(const std::string& path, const Image& image, const ImageWriteOptions& options, const oiio::ParamValueList& metadata = oiio::ParamValueList()); diff --git a/src/aliceVision/mvsData/CMakeLists.txt b/src/aliceVision/mvsData/CMakeLists.txt index 8334045e6d..cb48026ce7 100644 --- a/src/aliceVision/mvsData/CMakeLists.txt +++ b/src/aliceVision/mvsData/CMakeLists.txt @@ -10,6 +10,7 @@ set(mvsData_files_headers Point3d.hpp Point4d.hpp Pixel.hpp + ROI.hpp Stat3d.hpp StaticVector.hpp structures.hpp diff --git a/src/aliceVision/mvsData/ROI.hpp b/src/aliceVision/mvsData/ROI.hpp new file mode 100644 index 0000000000..1c5fdb608b --- /dev/null +++ b/src/aliceVision/mvsData/ROI.hpp @@ -0,0 +1,247 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +// allows code sharing between NVCC and other compilers +#if defined(__NVCC__) +#define CUDA_HOST_DEVICE __host__ __device__ +#define CUDA_HOST __host__ +#define CUDA_CEIL(f) ceil(f) +#define CUDA_FLOOR(f) floor(f) +#define CUDA_MIN(a,b) min(a,b) +#define CUDA_MAX(a,b) max(a,b) +#else +#define CUDA_HOST_DEVICE +#define CUDA_HOST +#define CUDA_CEIL(f) std::ceil(f) +#define CUDA_FLOOR(f) std::floor(f) +#define CUDA_MIN(a,b) std::min(a, b) +#define CUDA_MAX(a,b) std::max(a, b) +#include +#include +#include +#endif + +namespace aliceVision { + +/* + * @struct Range + * @brief Small CPU and GPU host / device struct descibing a 1d range. 
+ */
+struct Range
+{
+    unsigned int begin = 0; // first index of the range (included, see contains())
+    unsigned int end = 0;   // end index of the range (excluded, see contains())
+
+    // default constructor: begin = end = 0, i.e. an empty range
+    Range() = default;
+
+    /**
+     * @brief Range constructor
+     * @param[in] in_begin the range begin index
+     * @param[in] in_end the range end index
+     */
+    CUDA_HOST_DEVICE Range(unsigned int in_begin,
+                           unsigned int in_end)
+        : begin(in_begin)
+        , end(in_end)
+    {}
+
+    /**
+     * @brief Get the Range size.
+     * @note computed as (end - begin) on unsigned values: it wraps around if end < begin
+     * @return the number of indexes between begin (included) and end (excluded)
+     */
+    CUDA_HOST_DEVICE inline unsigned int size() const { return end - begin; }
+
+    CUDA_HOST_DEVICE inline bool isEmpty() const { return begin >= end; } // true if the Range contains no index
+
+    /**
+     * @brief Return true if the given index is contained in the Range.
+     * @param[in] i the given index
+     * @return true if begin <= i < end
+     */
+    CUDA_HOST inline bool contains(unsigned int i) const
+    {
+        return ((begin <= i) && (end > i));
+    }
+};
+
+inline Range intersect(const Range& a, const Range& b) // largest begin, smallest end; the result may be empty
+{
+    return Range(CUDA_MAX(a.begin, b.begin),
+                 CUDA_MIN(a.end, b.end));
+}
+
+/*
+ * @struct ROI
+ * @brief Small CPU and GPU host / device struct describing a rectangular 2d region of interest.
+ */
+struct ROI
+{
+    Range x, y; // X (columns) and Y (rows) index ranges
+
+    // default constructor: both ranges are empty
+    ROI() = default;
+
+    /**
+     * @brief ROI constructor
+     * @param[in] in_beginX the range X begin index
+     * @param[in] in_endX the range X end index
+     * @param[in] in_beginY the range Y begin index
+     * @param[in] in_endY the range Y end index
+     */
+    CUDA_HOST_DEVICE ROI(unsigned int in_beginX,
+                         unsigned int in_endX,
+                         unsigned int in_beginY,
+                         unsigned int in_endY)
+        : x(in_beginX, in_endX)
+        , y(in_beginY, in_endY)
+    {}
+
+    /**
+     * @brief ROI constructor
+     * @param[in] in_rangeX the X index range
+     * @param[in] in_rangeY the Y index range
+     */
+    CUDA_HOST_DEVICE ROI(const Range& in_rangeX,
+                         const Range& in_rangeY)
+        : x(in_rangeX)
+        , y(in_rangeY)
+    {}
+
+    /**
+     * @brief Get the ROI width
+     * @return the X range size
+     */
+    CUDA_HOST_DEVICE inline unsigned int width() const { return x.size(); }
+
+    /**
+     * @brief Get the ROI height
+     * @return the Y range size
+     */
+    CUDA_HOST_DEVICE inline unsigned int height() const { return y.size(); }
+
+    CUDA_HOST_DEVICE inline bool isEmpty() const { return x.isEmpty() || y.isEmpty(); } // true if the X or the Y range is empty
+
+    /**
+     * @brief Return true if the given 2d point is contained in the ROI.
+     * @param[in] in_x the given 2d point X coordinate
+     * @param[in] in_y the given 2d point Y coordinate
+     * @return true if in_x is contained in the X range and in_y in the Y range
+     */
+    CUDA_HOST inline bool contains(unsigned int in_x, unsigned int in_y) const
+    {
+        return (x.contains(in_x) && y.contains(in_y));
+    }
+};
+
+/**
+ * @brief check if a given ROI is valid and can be contained in a given image
+ * @param[in] roi the given ROI
+ * @param[in] width the given image width (cast to unsigned int for the comparison)
+ * @param[in] height the given image height (cast to unsigned int for the comparison)
+ * @return true if both ranges satisfy begin < end and end <= image size
+ */
+CUDA_HOST inline bool checkImageROI(const ROI& roi, int width, int height)
+{
+    return ((roi.x.end <= (unsigned int)(width)) && (roi.x.begin < roi.x.end) &&
+            (roi.y.end <= (unsigned int)(height)) && (roi.y.begin < roi.y.end));
+}
+
+/**
+ * @brief Downscale the given Range with the given downscale factor
+ * @param[in] range the given Range
+ * @param[in] downscale the downscale factor to apply (used as a divisor)
+ * @return the downscaled Range (begin is rounded down, end is rounded up)
+ */
+CUDA_HOST inline Range downscaleRange(const Range& range, float downscale)
+{
+    return Range(CUDA_FLOOR(range.begin / downscale), CUDA_CEIL(range.end / downscale));
+}
+
+/**
+ * @brief Upscale the given Range with the given upscale factor
+ * @param[in] range the given Range
+ * @param[in] upscale the upscale factor to apply (used as a multiplier)
+ * @return the upscaled Range (begin is rounded down, end is rounded up)
+ */
+CUDA_HOST inline Range upscaleRange(const Range& range, float upscale)
+{
+    return Range(CUDA_FLOOR(range.begin * upscale), CUDA_CEIL(range.end * upscale));
+}
+
+/**
+ * @brief Inflate the given Range around its middle with the given factor
+ * @param[in] range the given Range
+ * @param[in] factor the inflate factor to apply to the Range size
+ * @return the inflated Range (begin is clamped to 0, bounds are rounded outwards)
+ */
+CUDA_HOST inline Range inflateRange(const Range& range, float factor)
+{
+    const float midRange = range.begin + (range.size() * 0.5f);
+    const float inflateSize = range.size() * factor * 0.5f;
+    return Range(CUDA_FLOOR(CUDA_MAX(midRange - inflateSize, 0.f)), CUDA_CEIL(midRange + inflateSize));
+}
+
+/**
+ * @brief Downscale the given ROI with the given downscale factor
+ * @param[in] roi the given ROI
+ * @param[in] downscale the downscale factor to apply
+ * @return the downscaled ROI (the X and Y ranges are downscaled independently)
+ */
+CUDA_HOST inline ROI downscaleROI(const ROI& roi, float downscale)
+{
+    return ROI(downscaleRange(roi.x, downscale),
+               downscaleRange(roi.y, downscale));
+}
+
+/**
+ * @brief Upscale the given ROI with the given upscale factor
+ * @param[in] roi the given ROI
+ * @param[in] upscale the upscale factor to apply
+ * @return the upscaled ROI (the X and Y ranges are upscaled independently)
+ */
+CUDA_HOST inline ROI upscaleROI(const ROI& roi, float upscale)
+{
+    return ROI(upscaleRange(roi.x, upscale),
+               upscaleRange(roi.y, upscale));
+}
+
+/**
+ * @brief Inflate the given ROI with the given factor
+ * @param[in] roi the given ROI
+ * @param[in] factor the inflate factor to apply
+ * @return the inflated ROI (the X and Y ranges are inflated independently)
+ */
+CUDA_HOST inline ROI inflateROI(const ROI& roi, float factor)
+{
+    return ROI(inflateRange(roi.x, factor),
+               inflateRange(roi.y, factor));
+}
+
+// Intersection of two ROIs: the X ranges and the Y ranges are intersected independently.
+inline ROI intersect(const ROI& a, const ROI& b)
+{
+    return ROI(intersect(a.x, b.x), intersect(a.y, b.y));
+}
+
+#if !defined(__NVCC__) // stream operators are compiled only when not building with NVCC
+inline std::ostream& operator<<(std::ostream& os, const Range& range)
+{
+    os << range.begin << "-" << range.end; // printed as begin-end
+    return os;
+}
+inline std::ostream& operator<<(std::ostream& os, const ROI& roi)
+{
+    os << "x: " << roi.x << ", y: " << roi.y; // each Range is printed with the Range operator above
+    return os;
+}
+#endif
+
+} // namespace aliceVision
+
diff --git a/src/aliceVision/mvsUtils/CMakeLists.txt b/src/aliceVision/mvsUtils/CMakeLists.txt
index 63d2670664..15b86bbd93 100644
--- a/src/aliceVision/mvsUtils/CMakeLists.txt
+++ b/src/aliceVision/mvsUtils/CMakeLists.txt
@@ -1,17 +1,21 @@
 # Headers
 set(mvsUtils_files_headers
   common.hpp
+  depthSimMapIO.hpp
   fileIO.hpp
   ImagesCache.hpp
   MultiViewParams.hpp
+  TileParams.hpp
 )
 
 # Sources
 set(mvsUtils_files_sources
   common.cpp
+  depthSimMapIO.cpp
   fileIO.cpp
   ImagesCache.cpp
   MultiViewParams.cpp
+  TileParams.cpp
 )
 
alicevision_add_library(aliceVision_mvsUtils diff --git a/src/aliceVision/mvsUtils/MultiViewParams.cpp b/src/aliceVision/mvsUtils/MultiViewParams.cpp index 8e1d6d572d..2485fe1270 100644 --- a/src/aliceVision/mvsUtils/MultiViewParams.cpp +++ b/src/aliceVision/mvsUtils/MultiViewParams.cpp @@ -328,50 +328,6 @@ bool MultiViewParams::is3DPointInFrontOfCam(const Point3d* X, int rc) const return XT.z >= 0; } -void MultiViewParams::getMinMaxMidNbDepth(int index, float& min, float& max, float& mid, std::size_t& nbDepths, float percentile) const -{ - using namespace boost::accumulators; - - const std::size_t cacheSize = 1000; - accumulator_set>> accDistanceMin(tag::tail::cache_size = cacheSize); - accumulator_set>> accDistanceMax(tag::tail::cache_size = cacheSize); - - const IndexT viewId = getViewId(index); - - ALICEVISION_LOG_DEBUG("Compute min/max/mid/nb depth for view id: " << viewId); - - OrientedPoint cameraPlane; - cameraPlane.p = CArr[index]; - cameraPlane.n = iRArr[index] * Point3d(0.0, 0.0, 1.0); - cameraPlane.n = cameraPlane.n.normalize(); - - Point3d midDepthPoint = Point3d(); - nbDepths = 0; - - for(const auto& landmarkPair : _sfmData.getLandmarks()) - { - const sfmData::Landmark& landmark = landmarkPair.second; - const Point3d point(landmark.X(0), landmark.X(1), landmark.X(2)); - - for(const auto& observationPair : landmark.observations) - { - if(observationPair.first == viewId) - { - const float distance = static_cast(pointPlaneDistance(point, cameraPlane.p, cameraPlane.n)); - accDistanceMin(distance); - accDistanceMax(distance); - midDepthPoint = midDepthPoint + point; - ++nbDepths; - } - } - } - - min = quantile(accDistanceMin, quantile_probability = 1.0 - percentile); - max = quantile(accDistanceMax, quantile_probability = percentile); - midDepthPoint = midDepthPoint / static_cast(nbDepths); - mid = pointPlaneDistance(midDepthPoint, cameraPlane.p, cameraPlane.n); -} - void MultiViewParams::getPixelFor3DPoint(Point2d* out, const Point3d& X, int rc) const { 
getPixelFor3DPoint(out, X, camArr[rc]); @@ -628,6 +584,93 @@ StaticVector MultiViewParams::findNearestCamsFromLandmarks(int rc, int nbNe return out; } +std::vector MultiViewParams::findTileNearestCams(int rc, int nbNearestCams, const std::vector& tCams, const ROI& roi) const +{ + auto plateauFunction = [](int a, int b, int c, int d, int x) + { + if(x > a && x <= b) + return (float(x - a) / float(b - a)); + if(x > b && x <= c) + return 1.0f; + if(x > c && x <= d) + return 1.0f - (float(x - c) / float(d - c)); + return 0.f; + }; + + std::vector out; + std::map tcScore; + + for(std::size_t i = 0; i < tCams.size(); ++i) + tcScore[tCams[i]] = 0.0f; + + const sfmData::SfMData& sfmData = getInputSfMData(); + + const IndexT viewId = getViewId(rc); + const sfmData::View& view = *(sfmData.getViews().at(viewId)); + const geometry::Pose3 pose = sfmData.getPose(view).getTransform(); + const camera::IntrinsicBase* intrinsicPtr = sfmData.getIntrinsicPtr(view.getIntrinsicId()); + + const ROI fullsizeRoi = upscaleROI(roi, getProcessDownscale()); // landmark observations are in the full-size image coordinate system + + for(const auto& landmarkPair : sfmData.getLandmarks()) + { + const auto& observations = landmarkPair.second.observations; + + auto viewObsIt = observations.find(viewId); + + // has landmark observation for the R camera + if(viewObsIt == observations.end()) + continue; + + // landmark R camera observation is in the image full-size ROI + if(!fullsizeRoi.contains(viewObsIt->second.x.x(), viewObsIt->second.x.y())) + continue; + + for(const auto& observationPair : observations) + { + const IndexT otherViewId = observationPair.first; + + // other view should not be the R camera + if(otherViewId == viewId) + continue; + + const int tc = getIndexFromViewId(otherViewId); + + // other view should be a T camera + if(tcScore.find(tc) == tcScore.end()) + continue; + + const sfmData::View& otherView = *(sfmData.getViews().at(otherViewId)); + const geometry::Pose3 otherPose = 
sfmData.getPose(otherView).getTransform(); + const camera::IntrinsicBase* otherIntrinsicPtr = sfmData.getIntrinsicPtr(otherView.getIntrinsicId()); + + const double angle = camera::angleBetweenRays(pose, intrinsicPtr, otherPose, otherIntrinsicPtr, viewObsIt->second.x, observationPair.second.x); + + tcScore[tc] += plateauFunction(1,10,50,150, angle); + } + } + + std::vector ids; + ids.reserve(tcScore.size()); + + for(const auto& tcScorePair : tcScore) + { + if(tcScorePair.second > 0.0f) + ids.push_back(SortedId(tcScorePair.first, tcScorePair.second)); + } + + qsort(&ids[0], ids.size(), sizeof(SortedId), qsortCompareSortedIdDesc); + + // ensure the ideal number of target cameras is not superior to the actual number of cameras + const int maxTc = std::min(std::min(getNbCameras(), nbNearestCams), static_cast(ids.size())); + out.reserve(maxTc); + + for(int i = 0; i < maxTc; ++i) + out.push_back(ids[i].id); + + return out; +} + StaticVector MultiViewParams::findCamsWhichIntersectsHexahedron(const Point3d hexah[8], const std::string& minMaxDepthsFileName) const { StaticVector* minMaxDepths = loadArrayFromFile(minMaxDepthsFileName); diff --git a/src/aliceVision/mvsUtils/MultiViewParams.hpp b/src/aliceVision/mvsUtils/MultiViewParams.hpp index 911e52f23d..cdc570b2eb 100644 --- a/src/aliceVision/mvsUtils/MultiViewParams.hpp +++ b/src/aliceVision/mvsUtils/MultiViewParams.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -29,7 +30,8 @@ class SfMData; namespace mvsUtils { -enum class EFileType { +enum class EFileType +{ P = 0, K = 1, iK = 2, @@ -68,6 +70,10 @@ enum class EFileType { nmodMap = 41, D = 42, normalMap = 43, + volume = 44, + volumeCross = 45, + stats9p = 46, + tilePattern = 47 }; class MultiViewParams @@ -174,6 +180,16 @@ class MultiViewParams return _processDownscale; } + inline int getMaxImageOriginalWidth() const + { + return _maxImageWidth; + } + + inline int getMaxImageOriginalHeight() const + { + return _maxImageHeight; + } + inline 
int getMaxImageWidth() const { return _maxImageWidth / getProcessDownscale(); @@ -238,7 +254,6 @@ class MultiViewParams bool is3DPointInFrontOfCam(const Point3d* X, int rc) const; - void getMinMaxMidNbDepth(int index, float& min, float& max, float& mid, std::size_t& nbDepths, float percentile = 0.999f) const; void getPixelFor3DPoint(Point2d* out, const Point3d& X, const Matrix3x4& P) const; void getPixelFor3DPoint(Point2d* out, const Point3d& X, int rc) const; void getPixelFor3DPoint(Pixel* out, const Point3d& X, int rc) const; @@ -281,6 +296,15 @@ class MultiViewParams */ StaticVector findNearestCamsFromLandmarks(int rc, int nbNearestCams) const; + /** + * @brief Find nearest cameras for a given tile + * @param[in] rc R camera id + * @param[in] nbNearestCams maximum number of desired nearest cameras + * @param[in] tCams a given list of pre-selected nearest cameras + * @param[in] roi the tile 2d region of interest + * @return nearest cameras list for the given tile + */ + std::vector findTileNearestCams(int rc, int nbNearestCams, const std::vector& tCams, const ROI& roi) const; inline void setMinViewAngle(float minViewAngle) { diff --git a/src/aliceVision/mvsUtils/TileParams.cpp b/src/aliceVision/mvsUtils/TileParams.cpp new file mode 100644 index 0000000000..39942b56d6 --- /dev/null +++ b/src/aliceVision/mvsUtils/TileParams.cpp @@ -0,0 +1,121 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+#include "TileParams.hpp"
+
+#include
+#include
+
+namespace aliceVision {
+namespace mvsUtils {
+
+void getTileRoiList(const TileParams& tileParams, int imageWidth, int imageHeight, int maxDownscale, std::vector& out_tileRoiList)
+{
+    assert(maxDownscale > 0);
+    assert(2 * tileParams.padding < tileParams.bufferWidth);  // the effective tile width computed below must be positive
+    assert(2 * tileParams.padding < tileParams.bufferHeight); // the effective tile height computed below must be positive
+
+    // single tile case: one ROI covering the whole image
+    if(hasOnlyOneTile(tileParams, imageWidth, imageHeight))
+    {
+        out_tileRoiList.emplace_back(0, imageWidth, 0, imageHeight); // NOTE(review): appended to the list, whereas the multi-tile path below resizes it
+        return;
+    }
+
+    // compute maximum effective tile width and height: maximum size without padding
+    const int maxEffectiveTileWidth = tileParams.bufferWidth - 2 * tileParams.padding;
+    const int maxEffectiveTileHeight = tileParams.bufferHeight - 2 * tileParams.padding;
+
+    // compute nb of tile buffers per side
+    const int nbTileSideX = divideRoundUp(imageWidth , maxEffectiveTileWidth);
+    const int nbTileSideY = divideRoundUp(imageHeight, maxEffectiveTileHeight);
+
+    // allocate roi list
+    out_tileRoiList.resize(nbTileSideX * nbTileSideY);
+
+    // compute downscaled image width and height
+    const int downscaledImageWidth = divideRoundUp(imageWidth, maxDownscale);
+    const int downscaledImageHeight = divideRoundUp(imageHeight, maxDownscale);
+
+    // compute effective tile width and height for the best tile layout at the maximum downscale
+    const int effectiveTileWidth = divideRoundUp(downscaledImageWidth , nbTileSideX) * maxDownscale;
+    const int effectiveTileHeight = divideRoundUp(downscaledImageHeight, nbTileSideY) * maxDownscale;
+
+    // compute each tile ROI (stored column by column: index = i * nbTileSideY + j)
+    for(int i = 0; i < nbTileSideX; ++i)
+    {
+        const int beginX = i * effectiveTileWidth;
+        const int endX = std::min((i + 1) * effectiveTileWidth + tileParams.padding, imageWidth); // padding added on the end side, clamped to the image
+
+        for(int j = 0; j < nbTileSideY; ++j)
+        {
+            const int beginY = j * effectiveTileHeight;
+            const int endY = std::min((j + 1) * effectiveTileHeight + tileParams.padding, imageHeight); // padding added on the end side, clamped to the image
+
+            out_tileRoiList.at(i * nbTileSideY + j) = ROI(beginX, endX, beginY, endY);
+        }
+    }
+}
+// Log the tile parameters, the tile layout derived from them and each ROI of the given list.
+void logTileRoiList(const TileParams& tileParams, int imageWidth, int imageHeight, int maxDownscale, const std::vector& in_tileRoiList)
+{
+    // compute maximum effective tile width and height: maximum size without padding (same formulas as getTileRoiList)
+    const int maxEffectiveTileWidth = tileParams.bufferWidth - 2 * tileParams.padding;
+    const int maxEffectiveTileHeight = tileParams.bufferHeight - 2 * tileParams.padding;
+
+    // compute nb of tile buffers per side
+    const int nbTileSideX = divideRoundUp(imageWidth , maxEffectiveTileWidth);
+    const int nbTileSideY = divideRoundUp(imageHeight, maxEffectiveTileHeight);
+
+    // compute downscaled image width and height
+    const int downscaledImageWidth = divideRoundUp(imageWidth, maxDownscale);
+    const int downscaledImageHeight = divideRoundUp(imageHeight, maxDownscale);
+
+    // compute effective tile width and height for the best tile layout at the maximum downscale
+    const int effectiveTileWidth = divideRoundUp(downscaledImageWidth , nbTileSideX) * maxDownscale;
+    const int effectiveTileHeight = divideRoundUp(downscaledImageHeight, nbTileSideY) * maxDownscale;
+
+    std::ostringstream ostr;
+    ostr << "Tiling information: " << std::endl
+         << "\t- parameters: " << std::endl
+         << "\t - buffer width: " << tileParams.bufferWidth << " px" << std::endl
+         << "\t - buffer height: " << tileParams.bufferHeight << " px" << std::endl
+         << "\t - padding: " << tileParams.padding << " px" << std::endl
+         << "\t- maximum downscale: " << maxDownscale << std::endl
+         << "\t- maximum image width: " << imageWidth << " px" << std::endl
+         << "\t- maximum image height: " << imageHeight << " px" << std::endl;
+
+    if(hasOnlyOneTile(tileParams, imageWidth, imageHeight)) // single tile case: log the parameters only, no layout
+    {
+        ALICEVISION_LOG_INFO(ostr.str());
+        ALICEVISION_LOG_INFO("Maximum image size is smaller than one tile, use only one tile.");
+        return;
+    }
+
+    ostr << "\t- maximum effective tile width: " << maxEffectiveTileWidth << " px" << std::endl
+         << "\t- maximum effective tile height: " << maxEffectiveTileHeight << " px" << std::endl
+         << "\t- # tiles on X-side: " << nbTileSideX << std::endl
+         << "\t- # tiles on Y-side: " << nbTileSideY << std::endl
+         << "\t- effective tile width: " << effectiveTileWidth << " px" << std::endl
+         << "\t- effective tile height: " << effectiveTileHeight << " px" << std::endl
+         << "\t- tile list: " << std::endl;
+
+    if(in_tileRoiList.empty())
+        ostr << "\t empty" << std::endl;
+
+    for(size_t i = 0; i < in_tileRoiList.size(); ++i) // one line per tile, numbered from 1
+    {
+        const ROI& roi = in_tileRoiList.at(i);
+
+        ostr << "\t - tile (" << (i + 1) << "/" << in_tileRoiList.size() << ") "
+             << "size: " << roi.width() << "x" << roi.height() << " px, roi: [" << roi << "]" << std::endl;
+    }
+
+    ALICEVISION_LOG_INFO(ostr.str());
+}
+
+} // namespace mvsUtils
+} // namespace aliceVision
diff --git a/src/aliceVision/mvsUtils/TileParams.hpp b/src/aliceVision/mvsUtils/TileParams.hpp
new file mode 100644
index 0000000000..65457246d5
--- /dev/null
+++ b/src/aliceVision/mvsUtils/TileParams.hpp
@@ -0,0 +1,61 @@
+// This file is part of the AliceVision project.
+// Copyright (c) 2022 AliceVision contributors.
+// This Source Code Form is subject to the terms of the Mozilla Public License,
+// v. 2.0. If a copy of the MPL was not distributed with this file,
+// You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include
+#include
+
+namespace aliceVision {
+namespace mvsUtils {
+
+/**
+ * @brief Tile Parameters
+ * This structure holds the tiling user parameters.
+ */ +struct TileParams +{ + // user parameters + + int bufferWidth = 1024; + int bufferHeight = 1024; + int padding = 64; +}; + +/** +* @brief Check if the given image size can contain only one tile +* @param[in] tileParams the tile parameters +* @param[in] imageWidth the image width +* @param[in] imageHeight the image height +* @return true if single tile case +*/ +inline bool hasOnlyOneTile(const TileParams& tileParams, int imageWidth, int imageHeight) +{ + return (tileParams.bufferHeight >= imageWidth && tileParams.bufferHeight >= imageHeight); +} + + /** + * @brief Get tile list from tile parameters and image width/height + * @param[in] tileParams the tile parameters + * @param[in] imageWidth the image width + * @param[in] imageHeight the image height + * @param[in] maxDownscale the maximum downscale that can be applied to the image + * @param[out] out_tileRoiList the output tile ROI list + */ +void getTileRoiList(const TileParams& tileParams, int imageWidth, int imageHeight, int maxDownscale, std::vector& out_tileRoiList); + +/** +* @brief Log tile list and tile parameters +* @param[in] tileParams the tile parameters +* @param[in] imageWidth the image width used for the tile ROI list computation +* @param[in] imageHeight the image height used for the tile ROI list computation +* @param[in] maxDownscale the maximum downscale that can be applied to the image +* @param[in] in_tileRoiList the tile ROI list +*/ +void logTileRoiList(const TileParams& tileParams, int imageWidth, int imageHeight, int maxDownscale, const std::vector& in_tileRoiList); + +} // namespace mvsUtils +} // namespace aliceVision diff --git a/src/aliceVision/mvsUtils/depthSimMapIO.cpp b/src/aliceVision/mvsUtils/depthSimMapIO.cpp new file mode 100644 index 0000000000..d3f307d9ae --- /dev/null +++ b/src/aliceVision/mvsUtils/depthSimMapIO.cpp @@ -0,0 +1,658 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. 
+// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#include "depthSimMapIO.hpp" + +#include +#include +#include +#include + +#include +#include + +namespace fs = boost::filesystem; + +namespace aliceVision { +namespace mvsUtils { + +/** + * @brief Get tile map ROI from file metadata + * @param[in] mapTilePath the tile map file path + * @param[in,out] out_roi the corresponding region-of-interest read from file metadata + */ +void getRoiFromMetadata(const std::string& mapTilePath, ROI& out_roi) +{ + const oiio::ParamValueList metadata = image::readImageMetadata(mapTilePath); + + const auto roiBeginXIt = metadata.find("AliceVision:roiBeginX"); + const auto roiBeginYIt = metadata.find("AliceVision:roiBeginY"); + const auto roiEndXIt = metadata.find("AliceVision:roiEndX"); + const auto roiEndYIt = metadata.find("AliceVision:roiEndY"); + + if(roiBeginXIt != metadata.end() && roiBeginXIt->type() == oiio::TypeDesc::INT) + out_roi.x.begin = roiBeginXIt->get_int(); + + if(roiBeginYIt != metadata.end() && roiBeginYIt->type() == oiio::TypeDesc::INT) + out_roi.y.begin = roiBeginYIt->get_int(); + + if(roiEndXIt != metadata.end() && roiEndXIt->type() == oiio::TypeDesc::INT) + out_roi.x.end = roiEndXIt->get_int(); + + if(roiEndYIt != metadata.end() && roiEndYIt->type() == oiio::TypeDesc::INT) + out_roi.y.end = roiEndYIt->get_int(); + + // invalid or no roi metadata + if((out_roi.x.begin < 0) || (out_roi.y.begin < 0) || (out_roi.x.end <= 0) || (out_roi.y.end <= 0)) + { + ALICEVISION_THROW_ERROR("Cannot find ROI information in file: " << mapTilePath); + } +} + +/** + * @brief Get tile map TileParams from file metadata + * @param[in] mapTilePath the tile map file path + * @param[in,out] out_tileParams the corresponding TileParams read from file metadata + */ +void getTileParamsFromMetadata(const std::string& mapTilePath, 
TileParams& out_tileParams) +{ + const oiio::ParamValueList metadata = image::readImageMetadata(mapTilePath); + + const auto tileWidthIt = metadata.find("AliceVision:tileBufferWidth"); + const auto tileHeightIt = metadata.find("AliceVision:tileBufferHeight"); + const auto tilePaddingIt = metadata.find("AliceVision:tilePadding"); + + if(tileWidthIt != metadata.end() && tileWidthIt->type() == oiio::TypeDesc::INT) + out_tileParams.bufferWidth = tileWidthIt->get_int(); + + if(tileHeightIt != metadata.end() && tileHeightIt->type() == oiio::TypeDesc::INT) + out_tileParams.bufferHeight = tileHeightIt->get_int(); + + if(tilePaddingIt != metadata.end() && tilePaddingIt->type() == oiio::TypeDesc::INT) + out_tileParams.padding = tilePaddingIt->get_int(); + + // invalid or no tile metadata + if((out_tileParams.bufferWidth <= 0) || (out_tileParams.bufferHeight <= 0) || (out_tileParams.padding < 0)) + { + ALICEVISION_THROW_ERROR("Cannot find tile parameters in file: " << mapTilePath); + } +} + +/** + * @brief Get the tile map path list for a R camera et a given scale / stepXY + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + * @param[in,out] out_mapTilePathList the tile map path list + */ +void getTilePathList(int rc, + const MultiViewParams& mp, + EFileType fileType, + int scale, + int step, + const std::string& customSuffix, + std::vector& out_mapTilePathList) +{ + const fs::path mapPath(getFileNameFromIndex(mp, rc, fileType, scale, customSuffix)); + const fs::path mapDirectory(mapPath.parent_path()); + + if(!is_directory(mapDirectory)) + ALICEVISION_THROW_ERROR("Cannot find depth/similarity map directory (rc: " << rc << ")."); + + const boost::regex mapPattern(mapPath.stem().string() + "_\\d+_\\d+" + mapPath.extension().string()); + + for(auto& entry : 
boost::make_iterator_range(boost::filesystem::directory_iterator(mapDirectory), {})) + { + if(boost::regex_match(entry.path().filename().string(), mapPattern)) + out_mapTilePathList.push_back(entry.path().string()); + } +} + +/** + * @brief Weight one of the corners/edges of a tile according to the size of the padding + * + * When merging tiles, there are 8 intersection areas: + * * 4 corners (intersection of 4 tiles or 2 tiles when the tile is on one image edge) + * * 4 edges (intersection of 2 tiles) + * + * @param a alpha for top-left + * @param b alpha for top-right + * @param c alpha for bottom-right + * @param d alpha for bottom-left + * @param borderWidth tiles intersection area width (could be the intersection between 2 or 4 tiles) + * @param borderHeight tiles intersection area height + * @param lu left-up corner of the intersection area in the tile coordinate system + * @param in_tileMap image of the tile + */ +void weightTileBorder(int a, int b, int c, int d, + int borderWidth, + int borderHeight, + const Point2d& lu, + image::Image& in_tileMap) +{ + const Point2d rd = lu + Point2d(borderWidth, borderHeight); + + const int endX = std::min(int(rd.x), in_tileMap.Width()); + const int endY = std::min(int(rd.y), in_tileMap.Height()); + + // Add small margin where alpha is 0 for corners (lu and rd) + static const double margin = 2.0; + const Point2d lu_m(lu.x + margin, lu.y + margin); + const Point2d rd_m(rd.x - margin, rd.y - margin); + const double borderWidth_m = borderWidth - 2.0 * margin; + const double borderHeight_m = borderHeight - 2.0 * margin; + + for(int x = lu.x; x < endX; ++x) + { + for(int y = lu.y; y < endY; ++y) + { + // bilinear interpolation + const float r_x = clamp((rd_m.x - x) / borderWidth_m, 0.0, 1.0); + const float r_y = clamp((rd_m.y - y) / borderHeight_m, 0.0, 1.0); + const float l_x = clamp((x - lu_m.x) / borderWidth_m, 0.0, 1.0); + const float l_y = clamp((y - lu_m.y) / borderHeight_m, 0.0, 1.0); + + const float weight = r_y * (r_x 
* a + l_x * b) + l_y * (r_x * d + l_x * c); + + // apply weight to tile depth/sim map + in_tileMap(y, x) *= weight; + } + } +} + +void addTileMapWeighted(int rc, + const MultiViewParams& mp, + const TileParams& tileParams, + const ROI& roi, + int downscale, + image::Image& in_tileMap, + image::Image& inout_map) +{ + // get downscaled ROI + const ROI downscaledRoi = downscaleROI(roi, downscale); + + // get tile border size + const int tileWidth = downscaledRoi.width(); + const int tileHeight = downscaledRoi.height(); + const int tilePadding = tileParams.padding / downscale; + + // get tile position information + const bool firstColumn = (roi.x.begin == 0); + const bool lastColumn = (roi.x.end == mp.getWidth(rc)); + const bool firstRow = (roi.y.begin == 0); + const bool lastRow = (roi.y.end == mp.getHeight(rc)); + + // weight the top left corner + if(!firstColumn || !firstRow) + { + const Point2d lu(0, 0); + const int b = (firstRow) ? 1 : 0; + const int d = (firstColumn) ? 1 : 0; + weightTileBorder(0, b, 1, d, tilePadding, tilePadding, lu, in_tileMap); + } + + // weight the bottom left corner + if(!firstColumn || !lastRow) + { + const Point2d lu(0, tileHeight - tilePadding); + const int a = (firstColumn) ? 1 : 0; + const int c = (lastRow) ? 1 : 0; + weightTileBorder(a, 1, c, 0, tilePadding, tilePadding, lu, in_tileMap); + } + + // weight the top right corner + if(!lastColumn || !firstRow) + { + const Point2d lu(tileWidth - tilePadding, 0); + const int a = (firstRow) ? 1 : 0; + const int c = (lastColumn) ? 1 : 0; + weightTileBorder(a, 0, c, 1, tilePadding, tilePadding, lu, in_tileMap); + } + + // weight the bottom right corner + if(!lastColumn || !lastRow) + { + const Point2d lu(tileWidth - tilePadding, tileHeight - tilePadding); + const int b = (lastColumn) ? 1 : 0; + const int d = (lastRow) ? 
1 : 0; + weightTileBorder(1, b, 0, d, tilePadding, tilePadding, lu, in_tileMap); + } + + // weight the top border + if(!firstRow) + { + const Point2d lu(tilePadding, 0); + weightTileBorder(0, 0, 1, 1, tileWidth - 2 * tilePadding, tilePadding, lu, in_tileMap); + } + + // weight the bottom border + if(!lastRow) + { + const Point2d lu(tilePadding, tileHeight - tilePadding); + weightTileBorder(1, 1, 0, 0, tileWidth - 2 * tilePadding, tilePadding, lu, in_tileMap); + } + + // weight the left border + if(!firstColumn) + { + const Point2d lu(0, tilePadding); + weightTileBorder(0, 1, 1, 0, tilePadding, tileHeight - 2 * tilePadding, lu, in_tileMap); + } + + // weight the right border + if(!lastColumn) + { + const Point2d lu(tileWidth - tilePadding, tilePadding); + weightTileBorder(1, 0, 0, 1, tilePadding, tileHeight - 2 * tilePadding, lu, in_tileMap); + } + + // add weighted tile to the depth/sim map + for(int x = downscaledRoi.x.begin; x < downscaledRoi.x.end; ++x) + { + for(int y = downscaledRoi.y.begin; y < downscaledRoi.y.end; ++y) + { + const int tx = x - downscaledRoi.x.begin; + const int ty = y - downscaledRoi.y.begin; + + inout_map(y, x) += in_tileMap(ty, tx); + } + } +} + +void readMapFromTiles(int rc, + const MultiViewParams& mp, + EFileType fileType, + image::Image& out_map, + int scale, + int step, + const std::string& customSuffix) +{ + const ROI imageRoi(Range(0, mp.getWidth(rc)), Range(0, mp.getHeight(rc))); + + const int scaleStep = std::max(scale, 1) * step; // avoid 0 special case (reserved for depth map filtering) + const int width = divideRoundUp(mp.getWidth(rc) , scaleStep); + const int height = divideRoundUp(mp.getHeight(rc), scaleStep); + + // the output full map + out_map.resize(width, height, true, 0.f); // should be initialized, additive process + + // get tile map path list for the given R camera + std::vector mapTilePathList; + getTilePathList(rc, mp, fileType, scale, step, customSuffix, mapTilePathList); + + if(mapTilePathList.empty()) + { + // 
map can be empty + ALICEVISION_LOG_INFO("Cannot find any map tile file (rc: " << rc << ")."); + return; // nothing to do, already initialized + } + + // get tileParams from first tile file metadata + TileParams tileParams; + getTileParamsFromMetadata(mapTilePathList.front(), tileParams); + + // get tile roi list from each file metadata + std::vector tileRoiList; + tileRoiList.resize(mapTilePathList.size()); + for(size_t i = 0; i < mapTilePathList.size(); ++i) + { + getRoiFromMetadata(mapTilePathList.at(i), tileRoiList.at(i)); + } + + // read and add each tile to the full map + for(size_t i = 0; i < tileRoiList.size(); ++i) + { + const ROI roi = intersect(tileRoiList.at(i), imageRoi); + const std::string mapTilePath = getFileNameFromIndex(mp, rc, fileType, scale, customSuffix, roi.x.begin, roi.y.begin); + + if(roi.isEmpty()) + continue; + + try + { + // read tile + image::Image tileMap; + image::readImage(mapTilePath, tileMap, image::EImageColorSpace::NO_CONVERSION); + + // add tile to the full map + addTileMapWeighted(rc, mp, tileParams, roi, scaleStep, tileMap, out_map); + } + catch(const std::exception& e) + { + ALICEVISION_LOG_WARNING("Cannot find depth/sim map (rc: " << rc << "): " << mapTilePath); + } + } +} + +void writeDepthSimMap(int rc, + const MultiViewParams& mp, + const TileParams& tileParams, + const ROI& roi, + const image::Image& depthMap, + const image::Image& simMap, + int scale, + int step, + const std::string& customSuffix) +{ + const int scaleStep = std::max(scale, 1) * step; // avoid 0 special case (reserved for depth map filtering) + + // get image dimensions at scale / stepXY + const int imageWidth = divideRoundUp(mp.getWidth(rc) , scaleStep); + const int imageHeight = divideRoundUp(mp.getHeight(rc), scaleStep); + + // get downscaled ROI + const ROI downscaledROI = downscaleROI(roi, scaleStep); + + // OIIO roi for depth / similarity map writing + // displayRoi is the image region of interest for display (image size) + // pixelRoi is the 
buffer region of interest within the displayRoi (tile size) + // no tiling if displayRoi == pixelRoi + const oiio::ROI displayRoi(0, imageWidth, 0, imageHeight); + const oiio::ROI pixelRoi(downscaledROI.x.begin, downscaledROI.x.end, downscaledROI.y.begin, downscaledROI.y.end, 0, 1, 0, 1); + + // output map path + std::string depthMapPath; + std::string simMapPath; + + if(downscaledROI.width() != imageWidth || downscaledROI.height() != imageHeight) // is a tile + { + // tiled depth/sim map + depthMapPath = getFileNameFromIndex(mp, rc, EFileType::depthMap, scale, customSuffix, roi.x.begin, roi.y.begin); + simMapPath = getFileNameFromIndex(mp, rc, EFileType::simMap, scale, customSuffix, roi.x.begin, roi.y.begin); + } + else + { + // fullsize depth/sim map + depthMapPath = getFileNameFromIndex(mp, rc, EFileType::depthMap, scale, customSuffix); + simMapPath = getFileNameFromIndex(mp, rc, EFileType::simMap, scale, customSuffix); + } + + oiio::ParamValueList metadata = image::getMetadataFromMap(mp.getMetadata(rc)); + + // downscale metadata + metadata.push_back(oiio::ParamValue("AliceVision:downscale", mp.getDownscaleFactor(rc) * scaleStep)); + + // roi metadata + { + metadata.push_back(oiio::ParamValue("AliceVision:roiBeginX", int(roi.x.begin))); + metadata.push_back(oiio::ParamValue("AliceVision:roiBeginY", int(roi.y.begin))); + metadata.push_back(oiio::ParamValue("AliceVision:roiEndX", int(roi.x.end))); + metadata.push_back(oiio::ParamValue("AliceVision:roiEndY", int(roi.y.end))); + } + + // tile params metadata + { + metadata.push_back(oiio::ParamValue("AliceVision:tileBufferWidth", tileParams.bufferWidth)); + metadata.push_back(oiio::ParamValue("AliceVision:tileBufferHeight", tileParams.bufferHeight)); + metadata.push_back(oiio::ParamValue("AliceVision:tilePadding", tileParams.padding)); + } + + // projection matrix metadata + { + std::vector matrixP = mp.getOriginalP(rc); + metadata.push_back(oiio::ParamValue("AliceVision:P", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, 
oiio::TypeDesc::MATRIX44), 1, matrixP.data())); + } + + // CArr & iCamArr metadata + { + Point3d C = mp.CArr[rc]; + Matrix3x3 iP = mp.iCamArr[rc]; + + if (scaleStep > 1) + { + Matrix3x4 P = mp.camArr[rc]; + for (int i = 0; i < 8; ++i) + P.m[i] /= double(scaleStep); + Matrix3x3 K, iK; + Matrix3x3 R, iR; + + P.decomposeProjectionMatrix(K, R, C); // replace C + iK = K.inverse(); + iR = R.inverse(); + iP = iR * iK; // replace iP + } + + metadata.push_back(oiio::ParamValue("AliceVision:CArr", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, oiio::TypeDesc::VEC3), 1, C.m)); + metadata.push_back(oiio::ParamValue("AliceVision:iCamArr", oiio::TypeDesc(oiio::TypeDesc::DOUBLE, oiio::TypeDesc::MATRIX33), 1, iP.m)); + } + + // min/max/nb depth metadata + { + const int nbDepthValues = std::count_if(depthMap.data(), depthMap.data() + depthMap.size(), [](float v) { return v > 0.0f; }); + float maxDepth = -1.0f; + float minDepth = std::numeric_limits::max(); + + for(int i = 0; i < depthMap.size(); ++i) + { + const float depth = depthMap(i); + + if(depth <= -1.0f) + continue; + + maxDepth = std::max(maxDepth, depth); + minDepth = std::min(minDepth, depth); + } + + metadata.push_back(oiio::ParamValue("AliceVision:nbDepthValues", nbDepthValues)); + metadata.push_back(oiio::ParamValue("AliceVision:minDepth", minDepth)); + metadata.push_back(oiio::ParamValue("AliceVision:maxDepth", maxDepth)); + } + + // write depth map + if(!depthMap.size() <= 0) + { + image::writeImage(depthMapPath, + depthMap, + image::ImageWriteOptions() + .toColorSpace(image::EImageColorSpace::NO_CONVERSION) + .storageDataType(image::EStorageDataType::Float), + metadata, + displayRoi, + pixelRoi); + } + + // write sim map + if(!simMap.size() <= 0) + { + image::writeImage(simMapPath, + simMap, + image::ImageWriteOptions() + .toColorSpace(image::EImageColorSpace::NO_CONVERSION) + .storageDataType(image::EStorageDataType::Half), + metadata, + displayRoi, + pixelRoi); + } +} + +void writeDepthSimMap(int rc, + const 
MultiViewParams& mp, + const image::Image& depthMap, + const image::Image& simMap, + int scale, + int step, + const std::string& customSuffix) +{ + const TileParams tileParams; // default tile parameters, no tiles + const ROI roi = ROI(0, mp.getWidth(rc), 0, mp.getHeight(rc)); // full roi + writeDepthSimMap(rc, mp, tileParams, roi, depthMap, simMap, scale, step, customSuffix); +} + + +void writeDepthMap(int rc, + const MultiViewParams& mp, + const image::Image& depthMap, + int scale, + int step, + const std::string& customSuffix) +{ + const TileParams tileParams; // default tile parameters, no tiles + const ROI roi = ROI(0, mp.getWidth(rc), 0, mp.getHeight(rc)); // full roi + image::Image simMap; // empty simMap, write only depth map + writeDepthSimMap(rc, mp, tileParams, roi, depthMap, simMap, scale, step, customSuffix); +} + +void readDepthSimMap(int rc, + const MultiViewParams& mp, + image::Image& out_depthMap, + image::Image& out_simMap, + int scale, + int step, + const std::string& customSuffix) +{ + const std::string depthMapPath = getFileNameFromIndex(mp, rc,EFileType::depthMap, scale, customSuffix); + const std::string simMapPath = getFileNameFromIndex(mp, rc, EFileType::simMap, scale, customSuffix); + + if (fs::exists(depthMapPath) && fs::exists(simMapPath)) + { + image::readImage(depthMapPath, out_depthMap, image::EImageColorSpace::NO_CONVERSION); + image::readImage(simMapPath, out_simMap, image::EImageColorSpace::NO_CONVERSION); + } + else + { + readMapFromTiles(rc, mp, EFileType::depthMap, out_depthMap, scale, step, customSuffix); + readMapFromTiles(rc, mp, EFileType::simMap, out_simMap, scale, step, customSuffix); + } +} + +void readDepthMap(int rc, + const MultiViewParams& mp, + image::Image& out_depthMap, + int scale, + int step, + const std::string& customSuffix) +{ + const std::string depthMapPath = getFileNameFromIndex(mp, rc, EFileType::depthMap, scale, customSuffix); + + if (fs::exists(depthMapPath)) + { + image::readImage(depthMapPath, 
out_depthMap, image::EImageColorSpace::NO_CONVERSION); + } + else + { + readMapFromTiles(rc, mp, EFileType::depthMap, out_depthMap, scale, step, customSuffix); + } +} + +void readSimMap(int rc, + const MultiViewParams& mp, + image::Image& out_simMap, + int scale, + int step, + const std::string& customSuffix) +{ + const std::string simMapPath = getFileNameFromIndex(mp, rc, EFileType::simMap, scale, customSuffix); + + if (fs::exists(simMapPath)) + { + image::readImage(simMapPath, out_simMap, image::EImageColorSpace::NO_CONVERSION); + } + else + { + readMapFromTiles(rc, mp, EFileType::simMap, out_simMap, scale, step, customSuffix); + } +} + +unsigned long getNbDepthValuesFromDepthMap(int rc, + const MultiViewParams& mp, + int scale, + int step, + const std::string& customSuffix) +{ + const std::string depthMapPath = getFileNameFromIndex(mp, rc, EFileType::depthMap, scale, customSuffix); + int nbDepthValues = -1; + + // get nbDepthValues from metadata + if (fs::exists(depthMapPath)) // untilled + { + const oiio::ParamValueList metadata = image::readImageMetadata(depthMapPath); + nbDepthValues = metadata.get_int("AliceVision:nbDepthValues", -1); + } + else // tilled + { + std::vector mapTilePathList; + getTilePathList(rc, mp, EFileType::depthMap, scale, step, customSuffix, mapTilePathList); + + if(mapTilePathList.empty()) // depth map can be empty + ALICEVISION_LOG_INFO("Cannot find any depth map tile file (rc: " << rc << ")."); + + for(const std::string& mapTilePath : mapTilePathList) + { + const oiio::ParamValueList metadata = image::readImageMetadata(mapTilePath); + + const int nbTileDepthValues = metadata.get_int("AliceVision:nbDepthValues", -1); + + if(nbTileDepthValues < 0) + ALICEVISION_THROW_ERROR("Cannot find or incorrect 'AliceVision:nbDepthValues' metadata in depth map tile (rc: " << rc << ")"); + + nbDepthValues += nbTileDepthValues; + } + } + + // no metadata compute number of depth values + if(nbDepthValues < 0) + { + image::Image depthMap; + + 
ALICEVISION_LOG_WARNING("Can't find or invalid 'nbDepthValues' metadata in depth map (rc: " << rc << "). Recompute the number of valid values."); + + readDepthMap(rc, mp, depthMap, scale, step, customSuffix); + + nbDepthValues = std::count_if(depthMap.data(), depthMap.data() + depthMap.size(), [](float v) { return v > 0.0f; }); + } + + return nbDepthValues; +} + +void deleteDepthSimMapTiles(int rc, + const MultiViewParams& mp, + int scale, + int step, + const std::string& customSuffix) +{ + std::vector depthMapTilePathList; + std::vector simMapTilePathList; + + getTilePathList(rc, mp, EFileType::depthMap, scale, step, customSuffix, depthMapTilePathList); + getTilePathList(rc, mp, EFileType::simMap, scale, step, customSuffix, simMapTilePathList); + + if(depthMapTilePathList.empty()) // depth map can be empty + ALICEVISION_LOG_INFO("Cannot find any depth map tile file to delete (rc: " << rc << ")."); + + if(simMapTilePathList.empty()) // sim map can be empty + ALICEVISION_LOG_INFO("Cannot find any similarity map tile file to delete (rc: " << rc << ")."); + + // delete depth map tile files + for(const std::string& depthMapTilePath : depthMapTilePathList) + { + try + { + fs::remove(depthMapTilePath); + } + catch (const std::exception& e) + { + ALICEVISION_LOG_WARNING("Cannot delete depth map tile file (rc: " << rc << "): " << fs::path(depthMapTilePath).filename().string() << std::endl); + } + } + + // delete similarity map tile files + for(const std::string& simMapTilePath : simMapTilePathList) + { + try + { + fs::remove(simMapTilePath); + } + catch (const std::exception& e) + { + ALICEVISION_LOG_WARNING("Cannot delete similarity map tile file (rc: " << rc << "): " << fs::path(simMapTilePath).filename().string() << std::endl); + } + } +} + +} // namespace mvsUtils +} // namespace aliceVision diff --git a/src/aliceVision/mvsUtils/depthSimMapIO.hpp b/src/aliceVision/mvsUtils/depthSimMapIO.hpp new file mode 100644 index 0000000000..36275c429f --- /dev/null +++ 
b/src/aliceVision/mvsUtils/depthSimMapIO.hpp @@ -0,0 +1,171 @@ +// This file is part of the AliceVision project. +// Copyright (c) 2022 AliceVision contributors. +// This Source Code Form is subject to the terms of the Mozilla Public License, +// v. 2.0. If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include + +#include + +namespace aliceVision { +namespace mvsUtils { + +/** + * @brief Add a tile to a full map with weighting + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] tileParams tile workflow parameters + * @param[in] roi the 2d region of interest without any downscale apply + * @param[in] downscale the depth/sim map downscale factor + * @param[in] in_tileMap the tile map to add + * @param[in,out] inout_map the full output map + */ +void addTileMapWeighted(int rc, + const MultiViewParams& mp, + const TileParams& tileParams, + const ROI& roi, + int downscale, + image::Image& in_tileMap, + image::Image& inout_map); + +/** + * @brief Write the depth map and the similarity map + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] tileParams tile workflow parameters + * @param[in] roi the 2d region of interest without any downscale apply + * @param[in] depthMap the corresponding depth map + * @param[in] simMap the corresponding similarity map + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + */ +void writeDepthSimMap(int rc, + const MultiViewParams& mp, + const TileParams& tileParams, + const ROI& roi, + const image::Image& depthMap, + const image::Image& simMap, + int scale, + int step, + const std::string& customSuffix = ""); + +/** + * @brief Write the depth map and the similarity map + * @param[in] rc the related R camera index + * @param[in] 
mp the multi-view parameters + * @param[in] depthMap the corresponding depth map + * @param[in] simMap the corresponding similarity map + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + */ +void writeDepthSimMap(int rc, + const MultiViewParams& mp, + const image::Image& depthMap, + const image::Image& simMap, + int scale = 1, + int step = 1, + const std::string& customSuffix = ""); + +/** + * @brief Write the depth map + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] depthMap the corresponding depth map + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + */ +void writeDepthMap(int rc, + const MultiViewParams& mp, + const image::Image& depthMap, + int scale = 1, + int step = 1, + const std::string& customSuffix = ""); + +/** + * @brief read the depth map and the similarity map from files + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[out] out_depthMap the corresponding depth map + * @param[out] out_simMap the corresponding similarity map + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + */ +void readDepthSimMap(int rc, + const MultiViewParams& mp, + image::Image& out_depthMap, + image::Image& out_simMap, + int scale = 1, + int step = 1, + const std::string& customSuffix = ""); + +/** + * @brief read the depth map from file(s) + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[out] out_depthMap the corresponding depth map + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + 
*/ +void readDepthMap(int rc, + const MultiViewParams& mp, + image::Image& out_depthMap, + int scale = 1, + int step = 1, + const std::string& customSuffix = ""); + +/** + * @brief read the similarity map from file(s) + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[out] out_simMap the corresponding similarity map + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + */ +void readSimMap(int rc, + const MultiViewParams& mp, + image::Image& out_simMap, + int scale = 1, + int step = 1, + const std::string& customSuffix = ""); + +/** + * @brief Get depth map number of depth values from metadata or count + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + */ +unsigned long getNbDepthValuesFromDepthMap(int rc, + const MultiViewParams& mp, + int scale = 1, + int step = 1, + const std::string& customSuffix = ""); + + +/** + * @brief Delete depth/sim map tiles from disk + * @param[in] rc the related R camera index + * @param[in] mp the multi-view parameters + * @param[in] scale the depth/sim map downscale factor + * @param[in] step the depth/sim map step factor + * @param[in] customSuffix the filename custom suffix + */ +void deleteDepthSimMapTiles(int rc, + const MultiViewParams& mp, + int scale = 1, + int step = 1, + const std::string& customSuffix = ""); +} // namespace mvsUtils +} // namespace aliceVision diff --git a/src/aliceVision/mvsUtils/fileIO.cpp b/src/aliceVision/mvsUtils/fileIO.cpp index 0b80ee0a44..06d0861ec4 100644 --- a/src/aliceVision/mvsUtils/fileIO.cpp +++ b/src/aliceVision/mvsUtils/fileIO.cpp @@ -18,12 +18,18 @@ namespace aliceVision { namespace mvsUtils { -std::string getFileNameFromViewId(const 
MultiViewParams& mp, int viewId, EFileType fileType, int scale, const std::string& customSuffix) +std::string getFileNameFromViewId(const MultiViewParams& mp, int viewId, EFileType fileType, int scale, const std::string& customSuffix, int tileBeginX, int tileBeginY) { std::string folder = mp._imagesFolder; std::string suffix; + std::string tileSuffix; std::string ext; + if(tileBeginX >= 0 && tileBeginY >= 0) + { + tileSuffix = std::string("_" + std::to_string(tileBeginX) + "_" + std::to_string(tileBeginY)); + } + switch(fileType) { case EFileType::P: @@ -264,19 +270,47 @@ std::string getFileNameFromViewId(const MultiViewParams& mp, int viewId, EFileTy ext = "txt"; break; } + case EFileType::volume: + { + folder = mp.getDepthMapsFolder(); + suffix = "_volume"; + ext = "abc"; + break; + } + case EFileType::volumeCross: + { + folder = mp.getDepthMapsFolder(); + suffix = "_volume-cross"; + ext = "abc"; + break; + } + case EFileType::stats9p: + { + folder = mp.getDepthMapsFolder(); + suffix = "_9p"; + ext = "csv"; + break; + } + case EFileType::tilePattern: + { + folder = mp.getDepthMapsFolder(); + suffix = "_tilePattern"; + ext = "obj"; + break; + } } if(scale > 1) { suffix += "_scale" + num2str(scale); } - std::string fileName = folder + std::to_string(viewId) + suffix + customSuffix + "." + ext; + std::string fileName = folder + std::to_string(viewId) + suffix + customSuffix + tileSuffix + "." 
+ ext; return fileName; } -std::string getFileNameFromIndex(const MultiViewParams& mp, int index, EFileType mv_file_type, int scale, const std::string& customSuffix) +std::string getFileNameFromIndex(const MultiViewParams& mp, int index, EFileType mv_file_type, int scale, const std::string& customSuffix, int tileBeginX, int tileBeginY) { - return getFileNameFromViewId(mp, mp.getViewId(index), mv_file_type, scale, customSuffix); + return getFileNameFromViewId(mp, mp.getViewId(index), mv_file_type, scale, customSuffix, tileBeginX, tileBeginY); } FILE* mv_openFile(const MultiViewParams& mp, int index, EFileType mv_file_type, const char* readWrite) diff --git a/src/aliceVision/mvsUtils/fileIO.hpp b/src/aliceVision/mvsUtils/fileIO.hpp index 393e590db9..47e36021ef 100644 --- a/src/aliceVision/mvsUtils/fileIO.hpp +++ b/src/aliceVision/mvsUtils/fileIO.hpp @@ -25,9 +25,9 @@ namespace oiio = OIIO; namespace aliceVision { namespace mvsUtils { -std::string getFileNameFromViewId(const MultiViewParams& mp, int viewId, EFileType fileType, int scale = 0, const std::string& customSuffix = ""); +std::string getFileNameFromViewId(const MultiViewParams& mp, int viewId, EFileType fileType, int scale = 0, const std::string& customSuffix = "", int tileBeginX = -1, int tileBeginY = -1); -std::string getFileNameFromIndex(const MultiViewParams& mp, int index, EFileType fileType, int scale = 0, const std::string& customSuffix = ""); +std::string getFileNameFromIndex(const MultiViewParams& mp, int index, EFileType fileType, int scale = 0, const std::string& customSuffix = "", int tileBeginX = -1, int tileBeginY = -1); FILE* mv_openFile(const MultiViewParams& mp, int index, EFileType mv_file_type, const char* readWrite); Matrix3x4 load3x4MatrixFromFile(std::istream& in); diff --git a/src/software/pipeline/main_depthMapEstimation.cpp b/src/software/pipeline/main_depthMapEstimation.cpp index ec09664ec5..5a3d18d5d8 100644 --- a/src/software/pipeline/main_depthMapEstimation.cpp +++ 
b/src/software/pipeline/main_depthMapEstimation.cpp @@ -4,16 +4,15 @@ // v. 2.0. If a copy of the MPL was not distributed with this file, // You can obtain one at https://mozilla.org/MPL/2.0/. +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include +#include #include #include @@ -21,7 +20,7 @@ // These constants define the current software version. // They must be updated when the command line is changed. -#define ALICEVISION_SOFTWARE_VERSION_MAJOR 2 +#define ALICEVISION_SOFTWARE_VERSION_MAJOR 3 #define ALICEVISION_SOFTWARE_VERSION_MINOR 0 using namespace aliceVision; @@ -40,26 +39,34 @@ int aliceVision_main(int argc, char* argv[]) int rangeStart = -1; int rangeSize = -1; - // image downscale factor during process + // global image downscale factor int downscale = 2; // min / max view angle float minViewAngle = 2.0f; float maxViewAngle = 70.0f; + // DepthMap parameters + depthMap::DepthMapParams depthMapParams; + + // Tiling parameters + auto& tileParams = depthMapParams.tileParams; + // Semi Global Matching Parameters - depthMap::SgmParams sgmParams; + auto& sgmParams = depthMapParams.sgmParams; // Refine Parameters - depthMap::RefineParams refineParams; + auto& refineParams = depthMapParams.refineParams; // intermediate results - bool exportIntermediateResults = false; + bool exportIntermediateDepthSimMaps = false; + bool exportIntermediateVolumes = false; + bool exportIntermediateCrossVolumes = false; + bool exportIntermediateVolume9pCsv = false; // number of GPUs to use (0 means use all GPUs) int nbGPUs = 0; - po::options_description requiredParams("Required parameters"); requiredParams.add_options() ("input,i", po::value(&sfmDataFilename)->required(), @@ -76,63 +83,97 @@ int aliceVision_main(int argc, char* argv[]) ("rangeSize", po::value(&rangeSize)->default_value(rangeSize), "Compute a sub-range of N images (N=rangeSize).") ("downscale", po::value(&downscale)->default_value(downscale), - 
"Image downscale factor.") + "Downscale the input images to compute the depth map. " + "Full resolution (downscale=1) gives the best result, " + "but using a larger downscale will reduce computation time at the expense of quality. " + "If the images are noisy, blurry or if the surfaces are challenging (weakly-textured or with specularities) a larger downscale may improve.") ("minViewAngle", po::value(&minViewAngle)->default_value(minViewAngle), - "minimum angle between two views.") + "Minimum angle between two views (select the neighbouring cameras, select depth planes from epipolar segment point).") ("maxViewAngle", po::value(&maxViewAngle)->default_value(maxViewAngle), - "maximum angle between two views.") + "Maximum angle between two views (select the neighbouring cameras, select depth planes from epipolar segment point).") + ("tileBufferWidth", po::value(&tileParams.bufferWidth)->default_value(tileParams.bufferWidth), + "Maximum tile buffer width.") + ("tileBufferHeight", po::value(&tileParams.bufferHeight)->default_value(tileParams.bufferHeight), + "Maximum tile buffer height.") + ("tilePadding", po::value(&tileParams.padding)->default_value(tileParams.padding), + "Buffer padding for overlapping tiles.") + ("chooseTCamsPerTile", po::value(&depthMapParams.chooseTCamsPerTile)->default_value(depthMapParams.chooseTCamsPerTile), + "Choose neighbour cameras per tile or globally to the image.") + ("maxTCams", po::value(&depthMapParams.maxTCams)->default_value(depthMapParams.maxTCams), + "Maximum number of neighbour cameras per image.") ("sgmScale", po::value(&sgmParams.scale)->default_value(sgmParams.scale), - "Semi Global Matching: Downscale factor used to compute the similarity volume.") + "Semi Global Matching: Downscale factor applied on source images for the SGM step (in addition to the global downscale).") ("sgmStepXY", po::value(&sgmParams.stepXY)->default_value(sgmParams.stepXY), - "Semi Global Matching: Step used to compute the similarity volume on the X and 
Y axis.") + "Semi Global Matching: Step is used to compute the similarity volume for one pixel over N (in the XY image plane).") ("sgmStepZ", po::value(&sgmParams.stepZ)->default_value(sgmParams.stepZ), - "Semi Global Matching: Step used to compute the similarity volume on the Z axis.") - ("sgmMaxSideXY", po::value(&sgmParams.maxSideXY)->default_value(sgmParams.maxSideXY), - "Semi Global Matching: Max side in pixels used to automatically decide for sgmScale/sgmStepXY if not defined.") - ("sgmMaxTCams", po::value(&sgmParams.maxTCams)->default_value(sgmParams.maxTCams), - "Semi Global Matching: Number of neighbour cameras.") + "Semi Global Matching: Initial step used to compute the similarity volume on Z axis (every N pixels on the epilolar line). " + "-1 means automatic estimation. " + "This value will be adjusted in all case to fit in the max memory (sgmMaxDepths).") + ("sgmMaxTCamsPerTile", po::value(&sgmParams.maxTCamsPerTile)->default_value(sgmParams.maxTCamsPerTile), + "Semi Global Matching: Maximum number of neighbour cameras used per tile.") ("sgmWSH", po::value(&sgmParams.wsh)->default_value(sgmParams.wsh), - "Semi Global Matching: Size of the patch used to compute the similarity.") + "Semi Global Matching: Half-size of the patch used to compute the similarity. 
Patch width is wsh*2+1.") + ("sgmUseSfmSeeds", po::value(&sgmParams.useSfmSeeds)->default_value(sgmParams.useSfmSeeds), + "Semi Global Matching: Use landmarks from Structure-from-Motion as input seeds to define min/max depth ranges.") + ("sgmSeedsRangeInflate", po::value(&sgmParams.seedsRangeInflate)->default_value(sgmParams.seedsRangeInflate), + "Semi Global Matching: Inflate factor to add margins around SfM seeds.") ("sgmGammaC", po::value(&sgmParams.gammaC)->default_value(sgmParams.gammaC), - "Semi Global Matching: GammaC threshold.") + "Semi Global Matching: GammaC threshold used for similarity computation.") ("sgmGammaP", po::value(&sgmParams.gammaP)->default_value(sgmParams.gammaP), - "Semi Global Matching: GammaP threshold.") + "Semi Global Matching: GammaP threshold used for similarity computation.") ("sgmP1", po::value(&sgmParams.p1)->default_value(sgmParams.p1), - "Semi Global Matching: P1.") - ("sgmP2", po::value(&sgmParams.p2Weighting)->default_value(sgmParams.p2Weighting), - "Semi Global Matching: P2 Weighting.") + "Semi Global Matching: P1 parameter for SGM filtering.") + ("sgmP2Weighting", po::value(&sgmParams.p2Weighting)->default_value(sgmParams.p2Weighting), + "Semi Global Matching: P2 weighting parameter for SGM filtering.") ("sgmMaxDepths", po::value(&sgmParams.maxDepths)->default_value(sgmParams.maxDepths), - "Semi Global Matching: Max number of depths in the overall similarity volume.") - ("sgmMaxDepthsPerTc", po::value(&sgmParams.maxDepthsPerTc)->default_value(sgmParams.maxDepthsPerTc), - "Semi Global Matching: Max number of depths to sweep in the similarity volume per Rc/Tc cameras.") - ("sgmUseSfmSeeds", po::value(&sgmParams.useSfmSeeds)->default_value(sgmParams.useSfmSeeds), - "Semi Global Matching: Use landmarks from SfM to define the ranges for the plane sweeping.") + "Semi Global Matching: Maximum number of depths in the similarity volume.") ("sgmFilteringAxes", 
po::value(&sgmParams.filteringAxes)->default_value(sgmParams.filteringAxes), - "Semi Global Matching: Filtering axes for the 3D volume.") - ("refineMaxTCams", po::value(&refineParams.maxTCams)->default_value(refineParams.maxTCams), - "Refine: Number of neighbour cameras.") - ("refineNSamplesHalf", po::value(&refineParams.nSamplesHalf)->default_value(refineParams.nSamplesHalf), - "Refine: Number of samples.") - ("refineNDepthsToRefine", po::value(&refineParams.nDepthsToRefine)->default_value(refineParams.nDepthsToRefine), - "Refine: Number of depths.") - ("refineNiters", po::value(&refineParams.nIters)->default_value(refineParams.nIters), - "Refine: Number of iterations.") + "Semi Global Matching: Define axes for the filtering of the similarity volume.") + ("sgmDepthListPerTile", po::value(&sgmParams.depthListPerTile)->default_value(sgmParams.depthListPerTile), + "Semi Global Matching: Select the list of depth planes per tile or globally to the image.") + ("refineScale", po::value(&refineParams.scale)->default_value(refineParams.scale), + "Refine: Downscale factor applied on source images for the Refine step (in addition to the global downscale).") + ("refineStepXY", po::value(&refineParams.stepXY)->default_value(refineParams.stepXY), + "Refine: Step is used to compute the refine volume for one pixel over N (in the XY image plane).") + ("refineMaxTCamsPerTile", po::value(&refineParams.maxTCamsPerTile)->default_value(refineParams.maxTCamsPerTile), + "Refine: Maximum number of neighbour cameras used per tile.") + ("refineHalfNbDepths", po::value(&refineParams.halfNbDepths)->default_value(refineParams.halfNbDepths), + "Refine: The thickness of the refine area around the initial depth map. 
" + "This parameter defines the number of depths in front of and behind the initial value " + "for which we evaluate the similarity with a finer z sampling.") + ("refineSubsampling", po::value(&refineParams.nbSubsamples)->default_value(refineParams.nbSubsamples), + "Refine: Number of subsamples used to extract the best depth from the refine volume (sliding gaussian window precision).") ("refineWSH", po::value(&refineParams.wsh)->default_value(refineParams.wsh), - "Refine: Size of the patch used to compute the similarity.") + "Refine: Half-size of the patch used to compute the similarity. Patch width is wsh*2+1.") ("refineSigma", po::value(&refineParams.sigma)->default_value(refineParams.sigma), - "Refine: Sigma threshold.") + "Refine: Sigma (2*sigma^2) of the gaussian filter used to extract the best depth from the refine volume.") ("refineGammaC", po::value(&refineParams.gammaC)->default_value(refineParams.gammaC), - "Refine: GammaC threshold.") + "Refine: GammaC threshold used for similarity computation.") ("refineGammaP", po::value(&refineParams.gammaP)->default_value(refineParams.gammaP), - "Refine: GammaP threshold.") - ("refineUseTcOrRcPixSize", po::value(&refineParams.useTcOrRcPixSize)->default_value(refineParams.useTcOrRcPixSize), - "Refine: Use current camera pixel size or minimum pixel size of neighbour cameras.") - ("exportIntermediateResults", po::value(&exportIntermediateResults)->default_value(exportIntermediateResults), - "Export intermediate results from the SGM and Refine steps.") + "Refine: GammaP threshold used for similarity computation.") + ("colorOptimizationNbIterations", po::value(&refineParams.optimizationNbIterations)->default_value(refineParams.optimizationNbIterations), + "Color Optimization: Number of iterations of the optimization.") + ("refineEnabled", po::value(&refineParams.useRefineFuse)->default_value(refineParams.useRefineFuse), + "Enable/Disable depth/similarity map refinement process.") + ("colorOptimizationEnabled", 
po::value(&refineParams.useColorOptimization)->default_value(refineParams.useColorOptimization), + "Enable/Disable depth/similarity map post-process color optimization.") + ("autoAdjustSmallImage", po::value(&depthMapParams.autoAdjustSmallImage)->default_value(depthMapParams.autoAdjustSmallImage), + "Automatically adjust depth map parameters if images are smaller than one tile (maxTCamsPerTile=maxTCams, adjust step if needed).") + ("exportIntermediateDepthSimMaps", po::value(&exportIntermediateDepthSimMaps)->default_value(exportIntermediateDepthSimMaps), + "Export intermediate depth/similarity maps from the SGM and Refine steps.") + ("exportIntermediateVolumes", po::value(&exportIntermediateVolumes)->default_value(exportIntermediateVolumes), + "Export intermediate full similarity volumes from the SGM and Refine steps.") + ("exportIntermediateCrossVolumes", po::value(&exportIntermediateCrossVolumes)->default_value(exportIntermediateCrossVolumes), + "Export intermediate similarity cross volumes from the SGM and Refine steps.") + ("exportIntermediateVolume9pCsv", po::value(&exportIntermediateVolume9pCsv)->default_value(exportIntermediateVolume9pCsv), + "Export intermediate volumes 9 points from the SGM and Refine steps in CSV files.") + ("exportTilePattern", po::value(&depthMapParams.exportTilePattern)->default_value(depthMapParams.exportTilePattern), + "Export workflow tile pattern.") ("nbGPUs", po::value(&nbGPUs)->default_value(nbGPUs), "Number of GPUs to use (0 means use all GPUs)."); - CmdLine cmdline("This program estimates depth maps for each input image.\n" + CmdLine cmdline("Dense Reconstruction.\n" + "This program estimate a depth map for each input calibrated camera using Plane Sweeping, a multi-view stereo algorithm notable for its efficiency on modern graphics hardware (GPU).\n" "AliceVision depthMapEstimation"); cmdline.add(requiredParams); cmdline.add(optionalParams); @@ -158,6 +199,34 @@ int aliceVision_main(int argc, char* argv[]) return EXIT_FAILURE; 
} + // check that Sgm scaleStep is greater or equal to the Refine scaleStep + if(depthMapParams.useRefine) + { + const int sgmScaleStep = sgmParams.scale * sgmParams.stepXY; + const int refineScaleStep = refineParams.scale * refineParams.stepXY; + + if(sgmScaleStep < refineScaleStep) + { + ALICEVISION_LOG_ERROR("SGM downscale (scale x step) should be greater or equal to the Refine downscale (scale x step)."); + return EXIT_FAILURE; + } + + if(sgmScaleStep % refineScaleStep != 0) + { + ALICEVISION_LOG_ERROR("SGM downscale (scale x step) should be a multiple of the Refine downscale (scale x step)."); + return EXIT_FAILURE; + } + } + + // check min/max view angle + if(minViewAngle < 0.f || minViewAngle > 360.f || + maxViewAngle < 0.f || maxViewAngle > 360.f || + minViewAngle > maxViewAngle) + { + ALICEVISION_LOG_ERROR("Invalid value for minViewAngle/maxViewAngle parameter(s). Should be between 0 and 360."); + return EXIT_FAILURE; + } + // read the input SfM scene sfmData::SfMData sfmData; if(!sfmDataIO::Load(sfmData, sfmDataFilename, sfmDataIO::ESfMData::ALL)) @@ -166,45 +235,86 @@ int aliceVision_main(int argc, char* argv[]) return EXIT_FAILURE; } - // initialization + // MultiViewParams initialization mvsUtils::MultiViewParams mp(sfmData, imagesFolder, outputFolder, "", false, downscale); + // set MultiViewParams min/max view angle mp.setMinViewAngle(minViewAngle); mp.setMaxViewAngle(maxViewAngle); + // set undefined tile dimensions + if(tileParams.bufferWidth <= 0 || tileParams.bufferHeight <= 0) + { + tileParams.bufferWidth = mp.getMaxImageWidth(); + tileParams.bufferHeight = mp.getMaxImageHeight(); + } + + // check if the tile padding is correct + if(tileParams.padding < 0 && + tileParams.padding * 2 < tileParams.bufferWidth && + tileParams.padding * 2 < tileParams.bufferHeight) + { + ALICEVISION_LOG_ERROR("Invalid value for tilePadding parameter. 
Should be at least 0 and not exceed half buffer width and height."); + return EXIT_FAILURE; + } + + // check if tile size > max image size + if(tileParams.bufferWidth > mp.getMaxImageWidth() || tileParams.bufferHeight > mp.getMaxImageHeight()) + ALICEVISION_LOG_WARNING("Tile buffer size (width: " << tileParams.bufferWidth << ", height: " << tileParams.bufferHeight << ") is larger than the maximum image size (width: " << mp.getMaxImageWidth() << ", height: " << mp.getMaxImageHeight() << ")."); + // set params in bpt + // Tile Parameters + mp.userParams.put("tile.bufferWidth", tileParams.bufferWidth); + mp.userParams.put("tile.bufferHeight", tileParams.bufferHeight); + mp.userParams.put("tile.padding", tileParams.padding); + // SGM Parameters - mp.userParams.put("sgm.maxTCams", sgmParams.maxTCams); + mp.userParams.put("sgm.scale", sgmParams.scale); + mp.userParams.put("sgm.stepXY", sgmParams.stepXY); + mp.userParams.put("sgm.stepZ", sgmParams.stepZ); mp.userParams.put("sgm.wsh", sgmParams.wsh); + mp.userParams.put("sgm.seedsRangeInflate", sgmParams.seedsRangeInflate); mp.userParams.put("sgm.gammaC", sgmParams.gammaC); mp.userParams.put("sgm.gammaP", sgmParams.gammaP); mp.userParams.put("sgm.p1", sgmParams.p1); mp.userParams.put("sgm.p2Weighting", sgmParams.p2Weighting); - mp.userParams.put("sgm.scale", sgmParams.scale); - mp.userParams.put("sgm.stepXY", sgmParams.stepXY); - mp.userParams.put("sgm.stepZ", sgmParams.stepZ); - mp.userParams.put("sgm.maxSideXY", sgmParams.maxSideXY); + mp.userParams.put("sgm.maxTCamsPerTile", sgmParams.maxTCamsPerTile); mp.userParams.put("sgm.maxDepths", sgmParams.maxDepths); - mp.userParams.put("sgm.maxDepthsPerTc", sgmParams.maxDepthsPerTc); - mp.userParams.put("sgm.useSfmSeeds", sgmParams.useSfmSeeds); mp.userParams.put("sgm.filteringAxes", sgmParams.filteringAxes); - mp.userParams.put("sgm.exportIntermediateResults", exportIntermediateResults); + mp.userParams.put("sgm.useSfmSeeds", sgmParams.useSfmSeeds); + 
mp.userParams.put("sgm.depthListPerTile", sgmParams.depthListPerTile); + mp.userParams.put("sgm.exportIntermediateDepthSimMaps", exportIntermediateDepthSimMaps); + mp.userParams.put("sgm.exportIntermediateVolumes", exportIntermediateVolumes); + mp.userParams.put("sgm.exportIntermediateCrossVolumes", exportIntermediateCrossVolumes); + mp.userParams.put("sgm.exportIntermediateVolume9pCsv", exportIntermediateVolume9pCsv); // Refine Parameters - mp.userParams.put("refine.maxTCams", refineParams.maxTCams); - mp.userParams.put("refine.nSamplesHalf", refineParams.nSamplesHalf); - mp.userParams.put("refine.nDepthsToRefine", refineParams.nDepthsToRefine); - mp.userParams.put("refine.nIters", refineParams.nIters); + mp.userParams.put("refine.scale", refineParams.scale); + mp.userParams.put("refine.stepXY", refineParams.stepXY); mp.userParams.put("refine.wsh", refineParams.wsh); mp.userParams.put("refine.sigma", refineParams.sigma); mp.userParams.put("refine.gammaC", refineParams.gammaC); mp.userParams.put("refine.gammaP", refineParams.gammaP); - mp.userParams.put("refine.useTcOrRcPixSize", refineParams.useTcOrRcPixSize); - mp.userParams.put("refine.exportIntermediateResults", exportIntermediateResults); + mp.userParams.put("refine.maxTCamsPerTile", refineParams.maxTCamsPerTile); + mp.userParams.put("refine.nbSubsamples", refineParams.nbSubsamples); + mp.userParams.put("refine.halfNbDepths", refineParams.halfNbDepths); + mp.userParams.put("refine.optimizationNbIterations", refineParams.optimizationNbIterations); + mp.userParams.put("refine.useRefineFuse", refineParams.useRefineFuse); + mp.userParams.put("refine.useColorOptimization", refineParams.useColorOptimization); + mp.userParams.put("refine.exportIntermediateDepthSimMaps", exportIntermediateDepthSimMaps); + mp.userParams.put("refine.exportIntermediateCrossVolumes", exportIntermediateCrossVolumes); + mp.userParams.put("refine.exportIntermediateVolume9pCsv", exportIntermediateVolume9pCsv); + + // Workflow Parameters + 
mp.userParams.put("depthMap.chooseTCamsPerTile", depthMapParams.chooseTCamsPerTile); + mp.userParams.put("depthMap.maxTCams", depthMapParams.maxTCams); + mp.userParams.put("depthMap.exportTilePattern", depthMapParams.exportTilePattern); + mp.userParams.put("depthMap.autoAdjustSmallImage", depthMapParams.autoAdjustSmallImage); std::vector cams; cams.reserve(mp.ncams); + if(rangeSize == -1) { for(int rc = 0; rc < mp.ncams; ++rc) // process all cameras