From be883dc22ffb462178cb698611d013e502738754 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 6 Mar 2024 19:19:34 +0000 Subject: [PATCH 001/388] Audio Decoder PR 1 --- rocAL/CMakeLists.txt | 16 + rocAL/include/api/rocal_api_data_loaders.h | 48 +++ rocAL/include/api/rocal_api_types.h | 5 +- rocAL/include/decoders/audio/audio_decoder.h | 51 ++++ .../decoders/audio/audio_decoder_factory.h | 29 ++ .../include/decoders/audio/sndfile_decoder.h | 37 +++ rocAL/include/decoders/image/decoder.h | 1 + rocAL/include/loaders/audio/audio_loader.h | 84 +++++ .../loaders/audio/audio_loader_sharded.h | 56 ++++ .../loaders/audio/audio_read_and_decode.h | 72 +++++ .../loaders/audio/audio_source_evaluator.h | 49 +++ .../include/loaders/audio/node_audio_loader.h | 55 ++++ .../audio/node_audio_loader_single_shard.h | 53 ++++ rocAL/include/loaders/circular_buffer.h | 15 +- .../loaders/image/cifar10_data_loader.h | 6 +- rocAL/include/loaders/image/image_loader.h | 6 +- .../loaders/image/image_loader_sharded.h | 2 +- rocAL/include/loaders/loader_module.h | 4 +- rocAL/include/loaders/video/video_loader.h | 6 +- .../loaders/video/video_loader_sharded.h | 2 +- rocAL/include/meta_data/bounding_box_graph.h | 4 +- rocAL/include/meta_data/meta_data_graph.h | 4 +- rocAL/include/pipeline/commons.h | 3 + rocAL/include/pipeline/master_graph.h | 31 ++ rocAL/include/pipeline/tensor.h | 8 +- .../readers/image/caffe2_lmdb_record_reader.h | 3 + .../readers/image/caffe_lmdb_record_reader.h | 3 + .../readers/image/cifar10_data_reader.h | 3 + .../readers/image/coco_file_source_reader.h | 3 + .../readers/image/external_source_reader.h | 5 +- .../readers/image/file_source_reader.h | 6 +- rocAL/include/readers/image/image_reader.h | 4 + .../readers/image/mxnet_recordio_reader.h | 3 + .../include/readers/image/tf_record_reader.h | 3 + .../video/sequence_file_source_reader.h | 7 +- rocAL/source/api/rocal_api_data_loaders.cpp | 150 +++++++++ .../decoders/audio/audio_decoder_factory.cpp | 36 +++ 
.../source/decoders/audio/sndfile_decoder.cpp | 87 ++++++ rocAL/source/loaders/audio/audio_loader.cpp | 288 ++++++++++++++++++ .../loaders/audio/audio_loader_sharded.cpp | 166 ++++++++++ .../loaders/audio/audio_read_and_decode.cpp | 157 ++++++++++ .../loaders/audio/audio_source_evaluator.cpp | 77 +++++ .../loaders/audio/node_audio_loader.cpp | 58 ++++ .../audio/node_audio_loader_single_shard.cpp | 63 ++++ rocAL/source/loaders/circular_buffer.cpp | 16 +- .../loaders/image/cifar10_data_loader.cpp | 14 +- rocAL/source/loaders/image/image_loader.cpp | 12 +- .../loaders/image/image_loader_sharded.cpp | 4 +- rocAL/source/loaders/video/video_loader.cpp | 12 +- .../loaders/video/video_loader_sharded.cpp | 4 +- rocAL/source/meta_data/bounding_box_graph.cpp | 4 +- rocAL/source/pipeline/master_graph.cpp | 16 +- rocAL/source/pipeline/tensor.cpp | 67 ++++ .../readers/image/file_source_reader.cpp | 202 ++++++------ .../rocAL_audio_unittests/CMakeLists.txt | 73 +++++ .../rocAL_audio_unittests/README.md | 24 ++ .../rocAL_audio_unittests.cpp | 255 ++++++++++++++++ 57 files changed, 2314 insertions(+), 158 deletions(-) create mode 100644 rocAL/include/decoders/audio/audio_decoder.h create mode 100644 rocAL/include/decoders/audio/audio_decoder_factory.h create mode 100644 rocAL/include/decoders/audio/sndfile_decoder.h create mode 100644 rocAL/include/loaders/audio/audio_loader.h create mode 100644 rocAL/include/loaders/audio/audio_loader_sharded.h create mode 100644 rocAL/include/loaders/audio/audio_read_and_decode.h create mode 100644 rocAL/include/loaders/audio/audio_source_evaluator.h create mode 100644 rocAL/include/loaders/audio/node_audio_loader.h create mode 100644 rocAL/include/loaders/audio/node_audio_loader_single_shard.h create mode 100644 rocAL/source/decoders/audio/audio_decoder_factory.cpp create mode 100644 rocAL/source/decoders/audio/sndfile_decoder.cpp create mode 100644 rocAL/source/loaders/audio/audio_loader.cpp create mode 100644 
rocAL/source/loaders/audio/audio_loader_sharded.cpp create mode 100644 rocAL/source/loaders/audio/audio_read_and_decode.cpp create mode 100644 rocAL/source/loaders/audio/audio_source_evaluator.cpp create mode 100644 rocAL/source/loaders/audio/node_audio_loader.cpp create mode 100644 rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp create mode 100644 tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt create mode 100644 tests/cpp_api_tests/rocAL_audio_unittests/README.md create mode 100644 tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp diff --git a/rocAL/CMakeLists.txt b/rocAL/CMakeLists.txt index a5d4ffecf..682c1ccdb 100644 --- a/rocAL/CMakeLists.txt +++ b/rocAL/CMakeLists.txt @@ -41,6 +41,10 @@ find_package(LMDB QUIET) find_package(RapidJSON QUIET) find_package(StdFilesystem QUIET) find_package(HALF QUIET) +find_library(libsnd_LIBS + NAMES sndfile libsndfile + PATHS ${CMAKE_SYSTEM_PREFIX_PATH} ${LIBSND_ROOT_DIR} "/usr/local" + PATH_SUFFIXES lib lib64) if(DEFINED ENV{ROCM_PATH}) set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path") @@ -189,6 +193,12 @@ if(NOT HALF_FOUND) set(BUILD_ROCAL false) message("-- ${Yellow}NOTE: rocAL library requires HALF, Not Found ${ColourReset}") endif() +if (${libsnd_LIBS} STREQUAL "libsnd_LIBS-NOTFOUND") + set(BUILD_ROCAL false) + message("-- ${Yellow}NOTE: rocAL library requires libsnd (sndfile), but it was not found. 
Try specifying its location with `-DLIBSND_ROOT_DIR`.${ColourReset}") +else() + message(STATUS "Found libsnd: ${libsnd_LIBS}") +endif() if(${BUILD_ROCAL}) # AMD RPP @@ -219,6 +229,9 @@ if(${BUILD_ROCAL}) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FILESYSTEM_LIBRARIES}) # half include_directories(${HALF_INCLUDE_DIRS}) + # SndFile + include_directories(${SndFile_INCLUDE_DIRS}) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} sndfile) # rocAL uses C++ 17 features set(CMAKE_CXX_STANDARD 17) @@ -231,13 +244,16 @@ if(${BUILD_ROCAL}) include/augmentations/color_augmentations/ include/augmentations/effects_augmentations/ include/augmentations/geometry_augmentations/ + include/augmentations/audio_augmentations/ include/decoders/image/ include/decoders/video/ + include/decoders/audio/ include/decoders/libjpeg/ include/device/ include/loaders/ include/loaders/image/ include/loaders/video/ + include/loaders/audio/ include/meta_data/ include/parameters/ include/pipeline/ diff --git a/rocAL/include/api/rocal_api_data_loaders.h b/rocAL/include/api/rocal_api_data_loaders.h index 62e3a6e66..bfb63b318 100644 --- a/rocAL/include/api/rocal_api_data_loaders.h +++ b/rocAL/include/api/rocal_api_data_loaders.h @@ -824,4 +824,52 @@ extern "C" RocalTensor ROCAL_API_CALL rocalJpegExternalFileSource(RocalContext p RocalDecoderType rocal_decoder_type = RocalDecoderType::ROCAL_DECODER_TJPEG, RocalExternalSourceMode external_source_mode = RocalExternalSourceMode::ROCAL_EXTSOURCE_FNAME); +/// Creates Audio file reader and decoder. It allocates the resources and objects required to read and decode audio files stored on the file system. It has internal sharding capability to load/decode in parallel if the user wants. +/// If the files are not in standard audio compression formats they will be ignored.
+/// \param context Rocal context +/// \param source_path A NULL terminated char string pointing to the location on the disk +/// \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances. Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. +/// \param is_output Determines if the user wants the loaded audio to be part of the output or not. +/// \param shuffle Determines if the user wants to shuffle the dataset or not. +/// \param loop Determines if the user wants to loop indefinitely through the audio or not. +/// \param downmix If set to True, downmix all input channels to mono. If downmixing is turned on, the decoder output is 1D. If downmixing is turned off, it produces 2D output with interleaved channels in case of multichannel audio. +/// \param max_frames The maximum frames of the decoded audio. +/// \param max_channels The maximum channels of the decoded audio. +/// \return Reference to the output audio +extern "C" RocalTensor ROCAL_API_CALL rocalAudioFileSource(RocalContext context, + const char* source_path, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + bool downmix = false, + unsigned max_frames = 1, + unsigned max_channels = 1); + +/// Creates Audio file reader and decoder. It allocates the resources and objects required to read and decode audio files stored on the file system. It has internal sharding capability to load/decode in parallel if the user wants. +/// If the files are not in standard audio compression formats they will be ignored. +/// \param p_context Rocal context +/// \param source_path A NULL terminated char string pointing to the location on the disk +/// \param shard_id Shard id for this loader +/// \param shard_count Defines the parallelism level by internally sharding the input dataset and load/decode using multiple decoder/loader instances.
Using shard counts bigger than 1 improves the load/decode performance if compute resources (CPU cores) are available. +/// \param is_output Determines if the user wants the loaded audio to be part of the output or not. +/// \param shuffle Determines if the user wants to shuffle the dataset or not. +/// \param loop Determines if the user wants to loop indefinitely through the audio or not. +/// \param downmix If set to True, downmix all input channels to mono. If downmixing is turned on, the decoder output is 1D. If downmixing is turned off, it produces 2D output with interleaved channels in case of multichannel audio. +/// \param max_frames The maximum frames of the decoded audio. +/// \param max_channels The maximum channels of the decoded audio. +/// \param storage_type Determines the storage type +/// \return Reference to the output audio +extern "C" RocalTensor ROCAL_API_CALL rocalAudioFileSourceSingleShard(RocalContext p_context, + const char* source_path, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool shuffle = false, + bool loop = false, + bool downmix = false, + unsigned max_frames = 1, + unsigned max_channels = 1, + unsigned storage_type = 9); + #endif // MIVISIONX_ROCAL_API_DATA_LOADERS_H diff --git a/rocAL/include/api/rocal_api_types.h b/rocAL/include/api/rocal_api_types.h index 929ab5892..cf4b665c1 100644 --- a/rocAL/include/api/rocal_api_types.h +++ b/rocAL/include/api/rocal_api_types.h @@ -259,7 +259,10 @@ enum RocalDecoderType { ROCAL_DECODER_VIDEO_FFMPEG_SW = 3, /*! \brief AMD ROCAL_DECODER_VIDEO_FFMPEG_HW */ - ROCAL_DECODER_VIDEO_FFMPEG_HW = 4 + ROCAL_DECODER_VIDEO_FFMPEG_HW = 4, + /*!
\brief AMD ROCAL_DECODER_AUDIO_SNDFILE + */ + ROCAL_DECODER_AUDIO_SNDFILE = 5 }; enum RocalOutputMemType { diff --git a/rocAL/include/decoders/audio/audio_decoder.h b/rocAL/include/decoders/audio/audio_decoder.h new file mode 100644 index 000000000..63e23cacd --- /dev/null +++ b/rocAL/include/decoders/audio/audio_decoder.h @@ -0,0 +1,51 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include "parameter_factory.h" +#include "sndfile.h" + +class AudioDecoder { +public: + enum class Status { + OK = 0, + HEADER_DECODE_FAILED, + CONTENT_DECODE_FAILED, + UNSUPPORTED, + FAILED, + NO_MEMORY + }; + virtual AudioDecoder::Status initialize(const char *src_filename) = 0; // This function is responsible for initializing the audio decoder. It takes the source filename as input and returns the status of the initialization process. 
+ virtual AudioDecoder::Status decode(float* buffer) = 0; //to pass buffer & number of frames/samples to decode + virtual AudioDecoder::Status decode_info(int* samples, int* channels, float* sample_rates) = 0; //to decode info about the audio samples + virtual void release() = 0; + virtual ~AudioDecoder() = default; +protected: + const char *_src_filename = NULL; + SF_INFO _sfinfo; + SNDFILE* _sf_ptr; +}; + diff --git a/rocAL/include/decoders/audio/audio_decoder_factory.h b/rocAL/include/decoders/audio/audio_decoder_factory.h new file mode 100644 index 000000000..6eb09e2da --- /dev/null +++ b/rocAL/include/decoders/audio/audio_decoder_factory.h @@ -0,0 +1,29 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include +#include "decoder.h" +#include "audio_decoder.h" + +std::shared_ptr create_audio_decoder(DecoderConfig config); + diff --git a/rocAL/include/decoders/audio/sndfile_decoder.h b/rocAL/include/decoders/audio/sndfile_decoder.h new file mode 100644 index 000000000..991a82a35 --- /dev/null +++ b/rocAL/include/decoders/audio/sndfile_decoder.h @@ -0,0 +1,37 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "audio_decoder.h" + +class SndFileDecoder : public AudioDecoder { +public: + //! 
Default constructor + SndFileDecoder(); + AudioDecoder::Status initialize(const char *src_filename) override; + AudioDecoder::Status decode(float* buffer) override; + AudioDecoder::Status decode_info(int* samples, int* channels, float* sample_rates) override; + void release() override; + ~SndFileDecoder() override; +}; + diff --git a/rocAL/include/decoders/image/decoder.h b/rocAL/include/decoders/image/decoder.h index ba9692930..8a6a75cb7 100644 --- a/rocAL/include/decoders/image/decoder.h +++ b/rocAL/include/decoders/image/decoder.h @@ -37,6 +37,7 @@ enum class DecoderType { OVX_FFMPEG = 5, //!< Uses FFMPEG to decode video streams, can decode up to 4 video streams simultaneously FFMPEG_SOFTWARE_DECODE = 6, FFMPEG_HARDWARE_DECODE = 7, + SNDFILE = 8, //!< Uses sndfile to decode audio files }; class DecoderConfig { diff --git a/rocAL/include/loaders/audio/audio_loader.h b/rocAL/include/loaders/audio/audio_loader.h new file mode 100644 index 000000000..9a59056e4 --- /dev/null +++ b/rocAL/include/loaders/audio/audio_loader.h @@ -0,0 +1,84 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include "commons.h" +#include "circular_buffer.h" +#include "audio_read_and_decode.h" +#include "meta_data_reader.h" + +// AudioLoader runs an internal thread for loading and decoding of audios asynchronously +// It uses a circular buffer to store decoded audios for the user +class AudioLoader : public LoaderModule { +public: + explicit AudioLoader(void* dev_resources); + ~AudioLoader() override; + LoaderModuleStatus load_next() override; + void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; + void set_output(Tensor* output_audio) override; + void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override { THROW("set_random_bbox_data_reader is not compatible with this implementation") }; + size_t remaining_count() override; // returns number of remaining items to be loaded + void reset() override; // Resets the loader to load from the beginning of the media + Timing timing() override; + void start_loading() override; + LoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); + LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); + std::vector get_id() override; + decoded_sample_info get_decode_sample_info() override; + void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + void set_gpu_device_id(int device_id); + void shut_down() override; + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {}
+private: + bool is_out_of_data(); + void de_init(); + void stop_internal_thread(); + std::shared_ptr _audio_loader; + LoaderModuleStatus update_output_audio(); + LoaderModuleStatus load_routine(); + Tensor* _output_tensor; + std::vector _output_names;//!< audio name/ids that are stores in the _output_audio + size_t _output_mem_size; + MetaDataBatch* _meta_data = nullptr;//!< The output of the meta_data_graph, + bool _internal_thread_running; + size_t _batch_size; + std::thread _load_thread; + RocalMemType _mem_type; + decoded_sample_info _decoded_audio_info; + decoded_sample_info _output_decoded_audio_info; + CircularBuffer _circ_buff; + TimingDBG _swap_handle_time; + bool _is_initialized; + bool _stopped = false; + bool _loop;// randombboxcrop_meta_data_reader) override { THROW("set_random_bbox_data_reader is not compatible with this implementation") }; + size_t remaining_count() override; + void reset() override; + void start_loading() override; + std::vector get_id() override; + decoded_sample_info get_decode_sample_info() override; + Timing timing() override; + void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; + void shut_down() override; + void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, + const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} +private: + void increment_loader_idx(); + void* _dev_resources; + bool _initialized = false; + std::vector> _loaders; + size_t _loader_idx; + size_t _shard_count = 1; + void fast_forward_through_empty_loaders(); + size_t _prefetch_queue_depth; + Tensor *_output_tensor; +}; diff --git a/rocAL/include/loaders/audio/audio_read_and_decode.h b/rocAL/include/loaders/audio/audio_read_and_decode.h new file mode 100644 index 000000000..151b7c961 --- /dev/null +++ b/rocAL/include/loaders/audio/audio_read_and_decode.h @@ -0,0 +1,72 @@ +/* +Copyright (c) 2019 - 2023 
Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include +#include +#include "commons.h" +#include "sndfile_decoder.h" +#include "reader_factory.h" +#include "timing_debug.h" +#include "loader_module.h" +#include "audio_decoder.h" + +class AudioReadAndDecode { +public: + AudioReadAndDecode(); + ~AudioReadAndDecode(); + size_t count(); + void reset(); + void create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size, int device_id = 0); + //! Loads a decompressed batch of audios into the buffer indicated by buff + /// \param buff User's buffer provided to be filled with decoded audio samples + /// \param names User's buffer provided to be filled with name of the audio files + /// \param max_decoded_samples User's buffer maximum samples per decoded audio. 
User expects the decoder to downscale the audio if audio's original samples is bigger than max_decoded_samples + /// \param max_decoded_channels User's buffer maximum channels per decoded audio. User expects the decoder to downscale the audio if audio's original channels is bigger than max_decoded_channels + /// \param actual_samples is set by the load() function to the samples of the region that decoded audio is located. It's less than max_decoded_samples and is either equal to the original audio samples if original audio samples is smaller than max_decoded_samples or downscaled if necessary to fit the max_decoded_samples criterion. + /// \param actual_channels is set by the load() function to the channels of the region that decoded audio is located. It's less than max_decoded_channels and is either equal to the original audio channels if original audio channels is smaller than max_decoded_channels or downscaled if necessary to fit the max_decoded_channels criterion. + LoaderModuleStatus load( + float* buff, + std::vector& names, + const size_t max_decoded_samples, + const size_t max_decoded_channels, + std::vector &actual_samples, + std::vector &actual_channels, + std::vector &actual_sample_rates); + //!
returns timing info or other status information + Timing timing(); +private: + std::vector> _decoder; + std::shared_ptr _reader; + std::vector _audio_names; + std::vector _audio_file_path; + std::vector _decompressed_buff_ptrs; + std::vector _actual_decoded_samples; + std::vector _actual_decoded_channels; + std::vector _original_samples; + std::vector _original_channels; + std::vector _original_sample_rates; + TimingDBG _file_load_time, _decode_time; + size_t _batch_size, _num_threads; + DecoderConfig _decoder_config; + std::string _input_path; +}; diff --git a/rocAL/include/loaders/audio/audio_source_evaluator.h b/rocAL/include/loaders/audio/audio_source_evaluator.h new file mode 100644 index 000000000..b6b5eb7cf --- /dev/null +++ b/rocAL/include/loaders/audio/audio_source_evaluator.h @@ -0,0 +1,49 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include +#include +#include "sndfile_decoder.h" +#include "reader_factory.h" +#include "timing_debug.h" +#include "loader_module.h" + +enum class AudioSourceEvaluatorStatus { + OK = 0, + UNSUPPORTED_DECODER_TYPE, + UNSUPPORTED_STORAGE_TYPE, +}; + +class AudioSourceEvaluator { +public: + AudioSourceEvaluatorStatus create(ReaderConfig reader_cfg, DecoderConfig decoder_cfg); + void find_max_dimension(); + size_t max_samples(); + size_t max_channels(); +private: + int _samples_max = 0, _channels_max = 0; + std::shared_ptr _decoder; + std::shared_ptr _reader; + std::string _input_path; +}; + diff --git a/rocAL/include/loaders/audio/node_audio_loader.h b/rocAL/include/loaders/audio/node_audio_loader.h new file mode 100644 index 000000000..22eccc222 --- /dev/null +++ b/rocAL/include/loaders/audio/node_audio_loader.h @@ -0,0 +1,55 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "node.h" +#include "audio_loader_sharded.h" +#include "graph.h" + +class AudioLoaderNode: public Node { +public: + /// \param device_resources shard count from user + /// internal_shard_count number of loader/decoders are created and each shard is loaded and decoded using separate and independent resources increasing the parallelism and performance. + AudioLoaderNode(Tensor *output, void* device_resources); + ~AudioLoaderNode() override; + AudioLoaderNode() = delete; + /// \param internal_shard_count Defines the amount of parallelism user wants for the load and decode process to be handled internally. + /// \param source_path Defines the path that includes the Audio dataset + /// \param storage_type Determines the storage type + /// \param decoder_type Determines the decoder_type + /// \param shuffle Determines if the user wants to shuffle the dataset or not. + /// \param loop Determines if the user wants to indefinitely loops through audio or not. + /// \param load_batch_count Defines the quantum count of the Audios to be loaded. It's usually equal to the user's batch size. + /// \param mem_type Memory type, host or device + /// \param meta_data_reader Determines the meta-data information + /// The loader will repeat Audios if necessary to be able to have Audios in multiples of the load_batch_count, + /// for example if there are 10 Audios in the dataset and load_batch_count is 3, the loader repeats 2 Audios as if there are 12 Audios available. 
+ void init(unsigned internal_shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &source_file_list, StorageType storage_type, + DecoderType decoder_type, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader); + std::shared_ptr get_loader_module(); +protected: + void create_node() override {}; + void update_node() override {}; +private: + std::shared_ptr _loader_module = nullptr; +}; + diff --git a/rocAL/include/loaders/audio/node_audio_loader_single_shard.h b/rocAL/include/loaders/audio/node_audio_loader_single_shard.h new file mode 100644 index 000000000..202edf2a4 --- /dev/null +++ b/rocAL/include/loaders/audio/node_audio_loader_single_shard.h @@ -0,0 +1,53 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once +#include "node.h" +#include "audio_loader_sharded.h" +#include "graph.h" + +class AudioLoaderSingleShardNode: public Node { +public: + AudioLoaderSingleShardNode(Tensor *output, void* device_resources); + ~AudioLoaderSingleShardNode() override; + /// \param user_shard_count shard count from user + /// \param user_shard_id shard id from user + /// \param source_path Defines the path that includes the Audio dataset + /// \param storage_type Determines the storage type + /// \param decoder_type Determines the decoder_type + /// \param shuffle Determines if the user wants to shuffle the dataset or not. + /// \param loop Determines if the user wants to indefinitely loops through audios or not. + /// \param load_batch_count Defines the quantum count of the Audios to be loaded. It's usually equal to the user's batch size. + /// \param mem_type Memory type, host or device + /// \param meta_data_reader Determines the meta-data information + /// The loader will repeat Audios if necessary to be able to have Audios in multiples of the load_batch_count, + /// for example if there are 10 Audios in the dataset and load_batch_count is 3, the loader repeats 2 Audios as if there are 12 Audios available. + void init(unsigned shard_id, unsigned shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &source_file_list, + StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, + std::shared_ptr meta_data_reader); + std::shared_ptr get_loader_module(); +protected: + void create_node() override {}; + void update_node() override {}; +private: + std::shared_ptr _loader_module = nullptr; +}; diff --git a/rocAL/include/loaders/circular_buffer.h b/rocAL/include/loaders/circular_buffer.h index ac4fafe13..168052ece 100644 --- a/rocAL/include/loaders/circular_buffer.h +++ b/rocAL/include/loaders/circular_buffer.h @@ -31,12 +31,15 @@ THE SOFTWARE. 
#include "commons.h" #include "device_manager.h" #include "device_manager_hip.h" -struct decoded_image_info { - std::vector _image_names; +struct decoded_sample_info { + std::vector _sample_names; std::vector _roi_width; std::vector _roi_height; std::vector _original_width; std::vector _original_height; + std::vector _original_audio_samples; + std::vector _original_audio_channels; + std::vector _original_audio_sample_rates; }; struct crop_image_info { @@ -54,9 +57,9 @@ class CircularBuffer { void unblock_writer(); // Unblocks the thread currently waiting on get_write_buffer void push(); // The latest write goes through, effectively adds one element to the buffer void pop(); // The oldest write will be erased and overwritten in upcoming writes - void set_image_info(const decoded_image_info& info) { _last_image_info = info; } + void set_sample_info(const decoded_sample_info& info) { _last_sample_info = info; } void set_crop_image_info(const crop_image_info& info) { _last_crop_image_info = info; } - decoded_image_info& get_image_info(); + decoded_sample_info& get_sample_info(); crop_image_info& get_cropped_image_info(); bool random_bbox_crop_flag = false; void* get_read_buffer_dev(); @@ -73,8 +76,8 @@ class CircularBuffer { bool full(); bool empty(); size_t _buff_depth; - decoded_image_info _last_image_info; - std::queue _circ_image_info; //!< Stores the loaded images names, decoded_width and decoded_height(data is stored in the _circ_buff) + decoded_sample_info _last_sample_info; + std::queue _circ_sample_info; //!< Stores the loaded images names, decoded_width and decoded_height(data is stored in the _circ_buff) crop_image_info _last_crop_image_info; // for Random BBox crop coordinates std::queue _circ_crop_image_info; //!< Stores the crop coordinates of the images for random bbox crop (data is stored in the _circ_buff) std::mutex _names_buff_lock; diff --git a/rocAL/include/loaders/image/cifar10_data_loader.h b/rocAL/include/loaders/image/cifar10_data_loader.h 
index 9736b6dfc..7f8c6a784 100644 --- a/rocAL/include/loaders/image/cifar10_data_loader.h +++ b/rocAL/include/loaders/image/cifar10_data_loader.h @@ -40,7 +40,7 @@ class CIFAR10DataLoader : public LoaderModule { void reset() override; void start_loading() override; std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; crop_image_info get_crop_image_info() override; Timing timing() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; @@ -59,8 +59,8 @@ class CIFAR10DataLoader : public LoaderModule { LoaderModuleStatus load_routine(); std::shared_ptr _reader; void *_dev_resources; - decoded_image_info _raw_img_info; // image info to store the names. In this case the ID of image is stored in _roi_width field - decoded_image_info _output_decoded_img_info; + decoded_sample_info _raw_img_info; // image info to store the names. In this case the ID of image is stored in _roi_width field + decoded_sample_info _output_decoded_img_info; bool _initialized = false; RocalMemType _mem_type; size_t _output_mem_size; diff --git a/rocAL/include/loaders/image/image_loader.h b/rocAL/include/loaders/image/image_loader.h index 082bf2015..9b65d5f92 100644 --- a/rocAL/include/loaders/image/image_loader.h +++ b/rocAL/include/loaders/image/image_loader.h @@ -49,7 +49,7 @@ class ImageLoader : public LoaderModule { LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); void set_gpu_device_id(int device_id); std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; crop_image_info get_crop_image_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; @@ -74,9 +74,9 @@ class ImageLoader : public LoaderModule { size_t _batch_size; std::thread _load_thread; RocalMemType _mem_type; - decoded_image_info _decoded_img_info; + 
decoded_sample_info _decoded_img_info; crop_image_info _crop_image_info; - decoded_image_info _output_decoded_img_info; + decoded_sample_info _output_decoded_img_info; crop_image_info _output_cropped_img_info; CircularBuffer _circ_buff; TimingDBG _swap_handle_time; diff --git a/rocAL/include/loaders/image/image_loader_sharded.h b/rocAL/include/loaders/image/image_loader_sharded.h index 3b7bdf998..7c92dfa5a 100644 --- a/rocAL/include/loaders/image/image_loader_sharded.h +++ b/rocAL/include/loaders/image/image_loader_sharded.h @@ -40,7 +40,7 @@ class ImageLoaderSharded : public LoaderModule { void reset() override; void start_loading() override; std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; crop_image_info get_crop_image_info() override; Timing timing() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; diff --git a/rocAL/include/loaders/loader_module.h b/rocAL/include/loaders/loader_module.h index 4d4531a52..7bb7f6087 100644 --- a/rocAL/include/loaders/loader_module.h +++ b/rocAL/include/loaders/loader_module.h @@ -53,8 +53,8 @@ class LoaderModule { virtual Timing timing() = 0; // Returns timing info virtual std::vector get_id() = 0; // returns the id of the last batch of images/frames loaded virtual void start_loading() = 0; // starts internal loading thread - virtual decoded_image_info get_decode_image_info() = 0; - virtual crop_image_info get_crop_image_info() = 0; + virtual decoded_sample_info get_decode_sample_info() = 0; + virtual crop_image_info get_crop_image_info() { return {}; } virtual void set_prefetch_queue_depth(size_t prefetch_queue_depth) = 0; // introduce meta data reader virtual void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) = 0; diff --git a/rocAL/include/loaders/video/video_loader.h b/rocAL/include/loaders/video/video_loader.h index 4ff85f11c..81bbaee50 100644 --- 
a/rocAL/include/loaders/video/video_loader.h +++ b/rocAL/include/loaders/video/video_loader.h @@ -49,7 +49,7 @@ class VideoLoader : public LoaderModule { LoaderModuleStatus set_cpu_affinity(cpu_set_t cpu_mask); LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; crop_image_info get_crop_image_info() override { return _crop_img_info; } void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override{}; @@ -74,8 +74,8 @@ class VideoLoader : public LoaderModule { size_t _sequence_length; std::thread _load_thread; RocalMemType _mem_type; - decoded_image_info _decoded_img_info; - decoded_image_info _output_decoded_img_info; + decoded_sample_info _decoded_img_info; + decoded_sample_info _output_decoded_img_info; CircularBuffer _circ_buff; TimingDBG _swap_handle_time; bool _is_initialized; diff --git a/rocAL/include/loaders/video/video_loader_sharded.h b/rocAL/include/loaders/video/video_loader_sharded.h index 41cd062a6..99ae55ed6 100644 --- a/rocAL/include/loaders/video/video_loader_sharded.h +++ b/rocAL/include/loaders/video/video_loader_sharded.h @@ -41,7 +41,7 @@ class VideoLoaderSharded : public LoaderModule { void reset() override; void start_loading() override; std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; crop_image_info get_crop_image_info() override { return _crop_img_info; } void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override{}; diff --git a/rocAL/include/meta_data/bounding_box_graph.h b/rocAL/include/meta_data/bounding_box_graph.h index 34710b3ae..0c83ca8c3 100644 --- 
a/rocAL/include/meta_data/bounding_box_graph.h +++ b/rocAL/include/meta_data/bounding_box_graph.h @@ -31,8 +31,8 @@ typedef struct { float xc; float yc; float w; float h; } BoundingBoxCord_xcycwh class BoundingBoxGraph : public MetaDataGraph { public: void process(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data) override; - void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decode_image_info) override; - void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) override; + void update_meta_data(pMetaDataBatch meta_data, decoded_sample_info decode_image_info) override; + void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_sample_info decoded_sample_info, crop_image_info crop_image_info) override; void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) override; void update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_info, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data) override; }; diff --git a/rocAL/include/meta_data/meta_data_graph.h b/rocAL/include/meta_data/meta_data_graph.h index 563f7f069..97b745617 100644 --- a/rocAL/include/meta_data/meta_data_graph.h +++ b/rocAL/include/meta_data/meta_data_graph.h @@ -41,8 +41,8 @@ class MetaDataGraph { public: virtual ~MetaDataGraph() = default; virtual void process(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data) = 0; - virtual void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decoded_image_info) = 0; - virtual void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) = 0; + virtual void 
update_meta_data(pMetaDataBatch meta_data, decoded_sample_info decoded_sample_info) = 0; + virtual void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_sample_info decoded_sample_info, crop_image_info crop_image_info) = 0; virtual void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) = 0; virtual void update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_info, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data) = 0; std::list> _meta_nodes; diff --git a/rocAL/include/pipeline/commons.h b/rocAL/include/pipeline/commons.h index d3687b582..e12522438 100644 --- a/rocAL/include/pipeline/commons.h +++ b/rocAL/include/pipeline/commons.h @@ -153,4 +153,7 @@ struct Timing { long long unsigned video_read_time= 0; long long unsigned video_decode_time= 0; long long unsigned video_process_time= 0; + long long unsigned audio_read_time = 0; + long long unsigned audio_decode_time = 0; + long long unsigned audio_process_time = 0; }; diff --git a/rocAL/include/pipeline/master_graph.h b/rocAL/include/pipeline/master_graph.h index dfe663d96..6019389b2 100644 --- a/rocAL/include/pipeline/master_graph.h +++ b/rocAL/include/pipeline/master_graph.h @@ -37,6 +37,8 @@ THE SOFTWARE. 
#include "node_image_loader_single_shard.h"
 #include "node_video_loader.h"
 #include "node_video_loader_single_shard.h"
+#include "node_audio_loader.h"
+#include "node_audio_loader_single_shard.h"
 #include "ring_buffer.h"
 #include "timing_debug.h"
 #if ENABLE_HIP
@@ -389,3 +391,32 @@ inline std::shared_ptr MasterGraph::add_node(const s
     return node;
 }
 
+
+/*
+ * Explicit specialization for AudioLoaderNode
+ *
+ * Registers the node as the graph's loader. THROWs if a loader module is already
+ * set, because only one loader is allowed. The node is built on outputs[0], its
+ * loader module receives the graph's prefetch queue depth, the node is appended to
+ * the root nodes and every output tensor is mapped to it. `inputs` is not used.
+ */
+template<> inline std::shared_ptr MasterGraph::add_node(const std::vector& inputs, const std::vector& outputs) {
+    if(_loader_module)
+        THROW("A loader already exists, cannot have more than one loader")
+    auto node = std::make_shared(outputs[0], _device.resources());
+    _loader_module = node->get_loader_module();
+    _loader_module->set_prefetch_queue_depth(_prefetch_queue_depth);
+    _root_nodes.push_back(node);
+    for(auto& output: outputs)
+        _tensor_map.insert(make_pair(output, node));
+
+    return node;
+}
+
+/*
+ * Second audio loader specialization -- presumably for AudioLoaderSingleShardNode
+ * (the template arguments are not visible in this view; confirm).
+ * Performs the same registration steps as the specialization above.
+ */
+template<> inline std::shared_ptr MasterGraph::add_node(const std::vector& inputs, const std::vector& outputs) {
+    if(_loader_module)
+        THROW("A loader already exists, cannot have more than one loader")
+    auto node = std::make_shared(outputs[0], _device.resources());
+    _loader_module = node->get_loader_module();
+    _loader_module->set_prefetch_queue_depth(_prefetch_queue_depth);
+    _root_nodes.push_back(node);
+    for(auto& output: outputs)
+        _tensor_map.insert(make_pair(output, node));
+
+    return node;
+}
\ No newline at end of file
diff --git a/rocAL/include/pipeline/tensor.h b/rocAL/include/pipeline/tensor.h
index 63a639c6b..bdf055638 100644
--- a/rocAL/include/pipeline/tensor.h
+++ b/rocAL/include/pipeline/tensor.h
@@ -180,7 +180,7 @@ class TensorInfo {
             }
         } else {  // For other tensors
             if (!_max_shape.size()) _max_shape.resize(_num_of_dims - 1, 0);  // Since 2 values will be stored in the vector
-            _max_shape.assign(_dims.begin() + 1, _dims.end());
+            _max_shape.assign(_dims.begin() + 1, _dims.end());  // RECHECK for audio tensors
         }
         reset_tensor_roi_buffers();
     }
@@
-267,6 +267,7 @@ class TensorInfo { bool is_metadata() const { return _is_metadata; } void set_roi_ptr(unsigned* roi_ptr) { _roi.reset_ptr(roi_ptr); } void copy_roi(void* roi_buffer) { _roi.copy(roi_buffer); } + std::shared_ptr> get_sample_rate() const { return _sample_rate; } private: Type _type = Type::UNKNOWN; //!< tensor type, whether is virtual tensor, created from handle or is a regular tensor @@ -287,6 +288,8 @@ class TensorInfo { bool _is_image = false; bool _is_metadata = false; size_t _channels = 3; //!< stores the channel dimensions in the tensor + std::shared_ptr> _sample_rate; + void reallocate_tensor_sample_rate_buffers(); }; bool operator==(const TensorInfo& rhs, const TensorInfo& lhs); @@ -317,6 +320,7 @@ class Tensor : public rocalTensor { unsigned copy_data(hipStream_t stream, void* host_memory, bool sync); #endif unsigned copy_data(void* user_buffer, RocalOutputMemType external_mem_type) override; + unsigned copy_data(void* user_buffer, uint max_x1, uint max_y1); //! Default destructor /*! 
Releases the OpenVX Tensor object */ ~Tensor(); @@ -328,6 +332,8 @@ class Tensor : public rocalTensor { void update_tensor_roi(const std::vector& width, const std::vector& height); void update_tensor_roi(const std::vector>& shape); void reset_tensor_roi() { _info.reset_tensor_roi_buffers(); } + void reset_audio_sample_rate() { _info.reallocate_tensor_sample_rate_buffers(); } + void update_audio_tensor_sample_rate(const std::vector& sample_rate); void set_roi(unsigned* roi_ptr) { _info.set_roi_ptr(roi_ptr); } void copy_roi(void* roi_buffer) override { _info.copy_roi(roi_buffer); } size_t get_roi_dims_size() override { return _info.roi().no_of_dims(); } diff --git a/rocAL/include/readers/image/caffe2_lmdb_record_reader.h b/rocAL/include/readers/image/caffe2_lmdb_record_reader.h index 363ffffe6..d30a3ad64 100644 --- a/rocAL/include/readers/image/caffe2_lmdb_record_reader.h +++ b/rocAL/include/readers/image/caffe2_lmdb_record_reader.h @@ -60,6 +60,9 @@ class Caffe2LMDBRecordReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; + //! Returns the name of the latest file_path opened + std::string file_path() override { return _last_file_name; } + unsigned count_items() override; ~Caffe2LMDBRecordReader() override; diff --git a/rocAL/include/readers/image/caffe_lmdb_record_reader.h b/rocAL/include/readers/image/caffe_lmdb_record_reader.h index 97be19ea4..710852b04 100644 --- a/rocAL/include/readers/image/caffe_lmdb_record_reader.h +++ b/rocAL/include/readers/image/caffe_lmdb_record_reader.h @@ -60,6 +60,9 @@ class CaffeLMDBRecordReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; + //! 
Returns the name of the latest file_path opened + std::string file_path() override { return _last_file_name; } + unsigned count_items() override; ~CaffeLMDBRecordReader() override; diff --git a/rocAL/include/readers/image/cifar10_data_reader.h b/rocAL/include/readers/image/cifar10_data_reader.h index 9b8b12276..cbd397140 100644 --- a/rocAL/include/readers/image/cifar10_data_reader.h +++ b/rocAL/include/readers/image/cifar10_data_reader.h @@ -54,6 +54,9 @@ class CIFAR10DataReader : public Reader { //! Returns the name of the latest data_id opened std::string id() override { return _last_id; }; + //! Returns the name of the latest file_path opened + std::string file_path() override {return _last_file_name; } + unsigned count_items() override; ~CIFAR10DataReader() override; diff --git a/rocAL/include/readers/image/coco_file_source_reader.h b/rocAL/include/readers/image/coco_file_source_reader.h index fd14c5061..e8896a35f 100644 --- a/rocAL/include/readers/image/coco_file_source_reader.h +++ b/rocAL/include/readers/image/coco_file_source_reader.h @@ -58,6 +58,9 @@ class COCOFileSourceReader : public Reader { //! Returns the name of the latest file opened std::string id() override { return _last_id; }; + //! Returns the name of the latest file_path opened + std::string file_path() override { return _last_file_name; } + unsigned count_items() override; ~COCOFileSourceReader() override; diff --git a/rocAL/include/readers/image/external_source_reader.h b/rocAL/include/readers/image/external_source_reader.h index 9864ed71b..f37f23c64 100644 --- a/rocAL/include/readers/image/external_source_reader.h +++ b/rocAL/include/readers/image/external_source_reader.h @@ -58,6 +58,9 @@ class ExternalSourceReader : public Reader, public ExternalSourceImageReader { //! Returns the name of the latest file opened std::string id() override { return _last_id; } + //! Returns the name of the latest file_path opened + std::string file_path() override { return _last_file_name; } + //! 
Return batch_size() for count_items unless end_of_sequence has been signalled unsigned count_items() override; @@ -91,7 +94,7 @@ class ExternalSourceReader : public Reader, public ExternalSourceImageReader { unsigned _curr_file_idx; FILE* _current_fPtr; unsigned _current_file_size; - std::string _last_id; + std::string _last_id, _last_file_name; size_t _shard_id = 0; size_t _shard_count = 1; // equivalent of batch size //!< _batch_count Defines the quantum count of the images to be read. It's usually equal to the user's batch size. diff --git a/rocAL/include/readers/image/file_source_reader.h b/rocAL/include/readers/image/file_source_reader.h index 428ba5874..925be176c 100644 --- a/rocAL/include/readers/image/file_source_reader.h +++ b/rocAL/include/readers/image/file_source_reader.h @@ -56,6 +56,9 @@ class FileSourceReader : public Reader { //! Returns the name of the latest file opened std::string id() override { return _last_id; }; + //! Returns the name of the latest file_path opened + std::string file_path() override { return _last_file_path; } + unsigned count_items() override; ~FileSourceReader() override; @@ -77,7 +80,7 @@ class FileSourceReader : public Reader { FILE *_current_fPtr; unsigned _current_file_size; std::string _last_id; - std::string _last_file_name; + std::string _last_file_name, _last_file_path; size_t _shard_id = 0; size_t _shard_count = 1; // equivalent of batch size //!< _batch_count Defines the quantum count of the images to be read. It's usually equal to the user's batch size. 
@@ -97,4 +100,5 @@ class FileSourceReader : public Reader { void incremenet_file_id() { _file_id++; } void replicate_last_image_to_fill_last_shard(); void replicate_last_batch_to_pad_partial_shard(); + std::shared_ptr _meta_data_reader = nullptr; }; diff --git a/rocAL/include/readers/image/image_reader.h b/rocAL/include/readers/image/image_reader.h index d7f13b4d6..e10bb2b5c 100644 --- a/rocAL/include/readers/image/image_reader.h +++ b/rocAL/include/readers/image/image_reader.h @@ -171,6 +171,10 @@ class Reader { //! Returns the name/identifier of the last item opened in this resource virtual std::string id() = 0; //! Returns the number of items remained in this resource + + //! Returns the path of the last item opened in this resource + virtual std::string file_path() = 0; + virtual unsigned count_items() = 0; virtual ~Reader() = default; diff --git a/rocAL/include/readers/image/mxnet_recordio_reader.h b/rocAL/include/readers/image/mxnet_recordio_reader.h index 04c1299af..eafcba565 100644 --- a/rocAL/include/readers/image/mxnet_recordio_reader.h +++ b/rocAL/include/readers/image/mxnet_recordio_reader.h @@ -58,6 +58,9 @@ class MXNetRecordIOReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; + //! Returns the name of the latest file_path opened + std::string file_path() override { return _last_file_name; } + unsigned count_items() override; ~MXNetRecordIOReader() override; diff --git a/rocAL/include/readers/image/tf_record_reader.h b/rocAL/include/readers/image/tf_record_reader.h index 2fe1d6fb1..caf79d6bb 100644 --- a/rocAL/include/readers/image/tf_record_reader.h +++ b/rocAL/include/readers/image/tf_record_reader.h @@ -60,6 +60,9 @@ class TFRecordReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; + //! 
Returns the name of the latest file_path opened + std::string file_path() override { return _last_file_name; } + unsigned count_items() override; ~TFRecordReader() override; diff --git a/rocAL/include/readers/video/sequence_file_source_reader.h b/rocAL/include/readers/video/sequence_file_source_reader.h index a2f4a555f..77b2241c8 100644 --- a/rocAL/include/readers/video/sequence_file_source_reader.h +++ b/rocAL/include/readers/video/sequence_file_source_reader.h @@ -54,7 +54,10 @@ class SequenceFileSourceReader : public Reader { void reset() override; //! Returns the name of the latest file opened - std::string id() override { return _last_id; }; + std::string id() override { return _last_id; } + + //! Returns the name of the latest file_path opened + std::string file_path() override { return _last_file_name; } unsigned count_items() override; @@ -80,7 +83,7 @@ class SequenceFileSourceReader : public Reader { unsigned _curr_file_idx; FILE *_current_fPtr; unsigned _current_file_size; - std::string _last_id; + std::string _last_id, _last_file_name; std::vector _last_sequence; size_t _sequence_length; size_t _step; diff --git a/rocAL/source/api/rocal_api_data_loaders.cpp b/rocAL/source/api/rocal_api_data_loaders.cpp index e28fdc290..d5543fc90 100644 --- a/rocAL/source/api/rocal_api_data_loaders.cpp +++ b/rocAL/source/api/rocal_api_data_loaders.cpp @@ -34,9 +34,28 @@ THE SOFTWARE. 
#include "node_fused_jpeg_crop_single_shard.h"
 #include "node_image_loader.h"
 #include "node_image_loader_single_shard.h"
+#include "node_audio_loader.h"
+#include "node_audio_loader_single_shard.h"
+#include "audio_source_evaluator.h"
 #include "node_resize.h"
 #include "rocal_api.h"
 
+
+// Scans the audio data set at source_path and returns (max_samples, max_channels) as reported
+// by AudioSourceEvaluator. THROWs when the evaluator cannot be created or when either maximum
+// is zero (files missing or unreadable).
+std::tuple
+evaluate_audio_data_set(StorageType storage_type, DecoderType decoder_type,
+                        const std::string &source_path, const std::string &json_path)
+{
+    AudioSourceEvaluator source_evaluator;
+    if(source_evaluator.create(ReaderConfig(storage_type, source_path, json_path), DecoderConfig(decoder_type)) != AudioSourceEvaluatorStatus::OK)
+        THROW("Initializing file source input evaluator failed")
+    auto max_samples = source_evaluator.max_samples();
+    auto max_channels = source_evaluator.max_channels();
+    if(max_samples == 0 || max_channels == 0)
+        THROW("Cannot find size of the audio files or files cannot be accessed")
+    LOG("Maximum input audio dimension [ " + TOSTR(max_samples) + " x " + TOSTR(max_channels)+ " ] for audio's in " + source_path)
+    return std::make_tuple(max_samples, max_channels);
+}
+
 std::tuple
 evaluate_image_data_set(RocalImageSizeEvaluationPolicy decode_size_policy, StorageType storage_type,
                         DecoderType decoder_type, const std::string& source_path, const std::string& json_path) {
@@ -2078,6 +2097,137 @@ rocalJpegExternalFileSource(
     return output;
 }
 
+// Creates an audio loader for one shard (shard_id out of shard_count) of the data set at
+// source_path and returns the loader's output tensor. When is_output is set, a second tensor
+// is created and connected to the loader output through an extra node.
+// Errors are captured on the context and printed to stderr; `output` is returned as it stands
+// at that point (nullptr if the failure happened before the tensor was created).
+// NOTE(review): `downmix`, `max_frames` and `max_channels` are currently not used. The tensor
+// dimensions come from evaluating the data set. The original code shadowed the two size
+// parameters with the evaluation result; the locals are renamed here so this is visible.
+// NOTE(review): the data set is evaluated with StorageType::FILE_SYSTEM while the loader is
+// initialised with StorageType(storage_type) -- confirm that this difference is intended.
+RocalTensor ROCAL_API_CALL
+rocalAudioFileSourceSingleShard(
+    RocalContext p_context,
+    const char* source_path,
+    unsigned shard_id,
+    unsigned shard_count,
+    bool is_output,
+    bool shuffle,
+    bool loop,
+    bool downmix,
+    unsigned max_frames,
+    unsigned max_channels,
+    unsigned storage_type) {
+    Tensor* output = nullptr;
+    auto context = static_cast(p_context);
+    try {
+        if(shard_count < 1)
+            THROW("Shard count should be bigger than 0")
+        if(shard_id >= shard_count)
+            THROW("Shard id should be smaller than shard count")
+        auto [evaluated_frames, evaluated_channels] = evaluate_audio_data_set(StorageType::FILE_SYSTEM, DecoderType::SNDFILE, source_path, "");
+        INFO("Internal buffer size for audio frames = " + TOSTR(evaluated_frames))
+        RocalTensorlayout tensor_layout = RocalTensorlayout::NONE;
+        RocalTensorDataType tensor_data_type = RocalTensorDataType::FP32;
+        std::vector dims = {context->user_batch_size(), evaluated_frames, evaluated_channels};
+        auto info = TensorInfo(std::vector(std::move(dims)),
+                               context->master_graph->mem_type(),
+                               tensor_data_type);
+        info.set_max_shape();
+        info.set_tensor_layout(tensor_layout);
+        output = context->master_graph->create_loader_output_tensor(info);
+        output->reset_audio_sample_rate();
+        auto cpu_num_threads = context->master_graph->calculate_cpu_num_threads(shard_count);
+        context->master_graph->add_node({}, {output})->init(shard_id, shard_count, cpu_num_threads,
+                                                           source_path,
+                                                           "",
+                                                           StorageType(storage_type),
+                                                           DecoderType::SNDFILE,
+                                                           shuffle,
+                                                           loop,
+                                                           context->user_batch_size(),
+                                                           context->master_graph->mem_type(),
+                                                           context->master_graph->meta_data_reader()
+                                                           );
+        context->master_graph->set_loop(loop);
+        if(is_output) {
+            auto actual_output = context->master_graph->create_tensor(info, is_output);
+            context->master_graph->add_node({output}, {actual_output});
+        }
+    }
+    catch(const std::exception& e) {
+        context->capture_error(e.what());
+        std::cerr << e.what() << '\n';
+    }
+    return output;
+}
+
+// Creates an audio loader that splits the data set at source_path into internal_shard_count
+// internal shards and returns the loader's output tensor. When is_output is set, a second
+// tensor is created and connected to the loader output through an extra node.
+// Errors are captured on the context and printed to stderr; `output` is returned as it stands
+// at that point (nullptr if the failure happened before the tensor was created).
+// NOTE(review): `max_frames` and `max_channels` are currently not used (the original code
+// shadowed them with the evaluation result; the locals are renamed here so this is visible).
+// `downmix` is only referenced inside the commented-out section below.
+RocalTensor ROCAL_API_CALL
+rocalAudioFileSource(
+    RocalContext p_context,
+    const char* source_path,
+    unsigned internal_shard_count,
+    bool is_output,
+    bool shuffle,
+    bool loop,
+    bool downmix,
+    unsigned max_frames,
+    unsigned max_channels) {
+    Tensor* output = nullptr;
+    auto context = static_cast(p_context);
+    try {
+        auto [evaluated_frames, evaluated_channels] = evaluate_audio_data_set(StorageType::FILE_SYSTEM, DecoderType::SNDFILE,
+                                                       source_path, "");
+        INFO("Internal buffer size for audio frames = " + TOSTR(evaluated_frames))
+        RocalTensorDataType tensor_data_type = RocalTensorDataType::FP32;
+        std::vector dims = {context->user_batch_size(), evaluated_frames, evaluated_channels};
+        auto info = TensorInfo(std::vector(std::move(dims)),
+                               context->master_graph->mem_type(),
+                               tensor_data_type);
+        info.set_max_shape();
+        output = context->master_graph->create_loader_output_tensor(info);
+        output->reset_audio_sample_rate();
+        auto cpu_num_threads = context->master_graph->calculate_cpu_num_threads(internal_shard_count);
+        context->master_graph->add_node({}, {output})->init(internal_shard_count, cpu_num_threads,
+                                                           source_path,
+                                                           "",
+                                                           StorageType::FILE_SYSTEM,
+                                                           DecoderType::SNDFILE,
+                                                           shuffle,
+                                                           loop,
+                                                           context->user_batch_size(),
+                                                           context->master_graph->mem_type(),
+                                                           context->master_graph->meta_data_reader());
+        context->master_graph->set_loop(loop);
+        /* Commenting out this piece of code in this PR - Next PR will contain augmentations & this code will be uncommented
+        if(downmix)
+        {
+            // For the resize node, user can create an image with a different width and height
+            TensorInfo output_info = info;
+            std::vector output_dims;
+            output_dims.resize(3);
+            output_dims.at(0) = context->user_batch_size();
+            output_dims.at(1) = info.dims()[1];
+            output_dims.at(2) = 1;
+            output_info.set_dims(output_dims);
+            output_info.set_tensor_layout(RocalTensorlayout::NONE);
+            auto downmixed_output = context->master_graph->create_tensor(output_info, false);
+            std::shared_ptr downmix_node = context->master_graph->add_node({output}, {downmixed_output});
+            if(is_output)
+            {
+                auto actual_output = context->master_graph->create_tensor(output_info, is_output);
+                context->master_graph->add_node({downmixed_output}, {actual_output});
+                output = downmixed_output;
+            }
+        }
+        else
+        { */
+            if(is_output)
+            {
+                auto actual_output = context->master_graph->create_tensor(info, is_output);
+                context->master_graph->add_node({output}, {actual_output});
+            }
+        // }
+    }
+    catch(const std::exception& e) {
+        context->capture_error(e.what());
+        std::cerr << e.what() << '\n';
+    }
+    return output;
+}
+
 RocalStatus ROCAL_API_CALL
 rocalResetLoaders(RocalContext p_context) {
     auto context =
static_cast(p_context); diff --git a/rocAL/source/decoders/audio/audio_decoder_factory.cpp b/rocAL/source/decoders/audio/audio_decoder_factory.cpp new file mode 100644 index 000000000..6b7693d18 --- /dev/null +++ b/rocAL/source/decoders/audio/audio_decoder_factory.cpp @@ -0,0 +1,36 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+#include "audio_decoder_factory.h"
+#include "decoder_factory.h"
+#include
+#include
+#include "commons.h"
+
+// Factory for audio decoders: returns a newly created decoder for the type stored in `config`.
+// Only DecoderType::SNDFILE is handled (the concrete class passed to make_shared is not visible
+// in this view -- presumably SndFileDecoder); any other type goes through THROW.
+std::shared_ptr create_audio_decoder(DecoderConfig config) {
+    switch (config.type()) {
+        case DecoderType::SNDFILE:
+            return std::make_shared();
+        default:
+            THROW("Unsupported decoder type " + TOSTR(config.type()));
+    }
+}
+
diff --git a/rocAL/source/decoders/audio/sndfile_decoder.cpp b/rocAL/source/decoders/audio/sndfile_decoder.cpp
new file mode 100644
index 000000000..1a8ccebaf
--- /dev/null
+++ b/rocAL/source/decoders/audio/sndfile_decoder.cpp
@@ -0,0 +1,87 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include
+#include
+#include
+#include "sndfile_decoder.h"
+
+// Creates a decoder with no file attached; initialize() opens one.
+SndFileDecoder::SndFileDecoder() {
+    // A null handle means "nothing open"; release() and the destructor rely on it.
+    _sf_ptr = nullptr;
+}
+
+// Reads every frame of the opened file into `buffer` as interleaved floats.
+// `buffer` must have room for frames * channels floats, as reported by decode_info().
+// Returns Status::OK on success. Returns Status::CONTENT_DECODE_FAILED when no file is open,
+// `buffer` is null, or fewer frames than expected were read; on a short read the file is closed.
+AudioDecoder::Status SndFileDecoder::decode(float* buffer) {
+    if (_sf_ptr == nullptr || buffer == nullptr)
+        return Status::CONTENT_DECODE_FAILED;
+    // sf_readf_float returns sf_count_t; storing it in an int would truncate long files.
+    const sf_count_t read_frame_count = sf_readf_float(_sf_ptr, buffer, _sfinfo.frames);
+    if (read_frame_count != _sfinfo.frames) {
+        printf("Not able to decode all frames. Only decoded %lld frames\n", static_cast<long long>(read_frame_count));
+        // release() also clears the handle, so a later release() cannot close it twice.
+        release();
+        return Status::CONTENT_DECODE_FAILED;
+    }
+    return Status::OK;
+}
+
+// Reports the frame count, channel count and sample rate taken from the header read by
+// initialize(). The three outputs are written before validation.
+// A file with fewer than one channel or fewer than one frame is rejected through THROW;
+// the file is closed first because nothing placed after THROW is expected to run.
+AudioDecoder::Status SndFileDecoder::decode_info(int* samples, int* channels, float* sample_rate) {
+    // NOTE(review): _sfinfo.frames is an sf_count_t; the int* interface narrows it.
+    *samples = _sfinfo.frames;
+    *channels = _sfinfo.channels;
+    *sample_rate = _sfinfo.samplerate;
+    if (_sfinfo.channels < 1) {
+        release();
+        THROW("Not able to process audio with " + TOSTR(_sfinfo.channels) + " channels");
+        return Status::HEADER_DECODE_FAILED;  // original fallback status; only reached if THROW does not leave the function
+    }
+    if (_sfinfo.frames < 1) {
+        release();
+        THROW("Not able to process audio with " + TOSTR(_sfinfo.frames) + " frames");
+        return Status::HEADER_DECODE_FAILED;  // original fallback status; only reached if THROW does not leave the function
+    }
+    return Status::OK;
+}
+
+// Initialize will open a new decoder and initialize the context.
+// Any file left open by an earlier call is closed first.
+// Returns Status::HEADER_DECODE_FAILED when libsndfile cannot open `src_filename`.
+AudioDecoder::Status SndFileDecoder::initialize(const char *src_filename) {
+    release();
+    _src_filename = src_filename;
+    memset(&_sfinfo, 0, sizeof(_sfinfo));
+    _sf_ptr = sf_open(src_filename, SFM_READ, &_sfinfo);
+    if (_sf_ptr == nullptr) {
+        /* Open failed so print an error message. */
+        printf("Not able to open input file %s.\n", src_filename);
+        /* Print the error message from libsndfile. */
+        puts(sf_strerror(NULL));
+        // Nothing to close: sf_open returned no handle.
+        return Status::HEADER_DECODE_FAILED;
+    }
+    return Status::OK;
+}
+
+// Closes the open file, if any. Safe to call repeatedly.
+void SndFileDecoder::release() {
+    if (_sf_ptr != nullptr) {
+        sf_close(_sf_ptr);
+        _sf_ptr = nullptr;
+    }
+}
+
+// Closes the file if the owner did not call release().
+SndFileDecoder::~SndFileDecoder() {
+    release();
+}
+
diff --git a/rocAL/source/loaders/audio/audio_loader.cpp b/rocAL/source/loaders/audio/audio_loader.cpp
new file mode 100644
index 000000000..1fa5fd645
--- /dev/null
+++ b/rocAL/source/loaders/audio/audio_loader.cpp
@@ -0,0 +1,288 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include
+#include
+#include "audio_loader.h"
+#include "audio_read_and_decode.h"
+#include "vx_ext_amd.h"
+
+// Builds an idle loader: no output tensor, host memory, batch size 1, no
+// worker thread. set_output() and initialize() must run before start_loading().
+AudioLoader::AudioLoader(void* dev_resources):
+_circ_buff(dev_resources),
+_swap_handle_time("Swap_handle_time", DBG_TIMING) {
+    _output_tensor = nullptr;
+    _mem_type = RocalMemType::HOST;
+    _internal_thread_running = false;
+    _output_mem_size = 0;
+    _batch_size = 1;
+    _is_initialized = false;
+    _remaining_audio_count = 0;
+    _device_id = 0;
+}
+
+AudioLoader::~AudioLoader() {
+    de_init();
+}
+
+// Stops the worker thread if it is running and releases the circular buffer.
+void AudioLoader::shut_down() {
+    if(_internal_thread_running)
+        stop_internal_thread();
+    _circ_buff.release();
+}
+
+// Sets the depth later handed to the circular buffer in initialize().
+void AudioLoader::set_prefetch_queue_depth(size_t prefetch_queue_depth) {
+    if(prefetch_queue_depth <= 0)
+        THROW("Prefetch queue depth value cannot be zero or negative");
+    _prefetch_queue_depth = prefetch_queue_depth;
+}
+
+void AudioLoader::set_gpu_device_id(int device_id) {
+    if(device_id < 0)
+        THROW("invalid device_id passed to loader");
+    _device_id = device_id;
+}
+
+// Number of audio samples not yet handed to the output tensor.
+size_t
+AudioLoader::remaining_count() {
+    return _remaining_audio_count;
+}
+
+// Rewinds the loader: stops the writer thread, empties the circular buffer,
+// rewinds the reader and starts the writer thread again.
+void AudioLoader::reset() {
+    // stop the writer thread and empty the internal circular buffer
+    _internal_thread_running = false;
+    _circ_buff.unblock_writer();
+    if (_load_thread.joinable())
+        _load_thread.join();
+    // Emptying the internal circular buffer
+    _circ_buff.reset();
+    // resetting the reader thread to the start of the media
+    _audio_counter = 0;
+    _audio_loader->reset();
+    // Start loading (writer thread) again
+    start_loading();
+}
+
+// Joins the worker thread and returns the loader to its uninitialized state.
+void AudioLoader::de_init() {
+    // Set running to 0 and wait for the internal thread to join
+    stop_internal_thread();
+    _output_mem_size = 0;
+    _batch_size = 1;
+    _is_initialized = false;
+}
+
+LoaderModuleStatus
+AudioLoader::load_next() {
+    return update_output_audio();
+}
+
+// Registers the tensor that receives each decoded batch and records its size.
+// A null tensor is rejected here instead of being dereferenced.
+void AudioLoader::set_output(Tensor* output_tensor) {
+    if (output_tensor == nullptr)
+        THROW("Null output tensor passed to the audio loader");
+    _output_tensor = output_tensor;
+    _output_mem_size = _output_tensor->info().data_size();
+}
+
+// Signals the worker thread to stop, wakes both sides of the circular buffer
+// and waits for the thread to finish.
+// NOTE(review): the buffer is reset before the join; this order is kept as found.
+void AudioLoader::stop_internal_thread() {
+    _internal_thread_running = false;
+    _stopped = true;
+    _circ_buff.unblock_reader();
+    _circ_buff.unblock_writer();
+    _circ_buff.reset();
+    if (_load_thread.joinable())
+        _load_thread.join();
+}
+
+// Creates the reader/decoder pair and sizes the circular buffer.
+// set_output() must have been called first so the output size is known.
+// If creating the reader/decoder throws, the loader is de-initialized and the
+// exception is rethrown.
+void AudioLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool decoder_keep_original) {
+    if (_is_initialized)
+        WRN("initialize() function is already called and loader module is initialized")
+    if (_output_mem_size == 0)
+        THROW("output audio size is 0, set_output() should be called before initialize for loader modules")
+    _mem_type = mem_type;
+    _batch_size = batch_size;
+    _loop = reader_cfg.loop();
+    _audio_loader = std::make_shared();
+    size_t shard_count = reader_cfg.get_shard_count();
+    int device_id = reader_cfg.get_shard_id();
+    try {
+        // set the device_id for decoder same as shard_id for number of shards > 1
+        if (shard_count > 1)
+            _audio_loader->create(reader_cfg, decoder_cfg, _batch_size, device_id);
+        else
+            _audio_loader->create(reader_cfg, decoder_cfg, _batch_size);
+    }
+    catch (const std::exception &) {
+        de_init();
+        throw;
+    }
+    // The first two entries of the tensor's max shape are used as the sample
+    // and channel limits for decoding.
+    _max_decoded_samples = _output_tensor->info().max_shape().at(0);
+    _max_decoded_channels = _output_tensor->info().max_shape().at(1);
+    _decoded_audio_info._sample_names.resize(_batch_size);
+    _decoded_audio_info._original_audio_samples.resize(_batch_size);
+    _decoded_audio_info._original_audio_channels.resize(_batch_size);
+    _decoded_audio_info._original_audio_sample_rates.resize(_batch_size);
+    _circ_buff.init(_mem_type, _output_mem_size,_prefetch_queue_depth );
+    _is_initialized = true;
+    LOG("Loader module initialized");
+}
+
+// Records how many samples the reader holds and launches the worker thread.
+void AudioLoader::start_loading() {
+    if (!_is_initialized)
+        THROW("start_loading() should be called after initialize() function is called")
+    // Assigning a new std::thread over a joinable one calls std::terminate, so a
+    // second start without a stop in between is reported as an error instead.
+    if (_load_thread.joinable())
+        THROW("start_loading() called while the loader thread is still running");
+
+    _remaining_audio_count = _audio_loader->count();
+    _internal_thread_running = true;
+    _load_thread = std::thread(&AudioLoader::load_routine, this);
+}
+
+// Body of the worker thread. While the loader is running it takes a write
+// slot from the circular buffer, asks the reader/decoder to fill it with one
+// batch and pushes the slot together with the per-sample info.
+// When no batch could be produced it wakes the consumer and backs off for one
+// second before trying again. Always returns OK once the thread is told to stop.
+LoaderModuleStatus
+AudioLoader::load_routine() {
+    LOG("Started the internal loader thread");
+    // Last failure that was logged, so a repeating failure is reported only once.
+    LoaderModuleStatus last_load_status = LoaderModuleStatus::OK;
+
+    while (_internal_thread_running)
+    {
+        auto data = (float*)_circ_buff.get_write_buffer();
+        // The loader may have been stopped while this thread waited for a slot.
+        if (!_internal_thread_running)
+            break;
+
+        const auto load_status = _audio_loader->load(data,
+                                                     _decoded_audio_info._sample_names,
+                                                     _max_decoded_samples,
+                                                     _max_decoded_channels,
+                                                     _decoded_audio_info._original_audio_samples,
+                                                     _decoded_audio_info._original_audio_channels,
+                                                     _decoded_audio_info._original_audio_sample_rates);
+
+        if (load_status == LoaderModuleStatus::OK) {
+            _circ_buff.set_sample_info(_decoded_audio_info);
+            _circ_buff.push();
+            _audio_counter += _output_tensor->info().batch_size();
+            continue;
+        }
+
+        if (last_load_status != load_status) {
+            if (load_status == LoaderModuleStatus::NO_MORE_DATA_TO_READ ||
+                load_status == LoaderModuleStatus::NO_FILES_TO_READ) {
+                LOG("Cycled through all audios, count " + TOSTR(_audio_counter));
+            }
+            else {
+                ERR("ERROR: Detected error in reading the audios");
+            }
+            last_load_status = load_status;
+        }
+        // Wake the consumer so it can handle the out-of-data case instead of
+        // waiting on the buffer, then slow this thread down until the program
+        // ends or reset() is called.
+        _circ_buff.unblock_reader();
+        std::this_thread::sleep_for(std::chrono::seconds(1));
+    }
+    return LoaderModuleStatus::OK;
+}
+
+// True when fewer samples remain than one batch needs.
+// remaining_count() is unsigned, so the previous "< 0" test could never be
+// true; one batch is the unit update_output_audio() consumes per call.
+bool AudioLoader::is_out_of_data() {
+    return (remaining_count() < _batch_size);
+}
+
+// size_t AudioLoader::last_batch_padded_size() {
+//     return
_audio_loader->last_batch_padded_size();
+// }
+
+// Hands the next decoded batch to the output tensor.
+// Returns NO_MORE_DATA_TO_READ when the loader is out of data, a
+// *_BUFFER_SWAP_FAILED status when the tensor rejects the buffer, and OK
+// otherwise (including when the loader has been stopped).
+LoaderModuleStatus
+AudioLoader::update_output_audio() {
+    if (is_out_of_data())
+        return LoaderModuleStatus::NO_MORE_DATA_TO_READ;
+    if (_stopped)
+        return LoaderModuleStatus::OK;
+    // _circ_buff.get_read_buffer_x() is blocking and puts the caller on sleep until new audios are written to the _circ_buff
+    if((_mem_type== RocalMemType::OCL) || (_mem_type== RocalMemType::HIP)) {
+        auto data_buffer = _circ_buff.get_read_buffer_dev();
+        _swap_handle_time.start();
+        const bool swap_failed = (_output_tensor->swap_handle(data_buffer) != 0);
+        // Stop the timer on both outcomes so a failed swap does not leave it running.
+        _swap_handle_time.end();
+        if (swap_failed)
+            return LoaderModuleStatus::DEVICE_BUFFER_SWAP_FAILED;
+    }
+    else {
+        auto data_buffer = _circ_buff.get_read_buffer_host();
+        _swap_handle_time.start();
+        const bool swap_failed = (_output_tensor->swap_handle(data_buffer) != 0);
+        _swap_handle_time.end();
+        if (swap_failed)
+            return LoaderModuleStatus::HOST_BUFFER_SWAP_FAILED;
+    }
+    // The loader may have been stopped while this call waited for a buffer.
+    if (_stopped)
+        return LoaderModuleStatus::OK;
+    _output_decoded_audio_info = _circ_buff.get_sample_info();
+    _output_names = _output_decoded_audio_info._sample_names;
+    _output_tensor->update_tensor_roi(_output_decoded_audio_info._original_audio_samples, _output_decoded_audio_info._original_audio_channels);
+    _output_tensor->update_audio_tensor_sample_rate(_output_decoded_audio_info._original_audio_sample_rates);
+    _circ_buff.pop();
+    if (!_loop) {
+        // Clamp at zero so the count reported by remaining_count() cannot wrap
+        // when fewer than one batch was left.
+        if (_remaining_audio_count > _batch_size)
+            _remaining_audio_count -= _batch_size;
+        else
+            _remaining_audio_count = 0;
+    }
+    return LoaderModuleStatus::OK;
+}
+
+// Read and decode timings come from the reader/decoder; the process time is
+// the time spent swapping buffers into the output tensor.
+Timing AudioLoader::timing() {
+    auto t = _audio_loader->timing();
+    t.audio_process_time = _swap_handle_time.get_timing();
+    return t;
+}
+
+// Pins the worker thread to `cpu_mask`. Only valid once the thread is running.
+// A failure of the pthread call is logged and otherwise ignored.
+LoaderModuleStatus AudioLoader::set_cpu_affinity(cpu_set_t cpu_mask) {
+    if (!_internal_thread_running)
+        THROW("set_cpu_affinity() should be called after start_loading function is called")
+#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
+#else
+    int ret = pthread_setaffinity_np(_load_thread.native_handle(),
+                                    sizeof(cpu_set_t), &cpu_mask);
+    if (ret != 0)
+        WRN("Error calling pthread_setaffinity_np: " + TOSTR(ret));
+#endif
+    return LoaderModuleStatus::OK;
+}
+
+// Requests SCHED_FIFO with the given parameters for the worker thread.
+// Only valid once the thread is running; a failure is logged and otherwise ignored.
+LoaderModuleStatus AudioLoader::set_cpu_sched_policy(struct sched_param sched_policy) {
+    if (!_internal_thread_running)
+        THROW("set_cpu_sched_policy() should be called after start_loading function is called")
+#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
+#else
+    auto ret = pthread_setschedparam(_load_thread.native_handle(), SCHED_FIFO, &sched_policy);
+    if (ret != 0)
+        WRN("Unsuccessful in setting thread realtime priority for loader thread err = " + TOSTR(ret))
+#endif
+    return LoaderModuleStatus::OK;
+}
+
+// Names of the samples in the batch most recently handed to the output tensor.
+std::vector AudioLoader::get_id() {
+    return _output_names;
+}
+
+// Per-sample info of the batch most recently handed to the output tensor.
+decoded_sample_info AudioLoader::get_decode_sample_info() {
+    return _output_decoded_audio_info;
+}
+
diff --git a/rocAL/source/loaders/audio/audio_loader_sharded.cpp b/rocAL/source/loaders/audio/audio_loader_sharded.cpp
new file mode 100644
index 000000000..529fe4bee
--- /dev/null
+++ b/rocAL/source/loaders/audio/audio_loader_sharded.cpp
@@ -0,0 +1,166 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "audio_loader_sharded.h"
+
+// Holds one AudioLoader per shard and serves batches from them in rotation.
+AudioLoaderSharded::AudioLoaderSharded(void* dev_resources):
+        _dev_resources(dev_resources) {
+    _loader_idx = 0;
+}
+
+// Sets the depth passed to every per-shard loader created in initialize().
+void AudioLoaderSharded::set_prefetch_queue_depth(size_t prefetch_queue_depth) {
+    if(prefetch_queue_depth <= 0)
+        THROW("Prefetch queue depth value cannot be zero or negative");
+    _prefetch_queue_depth = prefetch_queue_depth;
+}
+
+// Sample names of the batch last served by the current shard loader.
+std::vector AudioLoaderSharded::get_id() {
+    if(!_initialized)
+        THROW("get_id() should be called after initialize() function");
+    return _loaders[_loader_idx]->get_id();
+}
+
+// Per-sample info of the batch last served by the current shard loader.
+// Guarded like get_id(): before initialize() there is no loader to index.
+decoded_sample_info AudioLoaderSharded::get_decode_sample_info() {
+    if(!_initialized)
+        THROW("get_decode_sample_info() should be called after initialize() function");
+    return _loaders[_loader_idx]->get_decode_sample_info();
+}
+
+AudioLoaderSharded::~AudioLoaderSharded() {
+    _loaders.clear();
+}
+
+// size_t AudioLoaderSharded::last_batch_padded_size() {
+//     size_t sum = 0;
+//     for(auto& loader: _loaders)
+//         sum += loader->last_batch_padded_size();
+//     return sum;
+// }
+
+// Advances _loader_idx past loaders that report nothing remaining. Gives up
+// after one full rotation, which leaves the index where it started.
+void
+AudioLoaderSharded::fast_forward_through_empty_loaders() {
+    if (_loaders.empty())
+        return;
+    size_t loaders_left = _loaders.size();
+    // The bound is tested first so the loader is only indexed while steps remain.
+    while (loaders_left-- > 0 && _loaders[_loader_idx]->remaining_count() == 0)
+        increment_loader_idx();
+}
+
+// Moves to the next shard loader that still has data and asks it for a batch.
+LoaderModuleStatus AudioLoaderSharded::load_next() {
+    if(!_initialized)
+        return LoaderModuleStatus::NOT_INITIALIZED;
+    increment_loader_idx();
+    // Since loaders may have different number of audios loaded, some run out earlier than other.
+    // Fast forward through loaders that are empty to get to a loader that is not empty.
+    fast_forward_through_empty_loaders();
+    return _loaders[_loader_idx]->load_next();
+}
+
+// Creates and initializes one loader per shard; every loader writes into the
+// tensor given to set_output(). Calling it again after success does nothing.
+void
+AudioLoaderSharded::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type,
+                               unsigned batch_size, bool keep_orig_size) {
+    if(_initialized)
+        return;
+    _shard_count = reader_cfg.get_shard_count();
+    // With zero shards there would be no loader to index and
+    // increment_loader_idx() would divide by zero.
+    if(_shard_count < 1)
+        THROW("Shard count should be greater than or equal to one");
+    // Create loader modules
+    for(size_t i = 0; i < _shard_count; i++) {
+        std::shared_ptr loader = std::make_shared(_dev_resources);
+        loader->set_prefetch_queue_depth(_prefetch_queue_depth);
+        _loaders.push_back(loader);
+    }
+    // Initialize loader modules
+    for(size_t idx = 0; idx < _shard_count; idx++) {
+        _loaders[idx]->set_output(_output_tensor);
+        _loaders[idx]->set_gpu_device_id(idx);
+        reader_cfg.set_shard_count(_shard_count);
+        reader_cfg.set_shard_id(idx);
+        _loaders[idx]->initialize(reader_cfg, decoder_cfg, mem_type, batch_size, keep_orig_size);
+    }
+    _initialized = true;
+}
+
+// Starts the worker thread of every shard loader.
+void AudioLoaderSharded::start_loading() {
+    for(unsigned i = 0; i < _loaders.size(); i++) {
+        _loaders[i]->start_loading();
+        // Changing thread scheduling policy and its priority does not help on latest Ubuntu builds
+        // and needs tweaking the Linux security settings, can be turned on for experimentation
+#if 0
+        // Set thread scheduling policy
+        struct sched_param params;
+        params.sched_priority = sched_get_priority_max(SCHED_FIFO);
+        _loaders[i]->set_cpu_sched_policy(params);
+#endif
+        // Setting cpu affinity for threads works and can be activated below for experimentation
+#if 0
+        // Set thread affinity: thread 0 to core 0, thread 1 to core 1, ...
+        cpu_set_t cpuset;
+        CPU_ZERO(&cpuset);
+        CPU_SET(i, &cpuset);
+        _loaders[i]->set_cpu_affinity(cpuset);
+#endif
+    }
+}
+
+void AudioLoaderSharded::shut_down() {
+    for(auto& loader: _loaders)
+        loader->shut_down();
+}
+
+// Only stores the tensor; the per-shard loaders receive it in initialize().
+void AudioLoaderSharded::set_output(Tensor* output_tensor) {
+    _output_tensor = output_tensor;
+}
+
+// Total number of samples left across all shard loaders.
+size_t AudioLoaderSharded::remaining_count() {
+    // Accumulate in size_t: the per-loader counts are size_t and an int
+    // accumulator could truncate or overflow.
+    size_t sum = 0;
+    for(auto& loader: _loaders)
+        sum += loader->remaining_count();
+    return sum;
+}
+
+void AudioLoaderSharded::reset() {
+    for(auto& loader: _loaders) {
+        loader->reset();
+        // loader->last_batch_padded_size();
+    }
+}
+
+// Rotates to the next shard loader; initialize() guarantees at least one shard.
+void AudioLoaderSharded::increment_loader_idx() {
+    _loader_idx = (_loader_idx + 1)%_shard_count;
+}
+
+// Combines the timings of all shard loaders.
+Timing AudioLoaderSharded::timing() {
+    Timing t;
+    long long unsigned max_decode_time = 0;
+    long long unsigned max_read_time = 0;
+    long long unsigned swap_handle_time = 0;
+    // Audio read and decode run in parallel across the loaders, so the latency the
+    // AudioLoaderSharded user observes on load_next() is the maximum over all
+    // loaders; swap times are added up.
+    for(auto& loader: _loaders) {
+        auto info = loader->timing();
+        max_read_time = (info.audio_read_time > max_read_time) ? info.audio_read_time : max_read_time;
+        max_decode_time = (info.audio_decode_time > max_decode_time) ? info.audio_decode_time : max_decode_time;
+        swap_handle_time += info.audio_process_time;
+    }
+    t.audio_decode_time = max_decode_time;
+    t.audio_read_time = max_read_time;
+    t.audio_process_time = swap_handle_time;
+    return t;
+}
+
diff --git a/rocAL/source/loaders/audio/audio_read_and_decode.cpp b/rocAL/source/loaders/audio/audio_read_and_decode.cpp
new file mode 100644
index 000000000..55a17f652
--- /dev/null
+++ b/rocAL/source/loaders/audio/audio_read_and_decode.cpp
@@ -0,0 +1,157 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+
+#include
+#include
+#include "decoder_factory.h"
+#include "audio_decoder_factory.h"
+#include "audio_read_and_decode.h"
+
+// Accumulated file-read and decode timings of this reader/decoder pair.
+Timing
+AudioReadAndDecode::timing() {
+    Timing t;
+    t.audio_decode_time = _decode_time.get_timing();
+    t.audio_read_time = _file_load_time.get_timing();
+    return t;
+}
+
+AudioReadAndDecode::AudioReadAndDecode():
+    _file_load_time("FileLoadTime", DBG_TIMING ),
+    _decode_time("DecodeTime", DBG_TIMING) {
+}
+
+AudioReadAndDecode::~AudioReadAndDecode() {
+    _reader = nullptr;
+    _decoder.clear();
+}
+
+// Sizes the per-batch bookkeeping, creates one decoder per batch slot (unless
+// decoding is skipped) and creates the reader.
+// `device_id` is accepted for interface compatibility and is not used here.
+void
+AudioReadAndDecode::create(ReaderConfig reader_config, DecoderConfig decoder_config, int batch_size, int device_id) {
+    if (batch_size < 1)
+        THROW("Batch size should be greater than or equal to one");
+    // Can initialize it to any decoder types if needed
+    _batch_size = batch_size;
+    _decoder.resize(batch_size);
+    _audio_names.resize(batch_size);
+    _audio_file_path.resize(batch_size);
+    _decompressed_buff_ptrs.resize(_batch_size);
+    _actual_decoded_samples.resize(_batch_size);
+    _actual_decoded_channels.resize(_batch_size);
+    _original_channels.resize(_batch_size);
+    _original_samples.resize(_batch_size);
+    _original_sample_rates.resize(_batch_size);
+    _decoder_config = decoder_config;
+    if ((_decoder_config._type != DecoderType::SKIP_DECODE)) {
+        for (int i = 0; i < batch_size; i++) {
+            _decoder[i] = create_audio_decoder(decoder_config);
+        }
+    }
+    _reader = create_reader(reader_config);
+    _input_path = reader_config.path();
+    _num_threads = reader_config.get_cpu_num_threads();
+    // The OpenMP num_threads clause requires a positive value.
+    if (_num_threads < 1)
+        _num_threads = 1;
+    // back() on an empty string is undefined, so only touch non-empty paths.
+    if(!_input_path.empty() && _input_path.back() != '/')
+        _input_path = _input_path + "/";
+}
+
+// Rewinds the reader to the start of the data set.
+void
+AudioReadAndDecode::reset() {
+    _reader->reset();
+}
+
+// Number of items the reader still holds.
+size_t
+AudioReadAndDecode::count() {
+    return _reader->count_items();
+}
+
+// size_t
+// AudioReadAndDecode::last_batch_padded_size() {
+//     return _reader->last_batch_padded_size();
+// }
+
+// Reads the next batch of file names from the reader and decodes every file
+// into its slot of `buff`.
+//   buff                  output buffer; slot i starts at
+//                         buff + i * max_decoded_samples * max_decoded_channels
+//   names                 receives the reader id of each sample
+//   max_decoded_samples   largest sample count one slot can hold
+//   max_decoded_channels  largest channel count one slot can hold
+//   roi_samples, roi_channels, actual_sample_rates
+//                         receive the values reported by the decoder
+// Returns NO_MORE_DATA_TO_READ when a full batch cannot be assembled and OK
+// otherwise. Raises through THROW on invalid arguments, on a decode failure,
+// or when a file does not fit its slot.
+LoaderModuleStatus
+AudioReadAndDecode::load(float* buff,
+                         std::vector& names,
+                         const size_t max_decoded_samples,
+                         const size_t max_decoded_channels,
+                         std::vector &roi_samples,
+                         std::vector &roi_channels,
+                         std::vector &actual_sample_rates) {
+    if(max_decoded_samples == 0 || max_decoded_channels == 0 )
+        THROW("Zero audio dimension is not valid")
+    if(!buff)
+        THROW("Null pointer passed as output buffer")
+    if(_reader->count_items() < _batch_size)
+        return LoaderModuleStatus::NO_MORE_DATA_TO_READ;
+    // load audios/frames from the disk and push them as a large audio onto the buff
+    unsigned file_counter = 0;
+    const size_t audio_size = max_decoded_samples * max_decoded_channels;
+    // File read is done serially since I/O parallelization does not work very well.
+    _file_load_time.start();// Debug timing
+    while ((file_counter != _batch_size) && _reader->count_items() > 0) {
+        size_t fsize = _reader->open();
+        if (fsize == 0) {
+            WRN("Opened file " + _reader->id() + " of size 0");
+            continue;
+        }
+        _audio_names[file_counter] = _reader->id();
+        _audio_file_path[file_counter] = _reader->file_path();
+        _reader->close();
+        file_counter++;
+    }
+    _file_load_time.end();// Debug timing
+    // Skipped empty files can exhaust the reader before the batch is full; the
+    // unfilled slots would still hold paths from an earlier batch.
+    if (file_counter != _batch_size)
+        return LoaderModuleStatus::NO_MORE_DATA_TO_READ;
+    _decode_time.start();// Debug timing
+    if (_decoder_config._type != DecoderType::SKIP_DECODE) {
+        for (size_t i = 0; i < _batch_size; i++){
+            _decompressed_buff_ptrs[i] = buff + (audio_size * i);
+        }
+        // An exception must not leave an OpenMP parallel region, so each
+        // iteration records its failure in its own slot and the first one is
+        // raised after the loop.
+        std::vector<std::string> decode_errors(_batch_size);
+#pragma omp parallel for num_threads(_num_threads) // default(none) TBD: option disabled in Ubuntu 20.04
+        for (size_t i = 0; i < _batch_size; i++) {
+            // initialize the actual decoded channels and samples with the maximum
+            _actual_decoded_samples[i] = max_decoded_samples;
+            _actual_decoded_channels[i] = max_decoded_channels;
+            int original_samples = 0, original_channels = 0;
+            float original_sample_rates = 0.0f;
+            try {
+                if (_decoder[i]->initialize(_audio_file_path[i].c_str()) != AudioDecoder::Status::OK) {
+                    decode_errors[i] = "Decoder can't be initialized for file: " + _audio_names[i];
+                    continue;
+                }
+                if (_decoder[i]->decode_info(&original_samples, &original_channels, &original_sample_rates) != AudioDecoder::Status::OK) {
+                    _decoder[i]->release();
+                    decode_errors[i] = "Unable to fetch decode info for file: " + _audio_names[i];
+                    continue;
+                }
+                // decode() writes the whole file, so a file larger than the
+                // slot would overrun the neighbouring slot or the buffer.
+                if (original_samples < 1 || original_channels < 1 ||
+                    static_cast<size_t>(original_samples) > max_decoded_samples ||
+                    static_cast<size_t>(original_channels) > max_decoded_channels) {
+                    _decoder[i]->release();
+                    decode_errors[i] = "Decoded audio does not fit the output buffer for file: " + _audio_names[i];
+                    continue;
+                }
+                _original_channels[i] = original_channels;
+                _original_samples[i] = original_samples;
+                _original_sample_rates[i] = original_sample_rates;
+                // The decoder closes its own handle when decode() fails.
+                if (_decoder[i]->decode(_decompressed_buff_ptrs[i]) != AudioDecoder::Status::OK) {
+                    decode_errors[i] = "Decoder failed for file: " + _audio_names[i];
+                    continue;
+                }
+                _decoder[i]->release();
+            } catch (const std::exception &e) {
+                _decoder[i]->release();
+                decode_errors[i] = "Decoder raised an exception for file: " + _audio_names[i] + " : " + e.what();
+            }
+        }
+        for (const auto &decode_error : decode_errors) {
+            if (!decode_error.empty()) {
+                _decode_time.end();// Debug timing
+                THROW(decode_error)
+            }
+        }
+        for (size_t i = 0; i < _batch_size; i++) {
+            names[i] = _audio_names[i];
+            roi_samples[i] = _original_samples[i];
+            roi_channels[i] = _original_channels[i];
+            actual_sample_rates[i] = _original_sample_rates[i];
+        }
+    }
+    _decode_time.end();// Debug timing
+    return LoaderModuleStatus::OK;
+}
+
diff --git a/rocAL/source/loaders/audio/audio_source_evaluator.cpp b/rocAL/source/loaders/audio/audio_source_evaluator.cpp
new file mode 100644
index 000000000..3cafbf205
--- /dev/null
+++ b/rocAL/source/loaders/audio/audio_source_evaluator.cpp
@@ -0,0 +1,77 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "audio_source_evaluator.h"
+#include "audio_decoder_factory.h"
+#include "reader_factory.h"
+
+// Largest sample count found by find_max_dimension().
+size_t AudioSourceEvaluator::max_samples() {
+    return _samples_max;
+}
+
+// Largest channel count found by find_max_dimension().
+size_t AudioSourceEvaluator::max_channels() {
+    return _channels_max;
+}
+
+// Creates the decoder and reader for the given configuration and scans the
+// data set once to record its largest sample and channel counts.
+AudioSourceEvaluatorStatus
+AudioSourceEvaluator::create(ReaderConfig reader_cfg, DecoderConfig decoder_cfg) {
+    AudioSourceEvaluatorStatus status = AudioSourceEvaluatorStatus::OK;
+    // Can initialize it to any decoder types if needed
+    _input_path = reader_cfg.path();
+    // back() on an empty string is undefined, so only touch non-empty paths.
+    if(!_input_path.empty() && _input_path.back() != '/') {
+        _input_path = _input_path + "/";
+    }
+    _decoder = create_audio_decoder(std::move(decoder_cfg));
+    _reader = create_reader(std::move(reader_cfg));
+    find_max_dimension();
+    return status;
+}
+
+// Walks every item of the reader, reads each file header through the decoder
+// and keeps the largest sample and channel counts seen. Files that cannot be
+// opened or whose header is unusable are skipped. The reader is rewound
+// before and after the scan.
+void
+AudioSourceEvaluator::find_max_dimension() {
+    _reader->reset();
+    while( _reader->count_items() ) {
+        size_t fsize = _reader->open();
+        if( (fsize) == 0 )
+            continue;
+        // Only the id and path are needed; the decoder opens the file itself,
+        // so the reader's handle is closed straight away.
+        const auto file_id = _reader->id();
+        const auto file_name = _reader->file_path();
+        _reader->close();
+        if(_decoder->initialize(file_name.c_str()) != AudioDecoder::Status::OK) {
+            WRN("Could not initialize audio decoder for file : "+ file_id)
+            continue;
+        }
+        int samples = 0, channels = 0;
+        float sample_rates = 0.0f;
+        if(_decoder->decode_info(&samples, &channels, &sample_rates) != AudioDecoder::Status::OK) {
+            WRN("Could not decode the header of the: "+ file_id)
+            // The decoder was opened above; release it before moving on.
+            _decoder->release();
+            continue;
+        }
+        if(samples > 0 && channels > 0) {
+            _samples_max = std::max(samples, _samples_max);
+            _channels_max = std::max(channels, _channels_max);
+        }
+        _decoder->release();
+    }
+    // return the reader read pointer to the beginning of the resource
+    _reader->reset();
+}
+
+
diff --git a/rocAL/source/loaders/audio/node_audio_loader.cpp
b/rocAL/source/loaders/audio/node_audio_loader.cpp new file mode 100644 index 000000000..8058be518 --- /dev/null +++ b/rocAL/source/loaders/audio/node_audio_loader.cpp @@ -0,0 +1,58 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include "node_audio_loader.h"
+#include "exception.h"
+
+// Graph node that owns the loader module writing into `output`.
+AudioLoaderNode::AudioLoaderNode(Tensor *output, void* device_resources):
+        Node({}, {output}) {
+    _loader_module = std::make_shared(device_resources);
+}
+
+
+// Configures the reader and decoder for the loader module, initializes the
+// module with the node's first output tensor and starts loading.
+// Raises through THROW when the module is missing or the shard count is zero.
+void AudioLoaderNode::init(unsigned internal_shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &source_list_path, StorageType storage_type,
+                           DecoderType decoder_type, bool shuffle, bool loop, size_t load_batch_count, RocalMemType mem_type, std::shared_ptr meta_data_reader) {
+    if(_loader_module == nullptr)
+        THROW("ERROR: loader module is not set for AudioLoaderNode, cannot initialize")
+    if(internal_shard_count < 1)
+        THROW("Shard count should be greater than or equal to one")
+    _loader_module->set_output(_outputs[0]);
+    // Reader settings: storage, paths, shuffle/loop flags, sharding and threading
+    ReaderConfig reader_cfg(storage_type, source_path, source_list_path, std::map(), shuffle, loop);
+    reader_cfg.set_shard_count(internal_shard_count);
+    reader_cfg.set_batch_count(load_batch_count);
+    reader_cfg.set_meta_data_reader(meta_data_reader);
+    reader_cfg.set_cpu_num_threads(cpu_num_threads);
+    // Decoder settings carry only the decoder type
+    DecoderConfig decoder_cfg(decoder_type);
+    _loader_module->initialize(reader_cfg, decoder_cfg, mem_type, _batch_size, false);
+    _loader_module->start_loading();
+}
+
+// Returns the loader module; logs a warning when it has not been created.
+std::shared_ptr AudioLoaderNode::get_loader_module() {
+    if(_loader_module == nullptr)
+        WRN("AudioLoaderNode's loader module is null, not initialized")
+    return _loader_module;
+}
+
+AudioLoaderNode::~AudioLoaderNode() {
+    _loader_module.reset();
+}
+
diff --git a/rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp b/rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp
new file mode 100644
index 000000000..0105db9d0
--- /dev/null
+++ b/rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp
@@ -0,0 +1,63 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include "node_audio_loader_single_shard.h"
+#include "exception.h"
+
+
+// Graph node that owns a loader module serving one shard into `output`.
+AudioLoaderSingleShardNode::AudioLoaderSingleShardNode(Tensor *output, void* device_resources):
+        Node({}, {output}) {
+    _loader_module = std::make_shared(device_resources);
+}
+
+// Configures the reader and decoder for shard `shard_id` of `shard_count`,
+// initializes the loader module with the node's first output tensor and
+// starts loading.
+// Raises through THROW when the module is missing, the shard count is zero or
+// the shard id is out of range.
+void
+AudioLoaderSingleShardNode::init(unsigned shard_id, unsigned shard_count, unsigned cpu_num_threads, const std::string &source_path, const std::string &source_list_path,
+                                 StorageType storage_type, DecoderType decoder_type, bool shuffle, bool loop, size_t load_batch_count,
+                                 RocalMemType mem_type, std::shared_ptr meta_data_reader) {
+    if(!_loader_module)
+        THROW("ERROR: loader module is not set for AudioLoaderSingleShardNode, cannot initialize")
+    if(shard_count < 1)
+        THROW("Shard count should be greater than or equal to one")
+    if(shard_id >= shard_count)
+        THROW("Shard id should be smaller than shard count")
+    _loader_module->set_output(_outputs[0]);
+    // Set reader and decoder config accordingly for the AudioLoaderSingleShardNode
+    auto reader_cfg = ReaderConfig(storage_type, source_path, source_list_path, std::map(), shuffle, loop);
+    reader_cfg.set_shard_count(shard_count);
+    reader_cfg.set_shard_id(shard_id);
+    reader_cfg.set_batch_count(load_batch_count);
+    reader_cfg.set_meta_data_reader(meta_data_reader);
+    reader_cfg.set_cpu_num_threads(cpu_num_threads);
+    _loader_module->initialize(reader_cfg, DecoderConfig(decoder_type), mem_type, _batch_size);
+    _loader_module->start_loading();
+}
+
+// Returns the loader module; logs a warning when it has not been created.
+std::shared_ptr AudioLoaderSingleShardNode::get_loader_module() {
+    if(!_loader_module)
+        WRN("AudioLoaderSingleShardNode's loader module is null, not initialized")
+    return _loader_module;
+}
+
+AudioLoaderSingleShardNode::~AudioLoaderSingleShardNode() {
+    _loader_module = nullptr;
+}
+
diff --git a/rocAL/source/loaders/circular_buffer.cpp b/rocAL/source/loaders/circular_buffer.cpp
index 25ba7cb59..134bd9e08 100644
--- a/rocAL/source/loaders/circular_buffer.cpp
+++ b/rocAL/source/loaders/circular_buffer.cpp
@@ -40,8 +40,8 @@ void
CircularBuffer::reset() { _write_ptr = 0; _read_ptr = 0; _level = 0; - while (!_circ_image_info.empty()) - _circ_image_info.pop(); + while (!_circ_sample_info.empty()) + _circ_sample_info.pop(); if (random_bbox_crop_flag == true) { while (!_circ_crop_image_info.empty()) _circ_crop_image_info.pop(); @@ -135,7 +135,7 @@ void CircularBuffer::push() { sync(); // Pushing to the _circ_buff and _circ_buff_names must happen all at the same time std::unique_lock lock(_names_buff_lock); - _circ_image_info.push(_last_image_info); + _circ_sample_info.push(_last_sample_info); if (random_bbox_crop_flag == true) _circ_crop_image_info.push(_last_crop_image_info); increment_write_ptr(); @@ -147,7 +147,7 @@ void CircularBuffer::pop() { // Pushing to the _circ_buff and _circ_buff_names must happen all at the same time std::unique_lock lock(_names_buff_lock); increment_read_ptr(); - _circ_image_info.pop(); + _circ_sample_info.pop(); if (random_bbox_crop_flag == true) _circ_crop_image_info.pop(); } @@ -338,12 +338,12 @@ CircularBuffer::~CircularBuffer() { _initialized = false; } -decoded_image_info &CircularBuffer::get_image_info() { +decoded_sample_info &CircularBuffer::get_sample_info() { block_if_empty(); std::unique_lock lock(_names_buff_lock); - if (_level != _circ_image_info.size()) - THROW("CircularBuffer internals error, image and image info sizes not the same " + TOSTR(_level) + " != " + TOSTR(_circ_image_info.size())) - return _circ_image_info.front(); + if (_level != _circ_sample_info.size()) + THROW("CircularBuffer internals error, sample and sample info sizes not the same " + TOSTR(_level) + " != " + TOSTR(_circ_sample_info.size())) + return _circ_sample_info.front(); } crop_image_info &CircularBuffer::get_cropped_image_info() { diff --git a/rocAL/source/loaders/image/cifar10_data_loader.cpp b/rocAL/source/loaders/image/cifar10_data_loader.cpp index d3e89218e..15e1347ce 100644 --- a/rocAL/source/loaders/image/cifar10_data_loader.cpp +++ 
b/rocAL/source/loaders/image/cifar10_data_loader.cpp @@ -119,7 +119,7 @@ void CIFAR10DataLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decode throw; } _actual_read_size.resize(batch_size); - _raw_img_info._image_names.resize(_batch_size); + _raw_img_info._sample_names.resize(_batch_size); _raw_img_info._roi_width.resize(_batch_size); // used to store the individual image in a big raw file _raw_img_info._roi_height.resize(batch_size); _raw_img_info._original_height.resize(_batch_size); @@ -182,7 +182,7 @@ CIFAR10DataLoader::load_routine() { continue; } _actual_read_size[file_counter] = _reader->read_data(read_ptr, readSize); - _raw_img_info._image_names[file_counter] = _reader->id(); + _raw_img_info._sample_names[file_counter] = _reader->id(); _raw_img_info._roi_width[file_counter] = _output_tensor->info().max_shape()[0]; _raw_img_info._roi_height[file_counter] = _output_tensor->info().max_shape()[1]; _reader->close(); @@ -190,13 +190,13 @@ CIFAR10DataLoader::load_routine() { } if (_randombboxcrop_meta_data_reader) { // Fetch the crop co-ordinates for a batch of images - _bbox_coords = _randombboxcrop_meta_data_reader->get_batch_crop_coords(_raw_img_info._image_names); + _bbox_coords = _randombboxcrop_meta_data_reader->get_batch_crop_coords(_raw_img_info._sample_names); set_batch_random_bbox_crop_coords(_bbox_coords); _crop_image_info._crop_image_coords = get_batch_random_bbox_crop_coords(); _circ_buff.set_crop_image_info(_crop_image_info); } _file_load_time.end(); // Debug timing - _circ_buff.set_image_info(_raw_img_info); + _circ_buff.set_sample_info(_raw_img_info); _circ_buff.push(); _image_counter += _output_tensor->info().batch_size(); load_status = LoaderModuleStatus::OK; @@ -254,11 +254,11 @@ CIFAR10DataLoader::update_output_image() { if (_stopped) return LoaderModuleStatus::OK; - _output_decoded_img_info = _circ_buff.get_image_info(); + _output_decoded_img_info = _circ_buff.get_sample_info(); if (_randombboxcrop_meta_data_reader) { 
_output_cropped_image_info = _circ_buff.get_cropped_image_info(); } - _output_names = _output_decoded_img_info._image_names; + _output_names = _output_decoded_img_info._sample_names; _output_tensor->update_tensor_roi(_output_decoded_img_info._roi_width, _output_decoded_img_info._roi_height); _circ_buff.pop(); @@ -279,7 +279,7 @@ std::vector CIFAR10DataLoader::get_id() { return _output_names; } -decoded_image_info CIFAR10DataLoader::get_decode_image_info() { +decoded_sample_info CIFAR10DataLoader::get_decode_sample_info() { return _output_decoded_img_info; } diff --git a/rocAL/source/loaders/image/image_loader.cpp b/rocAL/source/loaders/image/image_loader.cpp index 00abf74cc..71a8e1009 100644 --- a/rocAL/source/loaders/image/image_loader.cpp +++ b/rocAL/source/loaders/image/image_loader.cpp @@ -152,7 +152,7 @@ void ImageLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, } _max_tensor_width = _output_tensor->info().max_shape().at(0); _max_tensor_height = _output_tensor->info().max_shape().at(1); - _decoded_img_info._image_names.resize(_batch_size); + _decoded_img_info._sample_names.resize(_batch_size); _decoded_img_info._roi_height.resize(_batch_size); _decoded_img_info._roi_width.resize(_batch_size); _decoded_img_info._original_height.resize(_batch_size); @@ -187,7 +187,7 @@ ImageLoader::load_routine() { auto load_status = LoaderModuleStatus::NO_MORE_DATA_TO_READ; { load_status = _image_loader->load(data, - _decoded_img_info._image_names, + _decoded_img_info._sample_names, _max_tensor_width, _max_tensor_height, _decoded_img_info._roi_width, @@ -201,7 +201,7 @@ ImageLoader::load_routine() { _crop_image_info._crop_image_coords = _image_loader->get_batch_random_bbox_crop_coords(); _circ_buff.set_crop_image_info(_crop_image_info); } - _circ_buff.set_image_info(_decoded_img_info); + _circ_buff.set_sample_info(_decoded_img_info); _circ_buff.push(); _image_counter += _output_tensor->info().batch_size(); } @@ -259,11 +259,11 @@ 
ImageLoader::update_output_image() { if (_stopped) return LoaderModuleStatus::OK; - _output_decoded_img_info = _circ_buff.get_image_info(); + _output_decoded_img_info = _circ_buff.get_sample_info(); if (_randombboxcrop_meta_data_reader) { _output_cropped_img_info = _circ_buff.get_cropped_image_info(); } - _output_names = _output_decoded_img_info._image_names; + _output_names = _output_decoded_img_info._sample_names; _output_tensor->update_tensor_roi(_output_decoded_img_info._roi_width, _output_decoded_img_info._roi_height); _circ_buff.pop(); if (!_loop) @@ -307,7 +307,7 @@ std::vector ImageLoader::get_id() { return _output_names; } -decoded_image_info ImageLoader::get_decode_image_info() { +decoded_sample_info ImageLoader::get_decode_sample_info() { return _output_decoded_img_info; } diff --git a/rocAL/source/loaders/image/image_loader_sharded.cpp b/rocAL/source/loaders/image/image_loader_sharded.cpp index 02d7f05f0..bacf5b573 100644 --- a/rocAL/source/loaders/image/image_loader_sharded.cpp +++ b/rocAL/source/loaders/image/image_loader_sharded.cpp @@ -38,8 +38,8 @@ std::vector ImageLoaderSharded::get_id() { return _loaders[_loader_idx]->get_id(); } -decoded_image_info ImageLoaderSharded::get_decode_image_info() { - return _loaders[_loader_idx]->get_decode_image_info(); +decoded_sample_info ImageLoaderSharded::get_decode_sample_info() { + return _loaders[_loader_idx]->get_decode_sample_info(); } crop_image_info ImageLoaderSharded::get_crop_image_info() { diff --git a/rocAL/source/loaders/video/video_loader.cpp b/rocAL/source/loaders/video/video_loader.cpp index db83786d8..0faadeb1e 100644 --- a/rocAL/source/loaders/video/video_loader.cpp +++ b/rocAL/source/loaders/video/video_loader.cpp @@ -131,7 +131,7 @@ void VideoLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, } _max_tensor_width = _output_tensor->info().max_shape().at(0); _max_tensor_height = _output_tensor->info().max_shape().at(1); - _decoded_img_info._image_names.resize(_batch_size); + 
_decoded_img_info._sample_names.resize(_batch_size); _decoded_img_info._roi_height.resize(_batch_size); _decoded_img_info._roi_width.resize(_batch_size); _decoded_img_info._original_height.resize(_batch_size); @@ -163,7 +163,7 @@ VideoLoader::load_routine() { auto load_status = LoaderModuleStatus::NO_MORE_DATA_TO_READ; { load_status = _video_loader->load(data, - _decoded_img_info._image_names, + _decoded_img_info._sample_names, _max_tensor_width, _max_tensor_height, _decoded_img_info._roi_width, @@ -175,7 +175,7 @@ VideoLoader::load_routine() { _output_tensor->info().color_format()); if (load_status == LoaderModuleStatus::OK) { - _circ_buff.set_image_info(_decoded_img_info); + _circ_buff.set_sample_info(_decoded_img_info); _circ_buff.push(); _image_counter += _output_tensor->info().batch_size(); } @@ -233,8 +233,8 @@ VideoLoader::update_output_image() { } if (_stopped) return LoaderModuleStatus::OK; - _output_decoded_img_info = _circ_buff.get_image_info(); - _output_names = _output_decoded_img_info._image_names; + _output_decoded_img_info = _circ_buff.get_sample_info(); + _output_names = _output_decoded_img_info._sample_names; _output_tensor->update_tensor_roi(_output_decoded_img_info._roi_width, _output_decoded_img_info._roi_height); _circ_buff.pop(); if (!_loop) @@ -277,7 +277,7 @@ std::vector VideoLoader::get_id() { return _output_names; } -decoded_image_info VideoLoader::get_decode_image_info() { +decoded_sample_info VideoLoader::get_decode_sample_info() { return _output_decoded_img_info; } diff --git a/rocAL/source/loaders/video/video_loader_sharded.cpp b/rocAL/source/loaders/video/video_loader_sharded.cpp index 5afecf642..6093b507a 100644 --- a/rocAL/source/loaders/video/video_loader_sharded.cpp +++ b/rocAL/source/loaders/video/video_loader_sharded.cpp @@ -39,8 +39,8 @@ std::vector VideoLoaderSharded::get_id() { return _loaders[_loader_idx]->get_id(); } -decoded_image_info VideoLoaderSharded::get_decode_image_info() { - return 
_loaders[_loader_idx]->get_decode_image_info(); +decoded_sample_info VideoLoaderSharded::get_decode_sample_info() { + return _loaders[_loader_idx]->get_decode_sample_info(); } VideoLoaderSharded::~VideoLoaderSharded() { diff --git a/rocAL/source/meta_data/bounding_box_graph.cpp b/rocAL/source/meta_data/bounding_box_graph.cpp index 0dd882a2d..1943876b4 100644 --- a/rocAL/source/meta_data/bounding_box_graph.cpp +++ b/rocAL/source/meta_data/bounding_box_graph.cpp @@ -31,7 +31,7 @@ void BoundingBoxGraph::process(pMetaDataBatch input_meta_data, pMetaDataBatch ou } // This function is used to rescale the metadata values w.r.t the decoded image sizes -void BoundingBoxGraph::update_meta_data(pMetaDataBatch input_meta_data, decoded_image_info decode_image_info) { +void BoundingBoxGraph::update_meta_data(pMetaDataBatch input_meta_data, decoded_sample_info decode_image_info) { std::vector original_height = decode_image_info._original_height; std::vector original_width = decode_image_info._original_width; std::vector roi_width = decode_image_info._roi_width; @@ -68,7 +68,7 @@ inline float ssd_BBoxIntersectionOverUnion(const BoundingBoxCord &box1, const fl return (float)(intersection_area / (box1_area + box2_area - intersection_area)); } -void BoundingBoxGraph::update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decode_image_info, crop_image_info crop_image_info) { +void BoundingBoxGraph::update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_sample_info decode_image_info, crop_image_info crop_image_info) { std::vector original_height = decode_image_info._original_height; std::vector original_width = decode_image_info._original_width; std::vector roi_width = decode_image_info._roi_width; diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index 917913338..34409ee52 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ 
b/rocAL/source/pipeline/master_graph.cpp @@ -903,16 +903,16 @@ void MasterGraph::output_routine() { THROW("Loader module failed to load next batch of images, status " + TOSTR(load_ret)) if (!_processing) break; - auto full_batch_image_names = _loader_module->get_id(); - auto decode_image_info = _loader_module->get_decode_image_info(); + auto full_batch_sample_names = _loader_module->get_id(); + auto decode_sample_info = _loader_module->get_decode_sample_info(); auto crop_image_info = _loader_module->get_crop_image_info(); - if (full_batch_image_names.size() != _user_batch_size) - WRN("Internal problem: names count " + TOSTR(full_batch_image_names.size())) + if (full_batch_sample_names.size() != _user_batch_size) + WRN("Internal problem: names count " + TOSTR(full_batch_sample_names.size())) // meta_data lookup is done before _meta_data_graph->process() is called to have the new meta_data ready for processing if (_meta_data_reader) - _meta_data_reader->lookup(full_batch_image_names); + _meta_data_reader->lookup(full_batch_sample_names); if (!_processing) break; @@ -936,9 +936,9 @@ void MasterGraph::output_routine() { output_meta_data = _augmented_meta_data->clone(!_augmentation_metanode); // copy the data if metadata is not processed by the nodes, else create an empty instance if (_meta_data_graph) { if (_is_random_bbox_crop) { - _meta_data_graph->update_random_bbox_meta_data(_augmented_meta_data, output_meta_data, decode_image_info, crop_image_info); + _meta_data_graph->update_random_bbox_meta_data(_augmented_meta_data, output_meta_data, decode_sample_info, crop_image_info); } else { - _meta_data_graph->update_meta_data(_augmented_meta_data, decode_image_info); + _meta_data_graph->update_meta_data(_augmented_meta_data, decode_sample_info); } _meta_data_graph->process(_augmented_meta_data, output_meta_data); } @@ -970,7 +970,7 @@ void MasterGraph::output_routine() { _sequence_start_framenum_vec.insert(_sequence_start_framenum_vec.begin(), 
_loader_module->get_sequence_start_frame_number()); _sequence_frame_timestamps_vec.insert(_sequence_frame_timestamps_vec.begin(), _loader_module->get_sequence_frame_timestamps()); #endif - _ring_buffer.set_meta_data(full_batch_image_names, output_meta_data); + _ring_buffer.set_meta_data(full_batch_sample_names, output_meta_data); _ring_buffer.push(); // Image data and metadata is now stored in output the ring_buffer, increases it's level by 1 } } catch (const std::exception &e) { diff --git a/rocAL/source/pipeline/tensor.cpp b/rocAL/source/pipeline/tensor.cpp index bc234e813..40482fa7a 100644 --- a/rocAL/source/pipeline/tensor.cpp +++ b/rocAL/source/pipeline/tensor.cpp @@ -77,6 +77,7 @@ vx_enum interpret_tensor_data_type(RocalTensorDataType data_type) { return VX_TYPE_FLOAT16; case RocalTensorDataType::UINT8: return VX_TYPE_UINT8; + case RocalTensorDataType::INT8: default: THROW("Unsupported Tensor type " + TOSTR(data_type)) } @@ -123,6 +124,12 @@ void TensorInfo::reset_tensor_roi_buffers() { } } +void TensorInfo::reallocate_tensor_sample_rate_buffers() { + if (_is_image) + THROW("No sample rate available for Image data") + _sample_rate = std::make_shared>(_batch_size); +} + TensorInfo::TensorInfo() : _type(Type::UNKNOWN), _num_of_dims(0), @@ -202,8 +209,49 @@ void Tensor::update_tensor_roi(const std::vector &width, } } } + else if(!_info.is_metadata()) { // Audio - Data + auto max_dims = _info.max_shape(); + unsigned max_samples = max_dims.at(0); + unsigned max_channels = max_dims.at(1); + auto samples = width; + auto channels = height; + Roi2DCords *roi = _info.roi().get_2D_roi(); + + if (samples.size() != channels.size()) + THROW("Batch size of Tensor height and width info does not match") + if (samples.size() != info().batch_size()) + THROW("The batch size of actual Tensor height and width different from Tensor batch size " + TOSTR(samples.size()) + " != " + TOSTR(info().batch_size())) + for (unsigned i = 0; i < info().batch_size(); i++) { + if (samples[i] > 
max_samples) { + ERR("Given ROI width is larger than buffer width for tensor[" + TOSTR(i) + "] " + TOSTR(samples[i]) + " > " + TOSTR(max_samples)) + roi[i].xywh.x = max_samples; + } + else { + roi[i].xywh.x = samples[i]; + } + if (channels[i] > max_channels) { + ERR("Given ROI height is larger than buffer with for tensor[" + TOSTR(i) + "] " + TOSTR(channels[i]) + " > " + TOSTR(max_channels)) + roi[i].xywh.y = max_channels; + } + else { + roi[i].xywh.y = channels[i]; + } + } + } +} + +void Tensor::update_audio_tensor_sample_rate(const std::vector &sample_rate) { + if (_info.is_image()) { + THROW("No sample rate available for Image data") + } + else if(!_info.is_metadata()) { + for (unsigned i = 0; i < info().batch_size(); i++) { + _info.get_sample_rate()->at(i) = sample_rate[i]; + } + } } + void Tensor::update_tensor_roi(const std::vector> &shape) { auto max_shape = _info.max_shape(); if (shape.size() != info().batch_size()) @@ -399,6 +447,25 @@ unsigned Tensor::copy_data(void *user_buffer, RocalOutputMemType external_mem_ty return 0; } +unsigned Tensor::copy_data(void *user_buffer, uint max_y1, uint max_x1) { + if (_mem_handle == nullptr) return 0; + //TODO : Handle this case for HIP buffer + auto max_shape_x1 = _info.max_shape().at(0); + auto dtype_size = _info.data_type_size(); + auto src_stride = (max_shape_x1 * _info.max_shape().at(1) * dtype_size); + auto dst_stride = (max_y1 * max_x1 * dtype_size); + for (uint i = 0; i < _info._batch_size; i++) { + auto temp_src_ptr = static_cast(_mem_handle) + i * src_stride; + auto temp_dst_ptr = static_cast(user_buffer) + i * dst_stride; + for (uint height = 0; height < max_y1; height++) { + memcpy(temp_dst_ptr, temp_src_ptr, max_x1 * dtype_size); + temp_src_ptr += max_shape_x1 * dtype_size; + temp_dst_ptr += max_x1 * dtype_size; + } + } + return 0; +} + int Tensor::swap_handle(void *handle) { vx_status status; if ((status = vxSwapTensorHandle(_vx_handle, handle, nullptr)) != VX_SUCCESS) { diff --git 
a/rocAL/source/readers/image/file_source_reader.cpp b/rocAL/source/readers/image/file_source_reader.cpp index 2623dd411..b4e9f9d70 100644 --- a/rocAL/source/readers/image/file_source_reader.cpp +++ b/rocAL/source/readers/image/file_source_reader.cpp @@ -40,15 +40,17 @@ FileSourceReader::FileSourceReader() { _file_count_all_shards = 0; } -unsigned FileSourceReader::count_items() { - if (_loop) +unsigned FileSourceReader::count_items() +{ + if(_loop) return _file_names.size(); - int ret = ((int)_file_names.size() - _read_counter); + int ret = ((int)_file_names.size() -_read_counter); return ((ret < 0) ? 0 : ret); } -Reader::Status FileSourceReader::initialize(ReaderConfig desc) { +Reader::Status FileSourceReader::initialize(ReaderConfig desc) +{ auto ret = Reader::Status::OK; _file_id = 0; _folder_path = desc.path(); @@ -57,58 +59,64 @@ Reader::Status FileSourceReader::initialize(ReaderConfig desc) { _batch_count = desc.get_batch_size(); _shuffle = desc.shuffle(); _loop = desc.loop(); + _meta_data_reader = desc.meta_data_reader(); ret = subfolder_reading(); // the following code is required to make every shard the same size:: required for multi-gpu training if (_shard_count > 1 && _batch_count > 1) { - int _num_batches = _file_names.size() / _batch_count; - int max_batches_per_shard = (_file_count_all_shards + _shard_count - 1) / _shard_count; - max_batches_per_shard = (max_batches_per_shard + _batch_count - 1) / _batch_count; + int _num_batches = _file_names.size()/_batch_count; + int max_batches_per_shard = (_file_count_all_shards + _shard_count-1)/_shard_count; + max_batches_per_shard = (max_batches_per_shard + _batch_count-1)/_batch_count; if (_num_batches < max_batches_per_shard) { replicate_last_batch_to_pad_partial_shard(); } } - // shuffle dataset if set - if (ret == Reader::Status::OK && _shuffle) + //shuffle dataset if set + if( ret==Reader::Status::OK && _shuffle) std::random_shuffle(_file_names.begin(), _file_names.end()); return ret; } -void 
FileSourceReader::incremenet_read_ptr() { +void FileSourceReader::incremenet_read_ptr() +{ _read_counter++; _curr_file_idx = (_curr_file_idx + 1) % _file_names.size(); } -size_t FileSourceReader::open() { - auto file_path = _file_names[_curr_file_idx]; // Get next file name +size_t FileSourceReader::open() +{ + auto file_path = _file_names[_curr_file_idx];// Get next file name incremenet_read_ptr(); - _last_id = file_path; + _last_file_path = _last_id = file_path; auto last_slash_idx = _last_id.find_last_of("\\/"); - if (std::string::npos != last_slash_idx) { + if (std::string::npos != last_slash_idx) + { _last_id.erase(0, last_slash_idx + 1); } - _current_fPtr = fopen(file_path.c_str(), "rb"); // Open the file, + _current_fPtr = fopen(file_path.c_str(), "rb");// Open the file, - if (!_current_fPtr) // Check if it is ready for reading + if(!_current_fPtr) // Check if it is ready for reading return 0; - fseek(_current_fPtr, 0, SEEK_END); // Take the file read pointer to the end + fseek(_current_fPtr, 0 , SEEK_END);// Take the file read pointer to the end - _current_file_size = ftell(_current_fPtr); // Check how many bytes are there between and the current read pointer position (end of the file) + _current_file_size = ftell(_current_fPtr);// Check how many bytes are there between and the current read pointer position (end of the file) - if (_current_file_size == 0) { // If file is empty continue + if(_current_file_size == 0) + { // If file is empty continue fclose(_current_fPtr); _current_fPtr = nullptr; return 0; } - fseek(_current_fPtr, 0, SEEK_SET); // Take the file pointer back to the start + fseek(_current_fPtr, 0 , SEEK_SET);// Take the file pointer back to the start return _current_file_size; } -size_t FileSourceReader::read_data(unsigned char* buf, size_t read_size) { - if (!_current_fPtr) +size_t FileSourceReader::read_data(unsigned char* buf, size_t read_size) +{ + if(!_current_fPtr) return 0; // Requested read size bigger than the file size? 
just read as many bytes as the file size @@ -118,128 +126,146 @@ size_t FileSourceReader::read_data(unsigned char* buf, size_t read_size) { return actual_read_size; } -int FileSourceReader::close() { +int FileSourceReader::close() +{ return release(); } -FileSourceReader::~FileSourceReader() { +FileSourceReader::~FileSourceReader() +{ release(); } -int FileSourceReader::release() { - if (!_current_fPtr) +int +FileSourceReader::release() +{ + if(!_current_fPtr) return 0; fclose(_current_fPtr); _current_fPtr = nullptr; return 0; } -void FileSourceReader::reset() { +void FileSourceReader::reset() +{ if (_shuffle) std::random_shuffle(_file_names.begin(), _file_names.end()); _read_counter = 0; _curr_file_idx = 0; } -Reader::Status FileSourceReader::subfolder_reading() { - if ((_sub_dir = opendir(_folder_path.c_str())) == nullptr) - THROW("FileReader ShardID [" + TOSTR(_shard_id) + "] ERROR: Failed opening the directory at " + _folder_path); - +Reader::Status FileSourceReader::subfolder_reading() +{ std::vector entry_name_list; - std::string _full_path = _folder_path; - - while ((_entity = readdir(_sub_dir)) != nullptr) { - std::string entry_name(_entity->d_name); - if (strcmp(_entity->d_name, ".") == 0 || strcmp(_entity->d_name, "..") == 0) continue; - entry_name_list.push_back(entry_name); - } - closedir(_sub_dir); - std::sort(entry_name_list.begin(), entry_name_list.end()); - auto ret = Reader::Status::OK; - for (unsigned dir_count = 0; dir_count < entry_name_list.size(); ++dir_count) { - std::string subfolder_path = _full_path + "/" + entry_name_list[dir_count]; - filesys::path pathObj(subfolder_path); - if (filesys::exists(pathObj) && filesys::is_regular_file(pathObj)) { - // ignore files with non-image extensions - auto file_extension_idx = subfolder_path.find_last_of("."); - if (file_extension_idx != std::string::npos) { - std::string file_extension = subfolder_path.substr(file_extension_idx + 1); - std::transform(file_extension.begin(), file_extension.end(), 
file_extension.begin(), - [](unsigned char c) { return std::tolower(c); }); - if ((file_extension != "jpg") && (file_extension != "jpeg") && (file_extension != "png") && (file_extension != "ppm") && (file_extension != "bmp") && (file_extension != "pgm") && (file_extension != "tif") && (file_extension != "tiff") && (file_extension != "webp")) - continue; + for (auto& entry : filesys::recursive_directory_iterator(_folder_path.c_str())) { + try { + std::string entry_path = entry.path().string(); + auto entry_path_id = entry_path; + auto last_slash_idx = entry_path_id.find_last_of("\\/"); + if (std::string::npos != last_slash_idx) + { + entry_path_id.erase(0, last_slash_idx + 1); + } + if (filesys::is_regular_file(entry.path() )) + { + if(!_meta_data_reader || _meta_data_reader->exists(entry_path_id)) + { + if(get_file_shard_id() != _shard_id ) + { + _file_count_all_shards++; + incremenet_file_id(); + continue; + } + _in_batch_read_count++; + _in_batch_read_count = (_in_batch_read_count % _batch_count == 0) ? 0 : _in_batch_read_count; + std::string file_path = entry_path; + _last_file_name = file_path; + _file_names.push_back(file_path); + _file_count_all_shards++; + incremenet_file_id(); + } } - ret = open_folder(); - break; // assume directory has only files. 
- } else if (filesys::exists(pathObj) && filesys::is_directory(pathObj)) { - _folder_path = subfolder_path; - if (open_folder() != Reader::Status::OK) - WRN("FileReader ShardID [" + TOSTR(_shard_id) + "] File reader cannot access the storage at " + _folder_path); } - } - if (_in_batch_read_count > 0 && _in_batch_read_count < _batch_count) { + catch (const filesys::filesystem_error& ex) { + if (ex.code() == std::errc::permission_denied) + THROW("Permission denied for directory: " + entry.path().string()); + } +} + + if(_file_names.empty()) + WRN("FileReader ShardID ["+ TOSTR(_shard_id)+ "] Did not load any file from " + _folder_path) + + + if(_in_batch_read_count > 0 && _in_batch_read_count < _batch_count) + { replicate_last_image_to_fill_last_shard(); - LOG("FileReader ShardID [" + TOSTR(_shard_id) + "] Replicated " + _folder_path + _last_file_name + " " + TOSTR((_batch_count - _in_batch_read_count)) + " times to fill the last batch") + LOG("FileReader ShardID [" + TOSTR(_shard_id) + "] Replicated " + _folder_path + _last_file_name + " " + TOSTR((_batch_count - _in_batch_read_count) ) + " times to fill the last batch") } - if (!_file_names.empty()) - LOG("FileReader ShardID [" + TOSTR(_shard_id) + "] Total of " + TOSTR(_file_names.size()) + " images loaded from " + _full_path) + if(!_file_names.empty()) + LOG("FileReader ShardID ["+ TOSTR(_shard_id)+ "] Total of " + TOSTR(_file_names.size()) + " images loaded from " + _full_path ) return ret; } -void FileSourceReader::replicate_last_image_to_fill_last_shard() { - for (size_t i = _in_batch_read_count; i < _batch_count; i++) +void FileSourceReader::replicate_last_image_to_fill_last_shard() +{ + for(size_t i = _in_batch_read_count; i < _batch_count; i++) _file_names.push_back(_last_file_name); } -void FileSourceReader::replicate_last_batch_to_pad_partial_shard() { - if (_file_names.size() >= _batch_count) { +void FileSourceReader::replicate_last_batch_to_pad_partial_shard() +{ + if (_file_names.size() >= _batch_count) 
{ for (size_t i = 0; i < _batch_count; i++) _file_names.push_back(_file_names[i - _batch_count]); } } -Reader::Status FileSourceReader::open_folder() { - if ((_src_dir = opendir(_folder_path.c_str())) == nullptr) - THROW("FileReader ShardID [" + TOSTR(_shard_id) + "] ERROR: Failed opening the directory at " + _folder_path); - while ((_entity = readdir(_src_dir)) != nullptr) { - if (_entity->d_type != DT_REG) +Reader::Status FileSourceReader::open_folder() +{ + if ((_src_dir = opendir (_folder_path.c_str())) == nullptr) + THROW("FileReader ShardID ["+ TOSTR(_shard_id)+ "] ERROR: Failed opening the directory at " + _folder_path); + + + while((_entity = readdir (_src_dir)) != nullptr) + { + if(_entity->d_type != DT_REG) continue; std::string filename(_entity->d_name); auto file_extension_idx = filename.find_last_of("."); - if (file_extension_idx != std::string::npos) { - std::string file_extension = filename.substr(file_extension_idx + 1); + if (file_extension_idx != std::string::npos) { + std::string file_extension = filename.substr(file_extension_idx+1); std::transform(file_extension.begin(), file_extension.end(), file_extension.begin(), - [](unsigned char c) { return std::tolower(c); }); + [](unsigned char c){ return std::tolower(c); }); if ((file_extension != "jpg") && (file_extension != "jpeg") && (file_extension != "png") && (file_extension != "ppm") && (file_extension != "bmp") && (file_extension != "pgm") && (file_extension != "tif") && (file_extension != "tiff") && (file_extension != "webp")) continue; - } - if (get_file_shard_id() != _shard_id) { + } + if(get_file_shard_id() != _shard_id ) + { _file_count_all_shards++; incremenet_file_id(); continue; } _in_batch_read_count++; - _in_batch_read_count = (_in_batch_read_count % _batch_count == 0) ? 0 : _in_batch_read_count; + _in_batch_read_count = (_in_batch_read_count%_batch_count == 0) ? 
0 : _in_batch_read_count; std::string file_path = _folder_path; file_path.append("/"); file_path.append(_entity->d_name); + _last_file_name = file_path; _file_names.push_back(file_path); _file_count_all_shards++; incremenet_file_id(); } - if (_file_names.empty()) - WRN("FileReader ShardID [" + TOSTR(_shard_id) + "] Did not load any file from " + _folder_path) - std::sort(_file_names.begin(), _file_names.end()); - _last_file_name = _file_names[_file_names.size() - 1]; + if(_file_names.empty()) + WRN("FileReader ShardID ["+ TOSTR(_shard_id)+ "] Did not load any file from " + _folder_path) closedir(_src_dir); return Reader::Status::OK; } -size_t FileSourceReader::get_file_shard_id() { - if (_batch_count == 0 || _shard_count == 0) +size_t FileSourceReader::get_file_shard_id() +{ + if(_batch_count == 0 || _shard_count == 0) THROW("Shard (Batch) size cannot be set to 0") - // return (_file_id / (_batch_count)) % _shard_count; - return _file_id % _shard_count; -} + return _file_id % _shard_count; +} \ No newline at end of file diff --git a/tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt b/tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt new file mode 100644 index 000000000..281350ec0 --- /dev/null +++ b/tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt @@ -0,0 +1,73 @@ +################################################################################ +# +# MIT License +# +# Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +################################################################################ +cmake_minimum_required(VERSION 3.5) + +project (rocAL_audio_unittests) +set(CMAKE_CXX_STANDARD 14) + +# ROCm Path +set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path") + +# avoid setting the default installation path to /usr/local +if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX ${ROCM_PATH} CACHE PATH "rocAL default installation path" FORCE) +endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + +# Add Default libdir +set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory") +include(GNUInstallDirs) + +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake) + +find_package(OpenCV QUIET) +find_package(AMDRPP QUIET) +include_directories(${ROCM_PATH}/${CMAKE_INSTALL_INCLUDEDIR}/rocal) +link_directories(${ROCM_PATH}/lib) +file(GLOB My_Source_Files ./*.cpp) +add_executable(${PROJECT_NAME} ${My_Source_Files}) + +if(OpenCV_FOUND) + if(${OpenCV_VERSION_MAJOR} EQUAL 3 OR ${OpenCV_VERSION_MAJOR} EQUAL 4) + message("-- OpenCV Found -- Version-${OpenCV_VERSION_MAJOR}.${OpenCV_VERSION_MINOR}.X Supported") + include_directories(${OpenCV_INCLUDE_DIRS}) + target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBRARIES}) + if(${OpenCV_VERSION_MAJOR} EQUAL 4) + target_compile_definitions(${PROJECT_NAME} PUBLIC USE_OPENCV_4=1) + else() + target_compile_definitions(${PROJECT_NAME} PUBLIC USE_OPENCV_4=0) + endif() + else() + message(FATAL_ERROR "OpenCV Found -- Version-${OpenCV_VERSION_MAJOR}.${OpenCV_VERSION_MINOR}.X Not Supported") + endif() +else() + message(FATAL_ERROR "OpenCV Not Found -- No Display Support") +endif() + +target_link_libraries(${PROJECT_NAME} rocal) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -mf16c -Wall ") + +install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/tests/cpp_api_tests/rocAL_audio_unittests/README.md 
b/tests/cpp_api_tests/rocAL_audio_unittests/README.md new file mode 100644 index 000000000..3d90758f6 --- /dev/null +++ b/tests/cpp_api_tests/rocAL_audio_unittests/README.md @@ -0,0 +1,24 @@ +# rocAL Audio Unit Tests +This application can be used to verify the functionality of the audio API offered by rocAL. + +## Build Instructions + +### Pre-requisites +* Ubuntu Linux, [version `16.04` or later](https://ubuntu.com/download) +* rocAL library (Part of the MIVisionX toolkit) +* [OpenCV 3.4+](https://github.com/opencv/opencv/releases/tag/3.4.0) +* Radeon Performance Primitives (RPP) + +### Build + ```` + mkdir build + cd build + cmake ../ + make + ```` +### Running the application + ```` +./rocAL_audio_unittests <audio-dataset-folder> + +Usage: ./rocAL_audio_unittests <audio-dataset-folder> <test-case> <sample-rate> <downmix> <max-frames> <max-channels> gpu=1/cpu=0 + ```` diff --git a/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp b/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp new file mode 100644 index 000000000..708b250cd --- /dev/null +++ b/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp @@ -0,0 +1,255 @@ +/* +MIT License + +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include +#include + +#include "rocal_api.h" + +#include "opencv2/opencv.hpp" +using namespace cv; + +#if USE_OPENCV_4 +#define CV_LOAD_IMAGE_COLOR IMREAD_COLOR +#define CV_BGR2GRAY COLOR_BGR2GRAY +#define CV_GRAY2RGB COLOR_GRAY2RGB +#define CV_RGB2BGR COLOR_RGB2BGR +#define CV_FONT_HERSHEY_SIMPLEX FONT_HERSHEY_SIMPLEX +#define CV_FILLED FILLED +#define CV_WINDOW_AUTOSIZE WINDOW_AUTOSIZE +#endif + +#define DISPLAY 1 +#define METADATA 0 // Switch the meta-data part once the meta-data reader (file list reader) is introduced +using namespace std::chrono; + +int test(int test_case, const char *path, float sample_rate, int downmix, unsigned max_frames, unsigned max_channels, int gpu); +int main(int argc, const char **argv) +{ + // check command-line usage + const int MIN_ARG_COUNT = 2; + printf("Usage: image_augmentation gpu=1/cpu=0 \n"); + if (argc < MIN_ARG_COUNT) + return -1; + + int argIdx = 0; + const char *path = argv[++argIdx]; + unsigned test_case = 0; + float sample_rate = 0.0; + bool downmix = false; + unsigned max_frames = 1; + unsigned max_channels = 1; + bool gpu = 0; + + if (argc >= argIdx + MIN_ARG_COUNT) + test_case = atoi(argv[++argIdx]); + + if (argc >= argIdx + MIN_ARG_COUNT) + sample_rate = atoi(argv[++argIdx]); + + if (argc >= argIdx + MIN_ARG_COUNT) + downmix = atoi(argv[++argIdx]); + + if (argc >= argIdx + MIN_ARG_COUNT) + max_frames = atoi(argv[++argIdx]); + + if (argc >= argIdx + MIN_ARG_COUNT) + max_channels = atoi(argv[++argIdx]); + + if (argc >= argIdx + MIN_ARG_COUNT) + gpu = atoi(argv[++argIdx]); + + int return_val = test(test_case, path, sample_rate, downmix, max_frames, max_channels, gpu); + return return_val; +} + +int test(int 
test_case, const char *path, float sample_rate, int downmix, unsigned max_frames, unsigned max_channels, int gpu) +{ + int inputBatchSize = 10; + std::cout << ">>> test case " << test_case << std::endl; + std::cout << ">>> Running on " << (gpu ? "GPU" : "CPU") << std::endl; + + auto handle = rocalCreate(inputBatchSize, + gpu ? RocalProcessMode::ROCAL_PROCESS_GPU : RocalProcessMode::ROCAL_PROCESS_CPU, 0, + 1); + + if (rocalGetStatus(handle) != ROCAL_OK) { + std::cout << "Could not create the Rocal contex\n"; + return -1; + } + + /*>>>>>>>>>>>>>>>> Creating Rocal parameters <<<<<<<<<<<<<<<<*/ + + RocalMetaData metadata_output; + // MetaData reader for input file_list which has file seperated by labels + // if (METADATA) { // To uncomment later when meta-data reader for audio is added (PR4) + // std::cout << "META DATA READER"; + // const char* file_list_path = "/workspace/rnnt/AMD/MIVisionX-data/rocal_data/audio_samples/audio_file_list.txt" ; // TODO: Add this as an arg in main() + // metadata_output = rocalCreateFileListLabelReader(handle, path, file_list_path); + // } + + //Decoder + // RocalTensor input1; // Uncomment when augmentations are enabled + // RocalTensorList non_silent_region_op; // Uncomment when NSR is introduced (PR5) + // const char* file_list_path = "/media/MIVisionX-data/rocal_data/audio_samples/audio_file_list.txt" ; // Uncomment and use it when meta-data reader is introduced (PR4) + rocalAudioFileSourceSingleShard(handle, path, 0, 1, true, false, false, false, max_frames, max_channels, 0); + if (rocalGetStatus(handle) != ROCAL_OK) { + std::cout << "Audio source could not initialize : " << rocalGetErrorMessage(handle) << std::endl; + return -1; + } + /* The augmentation cases - To uncomment as each augmentation is introduced + + switch (test_case) + { + case 0: + { + RocalTensorLayout tensorLayout; // = RocalTensorLayout::None; + RocalTensorOutputType tensorOutputType = RocalTensorOutputType::ROCAL_FP32; + output = rocalToDecibels(handle, input1, 
tensorLayout, tensorOutputType, true); + std::cout<<"\n Calls rocalToDecibels"; + } + break; + case 1: + { + RocalTensorLayout tensorLayout; // = RocalTensorLayout::None; + RocalTensorOutputType tensorOutputType = RocalTensorOutputType::ROCAL_FP32; + output = rocalPreEmphasisFilter(handle, input1, tensorOutputType, true); + std::cout<<"\n Calls rocalPreEmphasisFilter "; + } + break; + case 2: + { + RocalTensorLayout tensorLayout; // = RocalTensorLayout::None; + RocalTensorOutputType tensorOutputType = RocalTensorOutputType::ROCAL_FP32; + // int nfftSize = 2048; + std::vector window_fn{}; + output = rocalSpectrogram(handle, input1, tensorOutputType, true, window_fn, true, true, RocalSpectrogramLayout(0), 2, 512, 512, 256); + std::cout<<"\n Calls rocalSpectrogram "; + } + break; + case 3: + { + auto non_silent_region = rocalNonSilentRegion(handle, input1, true, -60, 1, -1, 3); + // RocalTensor begin = non_silent_region->at(0); + } + break; + case 4: + { + std::cout<<"\n Mel Filter Bank"; + RocalTensorLayout tensorLayout; // = RocalTensorLayout::None; + RocalTensorOutputType tensorOutputType = RocalTensorOutputType::ROCAL_FP32; + std::vector window_fn{}; + RocalTensor temp_output = rocalSpectrogram(handle, input1, tensorOutputType, false, window_fn, true, true, RocalSpectrogramLayout(0), 2, 512, 512, 256); + float sampleRate = 16000; + float minFreq = 0.0; + float maxFreq = sampleRate / 2; + RocalMelScaleFormula melFormula = RocalMelScaleFormula::SLANEY; + int numFilter = 128; + bool normalize = true; + + output = rocalMelFilterBank(handle, temp_output, true, maxFreq, minFreq, melFormula, numFilter, normalize, sampleRate); + } + break; + case 5: + { + RocalTensorLayout tensorLayout; // = RocalTensorLayout::None; + RocalTensorOutputType tensorOutputType = RocalTensorOutputType::ROCAL_FP32; + const size_t num_values = 3; + std::pair non_silent_region_output; + non_silent_region_output = rocalNonSilentRegion(handle, input1, false, -60, 0.0, -1, 3); + output = 
rocalSlice(handle, input1, tensorOutputType, true, non_silent_region_output.first, non_silent_region_output.second, {0.3f}); + } + break; + case 6: + { + std::cout<<"\n Normalize"; + RocalTensorLayout tensorLayout; + RocalTensorOutputType tensorOutputType = RocalTensorOutputType::ROCAL_FP32; + output = rocalNormalize(handle, input1, tensorOutputType, true, false, {1}); + } + break; + case 7: + { + std::cout<<"\nPad"; + RocalTensorLayout tensorLayout; + RocalTensorOutputType tensorOutputType = RocalTensorOutputType::ROCAL_FP32; + output = rocalPad(handle, input1, tensorOutputType, true, 4.0f); + } + break; + + default: + { + std::cout << "Not a valid pipeline type ! Exiting!\n"; + return -1; + } + + } + */ + rocalVerify(handle); + if (rocalGetStatus(handle) != ROCAL_OK) + { + std::cout << "Could not verify the augmentation graph " << rocalGetErrorMessage(handle); + return -1; + } + + /*>>>>>>>>>>>>>>>>>>> Diplay using OpenCV <<<<<<<<<<<<<<<<<*/ + cv::Mat mat_output, mat_input, mat_color; + int iteration = 0; + RocalTensorList output_tensor_list; + + while (rocalGetRemainingImages(handle) >= static_cast(inputBatchSize)) + { + std::cout<<"\n rocalGetRemainingImages:: "< audio_op; + output_tensor_list = rocalGetOutputTensors(handle); + std::cout << "\n *****************************Audio output**********************************\n"; + std::cout << "\n **************Printing the first 5 values of the Audio buffer**************\n"; + for(uint idx = 0; idx < output_tensor_list->size(); idx++) { + float * buffer = (float *)output_tensor_list->at(idx)->buffer(); + for(int n = 0; n < 5; n++) + std::cout << buffer[n] << "\n"; + } + + if (METADATA) { + RocalTensorList labels = rocalGetImageLabels(handle); + for(uint i = 0; i < labels->size(); i++) { + int * labels_buffer = (int *)(labels->at(i)->buffer()); + std::cout << ">>>>> LABELS : " << labels_buffer[0] << "\t"; + } + + } + std::cout<<"******************************************************************************\n"; + } + 
rocalRelease(handle); + return 0; +} From b180b78307a454e9e83b1888dfc17218055a8e12 Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Fri, 8 Mar 2024 10:52:11 -0500 Subject: [PATCH 002/388] channge image_info to sample_info to maintain a generic name for all the use-cases --- rocAL/include/loaders/circular_buffer.h | 12 ++++++------ .../include/loaders/image/cifar10_data_loader.h | 6 +++--- rocAL/include/loaders/image/image_loader.h | 6 +++--- .../include/loaders/image/image_loader_sharded.h | 2 +- rocAL/include/loaders/loader_module.h | 4 ++-- rocAL/include/loaders/video/video_loader.h | 6 +++--- .../include/loaders/video/video_loader_sharded.h | 2 +- rocAL/include/meta_data/bounding_box_graph.h | 4 ++-- rocAL/include/meta_data/meta_data_graph.h | 4 ++-- rocAL/source/loaders/circular_buffer.cpp | 16 ++++++++-------- .../source/loaders/image/cifar10_data_loader.cpp | 16 ++++++++-------- rocAL/source/loaders/image/image_loader.cpp | 12 ++++++------ .../loaders/image/image_loader_sharded.cpp | 4 ++-- rocAL/source/loaders/video/video_loader.cpp | 14 +++++++------- .../loaders/video/video_loader_sharded.cpp | 4 ++-- rocAL/source/meta_data/bounding_box_graph.cpp | 6 +++--- rocAL/source/pipeline/master_graph.cpp | 16 ++++++++-------- 17 files changed, 67 insertions(+), 67 deletions(-) diff --git a/rocAL/include/loaders/circular_buffer.h b/rocAL/include/loaders/circular_buffer.h index ac4fafe13..67ea530cf 100644 --- a/rocAL/include/loaders/circular_buffer.h +++ b/rocAL/include/loaders/circular_buffer.h @@ -31,8 +31,8 @@ THE SOFTWARE. 
#include "commons.h" #include "device_manager.h" #include "device_manager_hip.h" -struct decoded_image_info { - std::vector _image_names; +struct decoded_sample_info { + std::vector _sample_names; std::vector _roi_width; std::vector _roi_height; std::vector _original_width; @@ -54,9 +54,9 @@ class CircularBuffer { void unblock_writer(); // Unblocks the thread currently waiting on get_write_buffer void push(); // The latest write goes through, effectively adds one element to the buffer void pop(); // The oldest write will be erased and overwritten in upcoming writes - void set_image_info(const decoded_image_info& info) { _last_image_info = info; } + void set_sample_info(const decoded_sample_info& info) { _last_sample_info = info; } void set_crop_image_info(const crop_image_info& info) { _last_crop_image_info = info; } - decoded_image_info& get_image_info(); + decoded_sample_info& get_sample_info(); crop_image_info& get_cropped_image_info(); bool random_bbox_crop_flag = false; void* get_read_buffer_dev(); @@ -73,8 +73,8 @@ class CircularBuffer { bool full(); bool empty(); size_t _buff_depth; - decoded_image_info _last_image_info; - std::queue _circ_image_info; //!< Stores the loaded images names, decoded_width and decoded_height(data is stored in the _circ_buff) + decoded_sample_info _last_sample_info; + std::queue _circ_sample_info; //!< Stores the loaded sample's names, decoded_width and decoded_height(data is stored in the _circ_buff) crop_image_info _last_crop_image_info; // for Random BBox crop coordinates std::queue _circ_crop_image_info; //!< Stores the crop coordinates of the images for random bbox crop (data is stored in the _circ_buff) std::mutex _names_buff_lock; diff --git a/rocAL/include/loaders/image/cifar10_data_loader.h b/rocAL/include/loaders/image/cifar10_data_loader.h index 9736b6dfc..7f8c6a784 100644 --- a/rocAL/include/loaders/image/cifar10_data_loader.h +++ b/rocAL/include/loaders/image/cifar10_data_loader.h @@ -40,7 +40,7 @@ class 
CIFAR10DataLoader : public LoaderModule { void reset() override; void start_loading() override; std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; crop_image_info get_crop_image_info() override; Timing timing() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; @@ -59,8 +59,8 @@ class CIFAR10DataLoader : public LoaderModule { LoaderModuleStatus load_routine(); std::shared_ptr _reader; void *_dev_resources; - decoded_image_info _raw_img_info; // image info to store the names. In this case the ID of image is stored in _roi_width field - decoded_image_info _output_decoded_img_info; + decoded_sample_info _raw_img_info; // image info to store the names. In this case the ID of image is stored in _roi_width field + decoded_sample_info _output_decoded_img_info; bool _initialized = false; RocalMemType _mem_type; size_t _output_mem_size; diff --git a/rocAL/include/loaders/image/image_loader.h b/rocAL/include/loaders/image/image_loader.h index 082bf2015..9b65d5f92 100644 --- a/rocAL/include/loaders/image/image_loader.h +++ b/rocAL/include/loaders/image/image_loader.h @@ -49,7 +49,7 @@ class ImageLoader : public LoaderModule { LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); void set_gpu_device_id(int device_id); std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; crop_image_info get_crop_image_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; void shut_down() override; @@ -74,9 +74,9 @@ class ImageLoader : public LoaderModule { size_t _batch_size; std::thread _load_thread; RocalMemType _mem_type; - decoded_image_info _decoded_img_info; + decoded_sample_info _decoded_img_info; crop_image_info _crop_image_info; - decoded_image_info _output_decoded_img_info; + decoded_sample_info _output_decoded_img_info; 
crop_image_info _output_cropped_img_info; CircularBuffer _circ_buff; TimingDBG _swap_handle_time; diff --git a/rocAL/include/loaders/image/image_loader_sharded.h b/rocAL/include/loaders/image/image_loader_sharded.h index 3b7bdf998..7c92dfa5a 100644 --- a/rocAL/include/loaders/image/image_loader_sharded.h +++ b/rocAL/include/loaders/image/image_loader_sharded.h @@ -40,7 +40,7 @@ class ImageLoaderSharded : public LoaderModule { void reset() override; void start_loading() override; std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; crop_image_info get_crop_image_info() override; Timing timing() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; diff --git a/rocAL/include/loaders/loader_module.h b/rocAL/include/loaders/loader_module.h index 4d4531a52..7bb7f6087 100644 --- a/rocAL/include/loaders/loader_module.h +++ b/rocAL/include/loaders/loader_module.h @@ -53,8 +53,8 @@ class LoaderModule { virtual Timing timing() = 0; // Returns timing info virtual std::vector get_id() = 0; // returns the id of the last batch of images/frames loaded virtual void start_loading() = 0; // starts internal loading thread - virtual decoded_image_info get_decode_image_info() = 0; - virtual crop_image_info get_crop_image_info() = 0; + virtual decoded_sample_info get_decode_sample_info() = 0; + virtual crop_image_info get_crop_image_info() { return {}; } virtual void set_prefetch_queue_depth(size_t prefetch_queue_depth) = 0; // introduce meta data reader virtual void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) = 0; diff --git a/rocAL/include/loaders/video/video_loader.h b/rocAL/include/loaders/video/video_loader.h index 4ff85f11c..81bbaee50 100644 --- a/rocAL/include/loaders/video/video_loader.h +++ b/rocAL/include/loaders/video/video_loader.h @@ -49,7 +49,7 @@ class VideoLoader : public LoaderModule { LoaderModuleStatus 
set_cpu_affinity(cpu_set_t cpu_mask); LoaderModuleStatus set_cpu_sched_policy(struct sched_param sched_policy); std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; crop_image_info get_crop_image_info() override { return _crop_img_info; } void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override{}; @@ -74,8 +74,8 @@ class VideoLoader : public LoaderModule { size_t _sequence_length; std::thread _load_thread; RocalMemType _mem_type; - decoded_image_info _decoded_img_info; - decoded_image_info _output_decoded_img_info; + decoded_sample_info _decoded_img_info; + decoded_sample_info _output_decoded_img_info; CircularBuffer _circ_buff; TimingDBG _swap_handle_time; bool _is_initialized; diff --git a/rocAL/include/loaders/video/video_loader_sharded.h b/rocAL/include/loaders/video/video_loader_sharded.h index 41cd062a6..99ae55ed6 100644 --- a/rocAL/include/loaders/video/video_loader_sharded.h +++ b/rocAL/include/loaders/video/video_loader_sharded.h @@ -41,7 +41,7 @@ class VideoLoaderSharded : public LoaderModule { void reset() override; void start_loading() override; std::vector get_id() override; - decoded_image_info get_decode_image_info() override; + decoded_sample_info get_decode_sample_info() override; void set_prefetch_queue_depth(size_t prefetch_queue_depth) override; crop_image_info get_crop_image_info() override { return _crop_img_info; } void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override{}; diff --git a/rocAL/include/meta_data/bounding_box_graph.h b/rocAL/include/meta_data/bounding_box_graph.h index 34710b3ae..224926447 100644 --- a/rocAL/include/meta_data/bounding_box_graph.h +++ b/rocAL/include/meta_data/bounding_box_graph.h @@ -31,8 +31,8 @@ typedef struct { float xc; float yc; float w; float h; } BoundingBoxCord_xcycwh class 
BoundingBoxGraph : public MetaDataGraph { public: void process(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data) override; - void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decode_image_info) override; - void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) override; + void update_meta_data(pMetaDataBatch meta_data, decoded_sample_info decode_image_info) override; + void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_sample_info decoded_image_info, crop_image_info crop_image_info) override; void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) override; void update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_info, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data) override; }; diff --git a/rocAL/include/meta_data/meta_data_graph.h b/rocAL/include/meta_data/meta_data_graph.h index 563f7f069..491b2d2c2 100644 --- a/rocAL/include/meta_data/meta_data_graph.h +++ b/rocAL/include/meta_data/meta_data_graph.h @@ -41,8 +41,8 @@ class MetaDataGraph { public: virtual ~MetaDataGraph() = default; virtual void process(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data) = 0; - virtual void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decoded_image_info) = 0; - virtual void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) = 0; + virtual void update_meta_data(pMetaDataBatch meta_data, decoded_sample_info decoded_image_info) = 0; + virtual void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, 
decoded_sample_info decoded_image_info, crop_image_info crop_image_info) = 0; virtual void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) = 0; virtual void update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_info, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data) = 0; std::list> _meta_nodes; diff --git a/rocAL/source/loaders/circular_buffer.cpp b/rocAL/source/loaders/circular_buffer.cpp index 25ba7cb59..134bd9e08 100644 --- a/rocAL/source/loaders/circular_buffer.cpp +++ b/rocAL/source/loaders/circular_buffer.cpp @@ -40,8 +40,8 @@ void CircularBuffer::reset() { _write_ptr = 0; _read_ptr = 0; _level = 0; - while (!_circ_image_info.empty()) - _circ_image_info.pop(); + while (!_circ_sample_info.empty()) + _circ_sample_info.pop(); if (random_bbox_crop_flag == true) { while (!_circ_crop_image_info.empty()) _circ_crop_image_info.pop(); @@ -135,7 +135,7 @@ void CircularBuffer::push() { sync(); // Pushing to the _circ_buff and _circ_buff_names must happen all at the same time std::unique_lock lock(_names_buff_lock); - _circ_image_info.push(_last_image_info); + _circ_sample_info.push(_last_sample_info); if (random_bbox_crop_flag == true) _circ_crop_image_info.push(_last_crop_image_info); increment_write_ptr(); @@ -147,7 +147,7 @@ void CircularBuffer::pop() { // Pushing to the _circ_buff and _circ_buff_names must happen all at the same time std::unique_lock lock(_names_buff_lock); increment_read_ptr(); - _circ_image_info.pop(); + _circ_sample_info.pop(); if (random_bbox_crop_flag == true) _circ_crop_image_info.pop(); } @@ -338,12 +338,12 @@ CircularBuffer::~CircularBuffer() { _initialized = false; } -decoded_image_info &CircularBuffer::get_image_info() { +decoded_sample_info &CircularBuffer::get_sample_info() { block_if_empty(); std::unique_lock lock(_names_buff_lock); - if (_level != 
_circ_image_info.size()) - THROW("CircularBuffer internals error, image and image info sizes not the same " + TOSTR(_level) + " != " + TOSTR(_circ_image_info.size())) - return _circ_image_info.front(); + if (_level != _circ_sample_info.size()) + THROW("CircularBuffer internals error, sample and sample info sizes not the same " + TOSTR(_level) + " != " + TOSTR(_circ_sample_info.size())) + return _circ_sample_info.front(); } crop_image_info &CircularBuffer::get_cropped_image_info() { diff --git a/rocAL/source/loaders/image/cifar10_data_loader.cpp b/rocAL/source/loaders/image/cifar10_data_loader.cpp index d3e89218e..f5ef881a3 100644 --- a/rocAL/source/loaders/image/cifar10_data_loader.cpp +++ b/rocAL/source/loaders/image/cifar10_data_loader.cpp @@ -119,7 +119,7 @@ void CIFAR10DataLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decode throw; } _actual_read_size.resize(batch_size); - _raw_img_info._image_names.resize(_batch_size); + _raw_img_info._sample_names.resize(_batch_size); _raw_img_info._roi_width.resize(_batch_size); // used to store the individual image in a big raw file _raw_img_info._roi_height.resize(batch_size); _raw_img_info._original_height.resize(_batch_size); @@ -182,7 +182,7 @@ CIFAR10DataLoader::load_routine() { continue; } _actual_read_size[file_counter] = _reader->read_data(read_ptr, readSize); - _raw_img_info._image_names[file_counter] = _reader->id(); + _raw_img_info._sample_names[file_counter] = _reader->id(); _raw_img_info._roi_width[file_counter] = _output_tensor->info().max_shape()[0]; _raw_img_info._roi_height[file_counter] = _output_tensor->info().max_shape()[1]; _reader->close(); @@ -190,13 +190,13 @@ CIFAR10DataLoader::load_routine() { } if (_randombboxcrop_meta_data_reader) { // Fetch the crop co-ordinates for a batch of images - _bbox_coords = _randombboxcrop_meta_data_reader->get_batch_crop_coords(_raw_img_info._image_names); + _bbox_coords = 
_randombboxcrop_meta_data_reader->get_batch_crop_coords(_raw_img_info._sample_names); set_batch_random_bbox_crop_coords(_bbox_coords); _crop_image_info._crop_image_coords = get_batch_random_bbox_crop_coords(); _circ_buff.set_crop_image_info(_crop_image_info); } _file_load_time.end(); // Debug timing - _circ_buff.set_image_info(_raw_img_info); + _circ_buff.set_sample_info(_raw_img_info); _circ_buff.push(); _image_counter += _output_tensor->info().batch_size(); load_status = LoaderModuleStatus::OK; @@ -254,11 +254,11 @@ CIFAR10DataLoader::update_output_image() { if (_stopped) return LoaderModuleStatus::OK; - _output_decoded_img_info = _circ_buff.get_image_info(); + _output_decoded_img_info = _circ_buff.get_sample_info(); if (_randombboxcrop_meta_data_reader) { _output_cropped_image_info = _circ_buff.get_cropped_image_info(); } - _output_names = _output_decoded_img_info._image_names; + _output_names = _output_decoded_img_info._sample_names; _output_tensor->update_tensor_roi(_output_decoded_img_info._roi_width, _output_decoded_img_info._roi_height); _circ_buff.pop(); @@ -279,10 +279,10 @@ std::vector CIFAR10DataLoader::get_id() { return _output_names; } -decoded_image_info CIFAR10DataLoader::get_decode_image_info() { +decoded_sample_info CIFAR10DataLoader::get_decode_sample_info() { return _output_decoded_img_info; } crop_image_info CIFAR10DataLoader::get_crop_image_info() { return _output_cropped_image_info; -} +} \ No newline at end of file diff --git a/rocAL/source/loaders/image/image_loader.cpp b/rocAL/source/loaders/image/image_loader.cpp index 00abf74cc..71a8e1009 100644 --- a/rocAL/source/loaders/image/image_loader.cpp +++ b/rocAL/source/loaders/image/image_loader.cpp @@ -152,7 +152,7 @@ void ImageLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, } _max_tensor_width = _output_tensor->info().max_shape().at(0); _max_tensor_height = _output_tensor->info().max_shape().at(1); - _decoded_img_info._image_names.resize(_batch_size); + 
_decoded_img_info._sample_names.resize(_batch_size); _decoded_img_info._roi_height.resize(_batch_size); _decoded_img_info._roi_width.resize(_batch_size); _decoded_img_info._original_height.resize(_batch_size); @@ -187,7 +187,7 @@ ImageLoader::load_routine() { auto load_status = LoaderModuleStatus::NO_MORE_DATA_TO_READ; { load_status = _image_loader->load(data, - _decoded_img_info._image_names, + _decoded_img_info._sample_names, _max_tensor_width, _max_tensor_height, _decoded_img_info._roi_width, @@ -201,7 +201,7 @@ ImageLoader::load_routine() { _crop_image_info._crop_image_coords = _image_loader->get_batch_random_bbox_crop_coords(); _circ_buff.set_crop_image_info(_crop_image_info); } - _circ_buff.set_image_info(_decoded_img_info); + _circ_buff.set_sample_info(_decoded_img_info); _circ_buff.push(); _image_counter += _output_tensor->info().batch_size(); } @@ -259,11 +259,11 @@ ImageLoader::update_output_image() { if (_stopped) return LoaderModuleStatus::OK; - _output_decoded_img_info = _circ_buff.get_image_info(); + _output_decoded_img_info = _circ_buff.get_sample_info(); if (_randombboxcrop_meta_data_reader) { _output_cropped_img_info = _circ_buff.get_cropped_image_info(); } - _output_names = _output_decoded_img_info._image_names; + _output_names = _output_decoded_img_info._sample_names; _output_tensor->update_tensor_roi(_output_decoded_img_info._roi_width, _output_decoded_img_info._roi_height); _circ_buff.pop(); if (!_loop) @@ -307,7 +307,7 @@ std::vector ImageLoader::get_id() { return _output_names; } -decoded_image_info ImageLoader::get_decode_image_info() { +decoded_sample_info ImageLoader::get_decode_sample_info() { return _output_decoded_img_info; } diff --git a/rocAL/source/loaders/image/image_loader_sharded.cpp b/rocAL/source/loaders/image/image_loader_sharded.cpp index 02d7f05f0..bacf5b573 100644 --- a/rocAL/source/loaders/image/image_loader_sharded.cpp +++ b/rocAL/source/loaders/image/image_loader_sharded.cpp @@ -38,8 +38,8 @@ std::vector 
ImageLoaderSharded::get_id() { return _loaders[_loader_idx]->get_id(); } -decoded_image_info ImageLoaderSharded::get_decode_image_info() { - return _loaders[_loader_idx]->get_decode_image_info(); +decoded_sample_info ImageLoaderSharded::get_decode_sample_info() { + return _loaders[_loader_idx]->get_decode_sample_info(); } crop_image_info ImageLoaderSharded::get_crop_image_info() { diff --git a/rocAL/source/loaders/video/video_loader.cpp b/rocAL/source/loaders/video/video_loader.cpp index db83786d8..6a79034c1 100644 --- a/rocAL/source/loaders/video/video_loader.cpp +++ b/rocAL/source/loaders/video/video_loader.cpp @@ -131,7 +131,7 @@ void VideoLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, } _max_tensor_width = _output_tensor->info().max_shape().at(0); _max_tensor_height = _output_tensor->info().max_shape().at(1); - _decoded_img_info._image_names.resize(_batch_size); + _decoded_img_info._sample_names.resize(_batch_size); _decoded_img_info._roi_height.resize(_batch_size); _decoded_img_info._roi_width.resize(_batch_size); _decoded_img_info._original_height.resize(_batch_size); @@ -163,7 +163,7 @@ VideoLoader::load_routine() { auto load_status = LoaderModuleStatus::NO_MORE_DATA_TO_READ; { load_status = _video_loader->load(data, - _decoded_img_info._image_names, + _decoded_img_info._sample_names, _max_tensor_width, _max_tensor_height, _decoded_img_info._roi_width, @@ -175,7 +175,7 @@ VideoLoader::load_routine() { _output_tensor->info().color_format()); if (load_status == LoaderModuleStatus::OK) { - _circ_buff.set_image_info(_decoded_img_info); + _circ_buff.set_sample_info(_decoded_img_info); _circ_buff.push(); _image_counter += _output_tensor->info().batch_size(); } @@ -233,8 +233,8 @@ VideoLoader::update_output_image() { } if (_stopped) return LoaderModuleStatus::OK; - _output_decoded_img_info = _circ_buff.get_image_info(); - _output_names = _output_decoded_img_info._image_names; + _output_decoded_img_info = _circ_buff.get_sample_info(); + 
_output_names = _output_decoded_img_info._sample_names; _output_tensor->update_tensor_roi(_output_decoded_img_info._roi_width, _output_decoded_img_info._roi_height); _circ_buff.pop(); if (!_loop) @@ -277,7 +277,7 @@ std::vector VideoLoader::get_id() { return _output_names; } -decoded_image_info VideoLoader::get_decode_image_info() { +decoded_sample_info VideoLoader::get_decode_sample_info() { return _output_decoded_img_info; } @@ -294,4 +294,4 @@ std::vector> VideoLoader::get_sequence_frame_timestamps() { _sequence_frame_timestamps_vec.pop_back(); return sequence_frame_timestamp; } -#endif +#endif \ No newline at end of file diff --git a/rocAL/source/loaders/video/video_loader_sharded.cpp b/rocAL/source/loaders/video/video_loader_sharded.cpp index 5afecf642..6093b507a 100644 --- a/rocAL/source/loaders/video/video_loader_sharded.cpp +++ b/rocAL/source/loaders/video/video_loader_sharded.cpp @@ -39,8 +39,8 @@ std::vector VideoLoaderSharded::get_id() { return _loaders[_loader_idx]->get_id(); } -decoded_image_info VideoLoaderSharded::get_decode_image_info() { - return _loaders[_loader_idx]->get_decode_image_info(); +decoded_sample_info VideoLoaderSharded::get_decode_sample_info() { + return _loaders[_loader_idx]->get_decode_sample_info(); } VideoLoaderSharded::~VideoLoaderSharded() { diff --git a/rocAL/source/meta_data/bounding_box_graph.cpp b/rocAL/source/meta_data/bounding_box_graph.cpp index 0dd882a2d..b44984bb0 100644 --- a/rocAL/source/meta_data/bounding_box_graph.cpp +++ b/rocAL/source/meta_data/bounding_box_graph.cpp @@ -31,7 +31,7 @@ void BoundingBoxGraph::process(pMetaDataBatch input_meta_data, pMetaDataBatch ou } // This function is used to rescale the metadata values w.r.t the decoded image sizes -void BoundingBoxGraph::update_meta_data(pMetaDataBatch input_meta_data, decoded_image_info decode_image_info) { +void BoundingBoxGraph::update_meta_data(pMetaDataBatch input_meta_data, decoded_sample_info decode_image_info) { std::vector original_height = 
decode_image_info._original_height; std::vector original_width = decode_image_info._original_width; std::vector roi_width = decode_image_info._roi_width; @@ -68,7 +68,7 @@ inline float ssd_BBoxIntersectionOverUnion(const BoundingBoxCord &box1, const fl return (float)(intersection_area / (box1_area + box2_area - intersection_area)); } -void BoundingBoxGraph::update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decode_image_info, crop_image_info crop_image_info) { +void BoundingBoxGraph::update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_sample_info decode_image_info, crop_image_info crop_image_info) { std::vector original_height = decode_image_info._original_height; std::vector original_width = decode_image_info._original_width; std::vector roi_width = decode_image_info._roi_width; @@ -280,4 +280,4 @@ void BoundingBoxGraph::update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_inf matched_vals.clear(); low_quality_preds.clear(); } -} +} \ No newline at end of file diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index 917913338..34409ee52 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/source/pipeline/master_graph.cpp @@ -903,16 +903,16 @@ void MasterGraph::output_routine() { THROW("Loader module failed to load next batch of images, status " + TOSTR(load_ret)) if (!_processing) break; - auto full_batch_image_names = _loader_module->get_id(); - auto decode_image_info = _loader_module->get_decode_image_info(); + auto full_batch_sample_names = _loader_module->get_id(); + auto decode_sample_info = _loader_module->get_decode_sample_info(); auto crop_image_info = _loader_module->get_crop_image_info(); - if (full_batch_image_names.size() != _user_batch_size) - WRN("Internal problem: names count " + TOSTR(full_batch_image_names.size())) + if (full_batch_sample_names.size() != _user_batch_size) + 
WRN("Internal problem: names count " + TOSTR(full_batch_sample_names.size())) // meta_data lookup is done before _meta_data_graph->process() is called to have the new meta_data ready for processing if (_meta_data_reader) - _meta_data_reader->lookup(full_batch_image_names); + _meta_data_reader->lookup(full_batch_sample_names); if (!_processing) break; @@ -936,9 +936,9 @@ void MasterGraph::output_routine() { output_meta_data = _augmented_meta_data->clone(!_augmentation_metanode); // copy the data if metadata is not processed by the nodes, else create an empty instance if (_meta_data_graph) { if (_is_random_bbox_crop) { - _meta_data_graph->update_random_bbox_meta_data(_augmented_meta_data, output_meta_data, decode_image_info, crop_image_info); + _meta_data_graph->update_random_bbox_meta_data(_augmented_meta_data, output_meta_data, decode_sample_info, crop_image_info); } else { - _meta_data_graph->update_meta_data(_augmented_meta_data, decode_image_info); + _meta_data_graph->update_meta_data(_augmented_meta_data, decode_sample_info); } _meta_data_graph->process(_augmented_meta_data, output_meta_data); } @@ -970,7 +970,7 @@ void MasterGraph::output_routine() { _sequence_start_framenum_vec.insert(_sequence_start_framenum_vec.begin(), _loader_module->get_sequence_start_frame_number()); _sequence_frame_timestamps_vec.insert(_sequence_frame_timestamps_vec.begin(), _loader_module->get_sequence_frame_timestamps()); #endif - _ring_buffer.set_meta_data(full_batch_image_names, output_meta_data); + _ring_buffer.set_meta_data(full_batch_sample_names, output_meta_data); _ring_buffer.push(); // Image data and metadata is now stored in output the ring_buffer, increases it's level by 1 } } catch (const std::exception &e) { From 853ea8e5adf1ab538958df20e4ec44fd8d402f1d Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Mon, 11 Mar 2024 06:49:55 -0400 Subject: [PATCH 003/388] Change the copyright year from 2023 to 2024 --- rocAL/include/decoders/audio/audio_decoder.h | 2 +- 
rocAL/include/decoders/audio/audio_decoder_factory.h | 2 +- rocAL/include/decoders/audio/sndfile_decoder.h | 2 +- rocAL/include/loaders/audio/audio_loader.h | 2 +- rocAL/include/loaders/audio/audio_loader_sharded.h | 2 +- rocAL/include/loaders/audio/audio_read_and_decode.h | 2 +- rocAL/include/loaders/audio/audio_source_evaluator.h | 2 +- rocAL/include/loaders/audio/node_audio_loader.h | 2 +- rocAL/include/loaders/audio/node_audio_loader_single_shard.h | 2 +- rocAL/source/decoders/audio/audio_decoder_factory.cpp | 2 +- rocAL/source/decoders/audio/sndfile_decoder.cpp | 2 +- rocAL/source/loaders/audio/audio_loader.cpp | 2 +- rocAL/source/loaders/audio/audio_loader_sharded.cpp | 2 +- rocAL/source/loaders/audio/audio_read_and_decode.cpp | 2 +- rocAL/source/loaders/audio/audio_source_evaluator.cpp | 2 +- rocAL/source/loaders/audio/node_audio_loader.cpp | 2 +- rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp | 2 +- tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt | 4 ++-- .../rocAL_audio_unittests/rocAL_audio_unittests.cpp | 2 +- 19 files changed, 20 insertions(+), 20 deletions(-) diff --git a/rocAL/include/decoders/audio/audio_decoder.h b/rocAL/include/decoders/audio/audio_decoder.h index 63e23cacd..d1927d6b6 100644 --- a/rocAL/include/decoders/audio/audio_decoder.h +++ b/rocAL/include/decoders/audio/audio_decoder.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/decoders/audio/audio_decoder_factory.h b/rocAL/include/decoders/audio/audio_decoder_factory.h index 6eb09e2da..c4882acbf 100644 --- a/rocAL/include/decoders/audio/audio_decoder_factory.h +++ b/rocAL/include/decoders/audio/audio_decoder_factory.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/decoders/audio/sndfile_decoder.h b/rocAL/include/decoders/audio/sndfile_decoder.h index 991a82a35..aa99ba21a 100644 --- a/rocAL/include/decoders/audio/sndfile_decoder.h +++ b/rocAL/include/decoders/audio/sndfile_decoder.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/loaders/audio/audio_loader.h b/rocAL/include/loaders/audio/audio_loader.h index 9a59056e4..a89396b4c 100644 --- a/rocAL/include/loaders/audio/audio_loader.h +++ b/rocAL/include/loaders/audio/audio_loader.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/loaders/audio/audio_loader_sharded.h b/rocAL/include/loaders/audio/audio_loader_sharded.h index 1b1db7b71..3d6603459 100644 --- a/rocAL/include/loaders/audio/audio_loader_sharded.h +++ b/rocAL/include/loaders/audio/audio_loader_sharded.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/loaders/audio/audio_read_and_decode.h b/rocAL/include/loaders/audio/audio_read_and_decode.h index 151b7c961..38bc4ee72 100644 --- a/rocAL/include/loaders/audio/audio_read_and_decode.h +++ b/rocAL/include/loaders/audio/audio_read_and_decode.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/loaders/audio/audio_source_evaluator.h b/rocAL/include/loaders/audio/audio_source_evaluator.h index b6b5eb7cf..df9f17881 100644 --- a/rocAL/include/loaders/audio/audio_source_evaluator.h +++ b/rocAL/include/loaders/audio/audio_source_evaluator.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/loaders/audio/node_audio_loader.h b/rocAL/include/loaders/audio/node_audio_loader.h index 22eccc222..222f7df99 100644 --- a/rocAL/include/loaders/audio/node_audio_loader.h +++ b/rocAL/include/loaders/audio/node_audio_loader.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/include/loaders/audio/node_audio_loader_single_shard.h b/rocAL/include/loaders/audio/node_audio_loader_single_shard.h index 202edf2a4..d1ffc5bfb 100644 --- a/rocAL/include/loaders/audio/node_audio_loader_single_shard.h +++ b/rocAL/include/loaders/audio/node_audio_loader_single_shard.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/decoders/audio/audio_decoder_factory.cpp b/rocAL/source/decoders/audio/audio_decoder_factory.cpp index 6b7693d18..8ecb8dd90 100644 --- a/rocAL/source/decoders/audio/audio_decoder_factory.cpp +++ b/rocAL/source/decoders/audio/audio_decoder_factory.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/decoders/audio/sndfile_decoder.cpp b/rocAL/source/decoders/audio/sndfile_decoder.cpp index 1a8ccebaf..5407a6eef 100644 --- a/rocAL/source/decoders/audio/sndfile_decoder.cpp +++ b/rocAL/source/decoders/audio/sndfile_decoder.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/audio/audio_loader.cpp b/rocAL/source/loaders/audio/audio_loader.cpp index 1fa5fd645..248d920b4 100644 --- a/rocAL/source/loaders/audio/audio_loader.cpp +++ b/rocAL/source/loaders/audio/audio_loader.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/audio/audio_loader_sharded.cpp b/rocAL/source/loaders/audio/audio_loader_sharded.cpp index 529fe4bee..18e3efb1a 100644 --- a/rocAL/source/loaders/audio/audio_loader_sharded.cpp +++ b/rocAL/source/loaders/audio/audio_loader_sharded.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/audio/audio_read_and_decode.cpp b/rocAL/source/loaders/audio/audio_read_and_decode.cpp index 55a17f652..efea9a806 100644 --- a/rocAL/source/loaders/audio/audio_read_and_decode.cpp +++ b/rocAL/source/loaders/audio/audio_read_and_decode.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/audio/audio_source_evaluator.cpp b/rocAL/source/loaders/audio/audio_source_evaluator.cpp index 3cafbf205..d2a1728fc 100644 --- a/rocAL/source/loaders/audio/audio_source_evaluator.cpp +++ b/rocAL/source/loaders/audio/audio_source_evaluator.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/audio/node_audio_loader.cpp b/rocAL/source/loaders/audio/node_audio_loader.cpp index 8058be518..8dfdf9e6f 100644 --- a/rocAL/source/loaders/audio/node_audio_loader.cpp +++ b/rocAL/source/loaders/audio/node_audio_loader.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp b/rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp index 0105db9d0..6661c1d9c 100644 --- a/rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp +++ b/rocAL/source/loaders/audio/node_audio_loader_single_shard.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt b/tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt index 281350ec0..55ba6449f 100644 --- a/tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt +++ b/tests/cpp_api_tests/rocAL_audio_unittests/CMakeLists.txt @@ -2,8 +2,8 @@ # # MIT License # -# Copyright (c) 2018 - 2023 Advanced Micro Devices, Inc. -# +#Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved. + # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights diff --git a/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp b/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp index 708b250cd..9554a1067 100644 --- a/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp +++ b/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp @@ -1,7 +1,7 @@ /* MIT License -Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. 
All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 3bdcf8e2e99aedafc07f8e00fcd96e273f5836b7 Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Mon, 11 Mar 2024 08:10:15 -0400 Subject: [PATCH 004/388] formatting the files --- rocAL/include/decoders/audio/audio_decoder.h | 15 +++--- .../include/decoders/audio/sndfile_decoder.h | 5 +- .../loaders/audio/audio_loader_sharded.h | 9 ++-- .../source/decoders/audio/sndfile_decoder.cpp | 29 ++++++----- .../rocAL_audio_unittests.cpp | 51 +++++++++---------- 5 files changed, 53 insertions(+), 56 deletions(-) diff --git a/rocAL/include/decoders/audio/audio_decoder.h b/rocAL/include/decoders/audio/audio_decoder.h index d1927d6b6..132eb33ff 100644 --- a/rocAL/include/decoders/audio/audio_decoder.h +++ b/rocAL/include/decoders/audio/audio_decoder.h @@ -25,11 +25,12 @@ THE SOFTWARE. #include #include #include + #include "parameter_factory.h" #include "sndfile.h" class AudioDecoder { -public: + public: enum class Status { OK = 0, HEADER_DECODE_FAILED, @@ -38,14 +39,14 @@ class AudioDecoder { FAILED, NO_MEMORY }; - virtual AudioDecoder::Status initialize(const char *src_filename) = 0; // This function is responsible for initializing the audio decoder. It takes the source filename as input and returns the status of the initialization process. - virtual AudioDecoder::Status decode(float* buffer) = 0; //to pass buffer & number of frames/samples to decode - virtual AudioDecoder::Status decode_info(int* samples, int* channels, float* sample_rates) = 0; //to decode info about the audio samples + virtual AudioDecoder::Status initialize(const char* src_filename) = 0; // This function is responsible for initializing the audio decoder. It takes the source filename as input and returns the status of the initialization process. 
+ virtual AudioDecoder::Status decode(float* buffer) = 0; // to pass buffer & number of frames/samples to decode + virtual AudioDecoder::Status decode_info(int* samples, int* channels, float* sample_rates) = 0; // to decode info about the audio samples virtual void release() = 0; virtual ~AudioDecoder() = default; -protected: - const char *_src_filename = NULL; + + protected: + const char* _src_filename = NULL; SF_INFO _sfinfo; SNDFILE* _sf_ptr; }; - diff --git a/rocAL/include/decoders/audio/sndfile_decoder.h b/rocAL/include/decoders/audio/sndfile_decoder.h index aa99ba21a..e40b5a906 100644 --- a/rocAL/include/decoders/audio/sndfile_decoder.h +++ b/rocAL/include/decoders/audio/sndfile_decoder.h @@ -25,13 +25,12 @@ THE SOFTWARE. #include "audio_decoder.h" class SndFileDecoder : public AudioDecoder { -public: + public: //! Default constructor SndFileDecoder(); - AudioDecoder::Status initialize(const char *src_filename) override; + AudioDecoder::Status initialize(const char* src_filename) override; AudioDecoder::Status decode(float* buffer) override; AudioDecoder::Status decode_info(int* samples, int* channels, float* sample_rates) override; void release() override; ~SndFileDecoder() override; }; - diff --git a/rocAL/include/loaders/audio/audio_loader_sharded.h b/rocAL/include/loaders/audio/audio_loader_sharded.h index 3d6603459..cc67ab3fe 100644 --- a/rocAL/include/loaders/audio/audio_loader_sharded.h +++ b/rocAL/include/loaders/audio/audio_loader_sharded.h @@ -26,13 +26,13 @@ THE SOFTWARE. 
// AudioLoaderSharded Can be used to run load and decode in multiple shards, each shard by a single loader instance, // It improves load and decode performance since each loader loads the audios in parallel using an internal thread class AudioLoaderSharded : public LoaderModule { -public: + public: explicit AudioLoaderSharded(void* dev_resources); ~AudioLoaderSharded() override; LoaderModuleStatus load_next() override; void initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, RocalMemType mem_type, unsigned batch_size, bool keep_orig_size = false) override; void set_output(Tensor* output_audio) override; - void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override { THROW("set_random_bbox_data_reader is not compatible with this implementation") }; + void set_random_bbox_data_reader(std::shared_ptr randombboxcrop_meta_data_reader) override{THROW("set_random_bbox_data_reader is not compatible with this implementation")}; size_t remaining_count() override; void reset() override; void start_loading() override; @@ -43,7 +43,8 @@ class AudioLoaderSharded : public LoaderModule { void shut_down() override; void feed_external_input(const std::vector& input_images_names, const std::vector& input_buffer, const std::vector& roi_xywh, unsigned int max_width, unsigned int max_height, unsigned int channels, ExternalSourceFileMode mode, bool eos) override {} -private: + + private: void increment_loader_idx(); void* _dev_resources; bool _initialized = false; @@ -52,5 +53,5 @@ class AudioLoaderSharded : public LoaderModule { size_t _shard_count = 1; void fast_forward_through_empty_loaders(); size_t _prefetch_queue_depth; - Tensor *_output_tensor; + Tensor* _output_tensor; }; diff --git a/rocAL/source/decoders/audio/sndfile_decoder.cpp b/rocAL/source/decoders/audio/sndfile_decoder.cpp index 5407a6eef..d52f3500b 100644 --- a/rocAL/source/decoders/audio/sndfile_decoder.cpp +++ b/rocAL/source/decoders/audio/sndfile_decoder.cpp @@ -20,20 +20,22 
@@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "sndfile_decoder.h" + +#include + #include #include -#include -#include "sndfile_decoder.h" SndFileDecoder::SndFileDecoder(){}; AudioDecoder::Status SndFileDecoder::decode(float* buffer) { int read_frame_count = 0; read_frame_count = sf_readf_float(_sf_ptr, buffer, _sfinfo.frames); - if(read_frame_count != _sfinfo.frames) { + if (read_frame_count != _sfinfo.frames) { printf("Not able to decode all frames. Only decoded %d frames\n", read_frame_count); sf_close(_sf_ptr); - AudioDecoder::Status status = Status::CONTENT_DECODE_FAILED; + AudioDecoder::Status status = Status::CONTENT_DECODE_FAILED; return status; } AudioDecoder::Status status = Status::OK; @@ -49,23 +51,23 @@ AudioDecoder::Status SndFileDecoder::decode_info(int* samples, int* channels, fl if (_sfinfo.channels < 1) { THROW("Not able to process less than" + TOSTR(_sfinfo.channels) + "channels"); sf_close(_sf_ptr); - status = Status::HEADER_DECODE_FAILED; - return status; - }; + status = Status::HEADER_DECODE_FAILED; + return status; + }; if (_sfinfo.frames < 1) { THROW("Not able to process less than" + TOSTR(_sfinfo.frames) + "frames"); sf_close(_sf_ptr); - status = Status::HEADER_DECODE_FAILED; - return status; - }; + status = Status::HEADER_DECODE_FAILED; + return status; + }; return status; } // Initialize will open a new decoder and initialize the context -AudioDecoder::Status SndFileDecoder::initialize(const char *src_filename) { +AudioDecoder::Status SndFileDecoder::initialize(const char* src_filename) { _src_filename = src_filename; AudioDecoder::Status status = Status::OK; - memset(&_sfinfo, 0, sizeof(_sfinfo)) ; + memset(&_sfinfo, 0, sizeof(_sfinfo)); if (!(_sf_ptr = sf_open(src_filename, SFM_READ, &_sfinfo))) { /* Open failed so print an error message. 
*/ printf("Not able to open input file %s.\n", src_filename); @@ -79,9 +81,8 @@ AudioDecoder::Status SndFileDecoder::initialize(const char *src_filename) { } void SndFileDecoder::release() { - if(_sf_ptr != NULL) + if (_sf_ptr != NULL) sf_close(_sf_ptr); } SndFileDecoder::~SndFileDecoder() {} - diff --git a/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp b/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp index 9554a1067..1c07143e0 100644 --- a/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp +++ b/tests/cpp_api_tests/rocAL_audio_unittests/rocAL_audio_unittests.cpp @@ -22,16 +22,16 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include +#include + #include #include -#include +#include +#include #include -#include "rocal_api.h" - #include "opencv2/opencv.hpp" +#include "rocal_api.h" using namespace cv; #if USE_OPENCV_4 @@ -45,12 +45,11 @@ using namespace cv; #endif #define DISPLAY 1 -#define METADATA 0 // Switch the meta-data part once the meta-data reader (file list reader) is introduced +#define METADATA 0 // Switch the meta-data part once the meta-data reader (file list reader) is introduced using namespace std::chrono; int test(int test_case, const char *path, float sample_rate, int downmix, unsigned max_frames, unsigned max_channels, int gpu); -int main(int argc, const char **argv) -{ +int main(int argc, const char **argv) { // check command-line usage const int MIN_ARG_COUNT = 2; printf("Usage: image_augmentation gpu=1/cpu=0 \n"); @@ -88,8 +87,7 @@ int main(int argc, const char **argv) return return_val; } -int test(int test_case, const char *path, float sample_rate, int downmix, unsigned max_frames, unsigned max_channels, int gpu) -{ +int test(int test_case, const char *path, float sample_rate, int downmix, unsigned max_frames, unsigned max_channels, int gpu) { int inputBatchSize = 10; std::cout << ">>> test case " << test_case << std::endl; 
std::cout << ">>> Running on " << (gpu ? "GPU" : "CPU") << std::endl; @@ -109,14 +107,14 @@ int test(int test_case, const char *path, float sample_rate, int downmix, unsign // MetaData reader for input file_list which has file seperated by labels // if (METADATA) { // To uncomment later when meta-data reader for audio is added (PR4) // std::cout << "META DATA READER"; - // const char* file_list_path = "/workspace/rnnt/AMD/MIVisionX-data/rocal_data/audio_samples/audio_file_list.txt" ; // TODO: Add this as an arg in main() + // const char* file_list_path = "/workspace/rnnt/AMD/MIVisionX-data/rocal_data/audio_samples/audio_file_list.txt" ; // TODO: Add this as an arg in main() // metadata_output = rocalCreateFileListLabelReader(handle, path, file_list_path); // } - //Decoder - // RocalTensor input1; // Uncomment when augmentations are enabled - // RocalTensorList non_silent_region_op; // Uncomment when NSR is introduced (PR5) - // const char* file_list_path = "/media/MIVisionX-data/rocal_data/audio_samples/audio_file_list.txt" ; // Uncomment and use it when meta-data reader is introduced (PR4) + // Decoder + // RocalTensor input1; // Uncomment when augmentations are enabled + // RocalTensorList non_silent_region_op; // Uncomment when NSR is introduced (PR5) + // const char* file_list_path = "/media/MIVisionX-data/rocal_data/audio_samples/audio_file_list.txt" ; // Uncomment and use it when meta-data reader is introduced (PR4) rocalAudioFileSourceSingleShard(handle, path, 0, 1, true, false, false, false, max_frames, max_channels, 0); if (rocalGetStatus(handle) != ROCAL_OK) { std::cout << "Audio source could not initialize : " << rocalGetErrorMessage(handle) << std::endl; @@ -211,8 +209,7 @@ int test(int test_case, const char *path, float sample_rate, int downmix, unsign } */ rocalVerify(handle); - if (rocalGetStatus(handle) != ROCAL_OK) - { + if (rocalGetStatus(handle) != ROCAL_OK) { std::cout << "Could not verify the augmentation graph " << 
rocalGetErrorMessage(handle); return -1; } @@ -222,10 +219,9 @@ int test(int test_case, const char *path, float sample_rate, int downmix, unsign int iteration = 0; RocalTensorList output_tensor_list; - while (rocalGetRemainingImages(handle) >= static_cast(inputBatchSize)) - { - std::cout<<"\n rocalGetRemainingImages:: "<= static_cast(inputBatchSize)) { + std::cout << "\n rocalGetRemainingImages:: " << rocalGetRemainingImages(handle) << "\t inputBatchsize:: " << inputBatchSize; + std::cout << "\n iteration:: " << iteration; iteration++; if (rocalRun(handle) != 0) { break; @@ -234,21 +230,20 @@ int test(int test_case, const char *path, float sample_rate, int downmix, unsign output_tensor_list = rocalGetOutputTensors(handle); std::cout << "\n *****************************Audio output**********************************\n"; std::cout << "\n **************Printing the first 5 values of the Audio buffer**************\n"; - for(uint idx = 0; idx < output_tensor_list->size(); idx++) { - float * buffer = (float *)output_tensor_list->at(idx)->buffer(); - for(int n = 0; n < 5; n++) + for (uint idx = 0; idx < output_tensor_list->size(); idx++) { + float *buffer = (float *)output_tensor_list->at(idx)->buffer(); + for (int n = 0; n < 5; n++) std::cout << buffer[n] << "\n"; } if (METADATA) { RocalTensorList labels = rocalGetImageLabels(handle); - for(uint i = 0; i < labels->size(); i++) { - int * labels_buffer = (int *)(labels->at(i)->buffer()); + for (uint i = 0; i < labels->size(); i++) { + int *labels_buffer = (int *)(labels->at(i)->buffer()); std::cout << ">>>>> LABELS : " << labels_buffer[0] << "\t"; } - } - std::cout<<"******************************************************************************\n"; + std::cout << "******************************************************************************\n"; } rocalRelease(handle); return 0; From bc3481714712f0efb15e6f8c5c5043f781b9def1 Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Mon, 11 Mar 2024 10:12:05 -0400 Subject: [PATCH 
005/388] Resolve PR comments --- rocAL/include/decoders/audio/audio_decoder.h | 3 +- rocAL/source/api/rocal_api_data_loaders.cpp | 40 ++++++++++---------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/rocAL/include/decoders/audio/audio_decoder.h b/rocAL/include/decoders/audio/audio_decoder.h index 132eb33ff..f36173ec7 100644 --- a/rocAL/include/decoders/audio/audio_decoder.h +++ b/rocAL/include/decoders/audio/audio_decoder.h @@ -26,7 +26,6 @@ THE SOFTWARE. #include #include -#include "parameter_factory.h" #include "sndfile.h" class AudioDecoder { @@ -40,7 +39,7 @@ class AudioDecoder { NO_MEMORY }; virtual AudioDecoder::Status initialize(const char* src_filename) = 0; // This function is responsible for initializing the audio decoder. It takes the source filename as input and returns the status of the initialization process. - virtual AudioDecoder::Status decode(float* buffer) = 0; // to pass buffer & number of frames/samples to decode + virtual AudioDecoder::Status decode(float* buffer) = 0; // to read audio frames and store in the buffer provided virtual AudioDecoder::Status decode_info(int* samples, int* channels, float* sample_rates) = 0; // to decode info about the audio samples virtual void release() = 0; virtual ~AudioDecoder() = default; diff --git a/rocAL/source/api/rocal_api_data_loaders.cpp b/rocAL/source/api/rocal_api_data_loaders.cpp index d5543fc90..dc494a942 100644 --- a/rocAL/source/api/rocal_api_data_loaders.cpp +++ b/rocAL/source/api/rocal_api_data_loaders.cpp @@ -2099,17 +2099,17 @@ rocalJpegExternalFileSource( RocalTensor ROCAL_API_CALL rocalAudioFileSourceSingleShard( - RocalContext p_context, - const char* source_path, - unsigned shard_id, - unsigned shard_count, - bool is_output, - bool shuffle, - bool loop, - bool downmix, - unsigned max_frames, - unsigned max_channels, - unsigned storage_type) { + RocalContext p_context, + const char* source_path, + unsigned shard_id, + unsigned shard_count, + bool is_output, + bool 
shuffle, + bool loop, + bool downmix, + unsigned max_frames, + unsigned max_channels, + unsigned storage_type) { Tensor* output = nullptr; auto context = static_cast(p_context); try { @@ -2156,15 +2156,15 @@ rocalAudioFileSourceSingleShard( RocalTensor ROCAL_API_CALL rocalAudioFileSource( - RocalContext p_context, - const char* source_path, - unsigned internal_shard_count, - bool is_output, - bool shuffle, - bool loop, - bool downmix, - unsigned max_frames, - unsigned max_channels) { + RocalContext p_context, + const char* source_path, + unsigned internal_shard_count, + bool is_output, + bool shuffle, + bool loop, + bool downmix, + unsigned max_frames, + unsigned max_channels) { Tensor* output = nullptr; auto context = static_cast(p_context); try { From 7082127b504dd034009c466fbdf870a95571ee4c Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Mon, 11 Mar 2024 14:44:55 +0000 Subject: [PATCH 006/388] NWC --- rocAL_pybind/amd/rocal/decoders.py | 16 ++++ rocAL_pybind/amd/rocal/plugin/pytorch.py | 103 +++++++++++++++++++++++ rocAL_pybind/rocal_pybind.cpp | 22 ++++- 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/rocAL_pybind/amd/rocal/decoders.py b/rocAL_pybind/amd/rocal/decoders.py index 2c4741453..28b2f71be 100644 --- a/rocAL_pybind/amd/rocal/decoders.py +++ b/rocAL_pybind/amd/rocal/decoders.py @@ -402,3 +402,19 @@ def image_slice(*inputs, file_root='', path='', annotations_file='', shard_id=0, image_decoder_slice = b.fusedDecoderCropShard( Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) return (image_decoder_slice) + +def audio(*inputs, file_root='', file_list_path='', bytes_per_sample_hint=[0], shard_id=0, num_shards=1, random_shuffle=False, downmix=False, dtype=types.FLOAT, quality=50.0, max_frames=1 , max_channels=1 ,sample_rate=0.0, seed=1, storage_type=0, stick_to_shard=False, shard_size=-1): + kwargs_pybind = { + "source_path": file_root, + "shard_id": shard_id, + "num_shards": num_shards, + "is_output": False, + "shuffle": 
random_shuffle, + "loop": False, + "downmix": downmix, + "max_frames": max_frames, + "max_channels": max_channels, + "storage_type": storage_type + } + decoded_audio = b.audioDecoderSingleShard(Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) + return decoded_audio \ No newline at end of file diff --git a/rocAL_pybind/amd/rocal/plugin/pytorch.py b/rocAL_pybind/amd/rocal/plugin/pytorch.py index bfc888ad0..80a9788a4 100644 --- a/rocAL_pybind/amd/rocal/plugin/pytorch.py +++ b/rocAL_pybind/amd/rocal/plugin/pytorch.py @@ -270,6 +270,109 @@ def __init__(self, super(ROCALClassificationIterator, self).__init__(pipe, tensor_layout=pipe._tensor_layout, tensor_dtype=pipe._tensor_dtype, multiplier=pipe._multiplier, offset=pipe._offset, display=display, device=device, device_id=device_id) +class ROCALAudioIterator(object): + """ + ROCAL iterator for audio tasks for PyTorch + Please keep in mind that Tensors returned by the iterator are + still owned by ROCAL. They are valid till the next iterator call. + If the content needs to be preserved please copy it to another tensor. + Parameters + ---------- + pipelines : list of amd.rocalLI.pipeline.Pipeline + List of pipelines to use + size : int + Number of samples in the epoch (Usually the size of the dataset). + auto_reset : bool, optional, default = False + Whether the iterator resets itself for the next epoch + or it requires reset() to be called separately. + fill_last_batch : bool, optional, default = True + Whether to fill the last batch with data up to 'self.batch_size'. + The iterator would return the first integer multiple + of self._num_gpus * self.batch_size entries which exceeds 'size'. + Setting this flag to False will cause the iterator to return + exactly 'size' entries. + dynamic_shape: bool, optional, default = False + Whether the shape of the output of the RALI pipeline can + change during execution. 
If True, the pytorch tensor will be resized accordingly + if the shape of ROCAL returned tensors changes during execution. + If False, the iterator will fail in case of change. + last_batch_padded : bool, optional, default = False + Whether the last batch provided by ROCAL is padded with the last sample + or it just wraps up. In conjunction with `fill_last_batch` it tells + if the iterator returning last batch with data only partially filled with + data from the current epoch is dropping padding samples or samples from + the next epoch. If set to False next epoch will end sooner as data from + it was consumed but dropped. If set to True next epoch would be the + same length as the first one. + Example + ------- + With the data set [1,2,3,4,5,6,7] and the batch size 2: + fill_last_batch = False, last_batch_padded = True -> last batch = [7], next iteration will return [1, 2] + fill_last_batch = False, last_batch_padded = False -> last batch = [7], next iteration will return [2, 3] + fill_last_batch = True, last_batch_padded = True -> last batch = [7, 7], next iteration will return [1, 2] + fill_last_batch = True, last_batch_padded = False -> last batch = [7, 1], next iteration will return [2, 3] + + """ + def __init__(self, pipeline, tensor_layout = types.NCHW, reverse_channels = False, multiplier = [1.0, 1.0, 1.0], offset = [0.0, 0.0, 0.0], tensor_dtype = types.FLOAT, size = -1, auto_reset = False, device = "cpu", device_id = 0): + self.loader = pipeline + self.tensor_format = tensor_layout + self.multiplier = multiplier + self.offset = offset + self.reverse_channels = reverse_channels + self.device = device + self.device_id = device_id + self.output = None + self.iterator_length = b.getRemainingImages(self.loader._handle) + self.max_shape = None + self.batch_size = self.loader._batch_size + self.output_list = None + self.labels_size = self.batch_size + self.output_memory_type = self.loader._output_memory_type + + + def next(self): + return self.__next__() + + def
__next__(self): + if self.loader.rocal_run() != 0: + raise StopIteration + else: + self.output_tensor_list = self.loader.get_output_tensors() + + self.output_list = [] + for i in range(len(self.output_tensor_list)): + dimensions = self.output_tensor_list[i].dimensions() + if self.device == "cpu": + torch_dtype = self.output_tensor_list[i].dtype() + output = torch.empty(dimensions, dtype=getattr(torch, torch_dtype)) + self.labels_tensor = torch.empty(self.labels_size, dtype=getattr(torch, torch_dtype)) + else: + torch_gpu_device = torch.device('cuda', self.device_id) + torch_dtype = self.output_tensor_list[i].dtype() + output = torch.empty(dimensions, dtype=getattr(torch, torch_dtype), device=torch_gpu_device) + self.labels_tensor = torch.empty(self.labels_size, dtype=getattr(torch, torch_dtype), device=torch_gpu_device) + + self.output_tensor_list[i].copy_data(ctypes.c_void_p(output.data_ptr()), self.output_memory_type) + self.output_list.append(output) + + + # self.labels = self.loader.get_image_labels() #Uncomment when meta-data is added + # self.labels_tensor = self.labels_tensor.copy_(torch.from_numpy(self.labels)).long() + + return self.output_list, self.labels_tensor, torch.tensor(self.output_tensor_list[0].get_rois().reshape(self.batch_size,4)[...,0:2]) + + def reset(self): + b.rocalResetLoaders(self.loader._handle) + + def __iter__(self): + return self + + def __len__(self): + return self.iterator_length + + def __del__(self): + b.rocalRelease(self.loader._handle) + def draw_patches(img, idx, bboxes): """!Writes images to disk as a PNG file. diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index 06e226dce..4c3d23c6a 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -277,7 +277,27 @@ PYBIND11_MODULE(rocal_pybind, m) { R"code( Returns a rocal tensor at given position `idx` in the rocalTensorlist. 
)code", - py::keep_alive<0, 1>()); + py::keep_alive<0, 1>()) + + + .def( + "get_rois", + [](rocalTensor &output_tensor) + { + return py::array(py::buffer_info( + (int *)(output_tensor.get_roi()), + sizeof(int), + py::format_descriptor< int>::format(), + 1, + {output_tensor.dims().at(0) * 4}, + {sizeof(int) })); + }, + R"code( + Returns a tensor ROI + ex : width, height in case of an image data + ex : samples , channels in case of an audio data + )code" + ); py::class_(m, "rocalTensorList") .def( "__getitem__", From a6c5727ef7c6084af479aabfd8edfa31a605c724 Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Tue, 12 Mar 2024 02:10:34 -0400 Subject: [PATCH 007/388] Resolve PR comments --- rocAL/include/loaders/image/cifar10_data_loader.h | 2 +- rocAL/source/loaders/image/cifar10_data_loader.cpp | 2 +- rocAL/source/loaders/video/video_loader.cpp | 2 +- rocAL/source/meta_data/bounding_box_graph.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rocAL/include/loaders/image/cifar10_data_loader.h b/rocAL/include/loaders/image/cifar10_data_loader.h index 7f8c6a784..da646f6a4 100644 --- a/rocAL/include/loaders/image/cifar10_data_loader.h +++ b/rocAL/include/loaders/image/cifar10_data_loader.h @@ -59,7 +59,7 @@ class CIFAR10DataLoader : public LoaderModule { LoaderModuleStatus load_routine(); std::shared_ptr _reader; void *_dev_resources; - decoded_sample_info _raw_img_info; // image info to store the names. In this case the ID of image is stored in _roi_width field + decoded_sample_info _raw_img_info; // sample info to store the names. 
In this case the ID of image is stored in _roi_width field decoded_sample_info _output_decoded_img_info; bool _initialized = false; RocalMemType _mem_type; diff --git a/rocAL/source/loaders/image/cifar10_data_loader.cpp b/rocAL/source/loaders/image/cifar10_data_loader.cpp index f5ef881a3..15e1347ce 100644 --- a/rocAL/source/loaders/image/cifar10_data_loader.cpp +++ b/rocAL/source/loaders/image/cifar10_data_loader.cpp @@ -285,4 +285,4 @@ decoded_sample_info CIFAR10DataLoader::get_decode_sample_info() { crop_image_info CIFAR10DataLoader::get_crop_image_info() { return _output_cropped_image_info; -} \ No newline at end of file +} diff --git a/rocAL/source/loaders/video/video_loader.cpp b/rocAL/source/loaders/video/video_loader.cpp index 6a79034c1..0faadeb1e 100644 --- a/rocAL/source/loaders/video/video_loader.cpp +++ b/rocAL/source/loaders/video/video_loader.cpp @@ -294,4 +294,4 @@ std::vector> VideoLoader::get_sequence_frame_timestamps() { _sequence_frame_timestamps_vec.pop_back(); return sequence_frame_timestamp; } -#endif \ No newline at end of file +#endif diff --git a/rocAL/source/meta_data/bounding_box_graph.cpp b/rocAL/source/meta_data/bounding_box_graph.cpp index b44984bb0..1943876b4 100644 --- a/rocAL/source/meta_data/bounding_box_graph.cpp +++ b/rocAL/source/meta_data/bounding_box_graph.cpp @@ -280,4 +280,4 @@ void BoundingBoxGraph::update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_inf matched_vals.clear(); low_quality_preds.clear(); } -} \ No newline at end of file +} From 89fd4cb3ec4d04ae5e13e2449488a87f391318f4 Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Tue, 12 Mar 2024 02:17:37 -0400 Subject: [PATCH 008/388] Change decoded_img_info to decoded_video_info --- rocAL/include/loaders/video/video_loader.h | 4 +-- rocAL/source/loaders/video/video_loader.cpp | 30 ++++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/rocAL/include/loaders/video/video_loader.h b/rocAL/include/loaders/video/video_loader.h index 
81bbaee50..ffc45c6e6 100644 --- a/rocAL/include/loaders/video/video_loader.h +++ b/rocAL/include/loaders/video/video_loader.h @@ -74,8 +74,8 @@ class VideoLoader : public LoaderModule { size_t _sequence_length; std::thread _load_thread; RocalMemType _mem_type; - decoded_sample_info _decoded_img_info; - decoded_sample_info _output_decoded_img_info; + decoded_sample_info _decoded_video_info; + decoded_sample_info _output_decoded_video_info; CircularBuffer _circ_buff; TimingDBG _swap_handle_time; bool _is_initialized; diff --git a/rocAL/source/loaders/video/video_loader.cpp b/rocAL/source/loaders/video/video_loader.cpp index 0faadeb1e..cacb41692 100644 --- a/rocAL/source/loaders/video/video_loader.cpp +++ b/rocAL/source/loaders/video/video_loader.cpp @@ -131,11 +131,11 @@ void VideoLoader::initialize(ReaderConfig reader_cfg, DecoderConfig decoder_cfg, } _max_tensor_width = _output_tensor->info().max_shape().at(0); _max_tensor_height = _output_tensor->info().max_shape().at(1); - _decoded_img_info._sample_names.resize(_batch_size); - _decoded_img_info._roi_height.resize(_batch_size); - _decoded_img_info._roi_width.resize(_batch_size); - _decoded_img_info._original_height.resize(_batch_size); - _decoded_img_info._original_width.resize(_batch_size); + _decoded_video_info._sample_names.resize(_batch_size); + _decoded_video_info._roi_height.resize(_batch_size); + _decoded_video_info._roi_width.resize(_batch_size); + _decoded_video_info._original_height.resize(_batch_size); + _decoded_video_info._original_width.resize(_batch_size); _circ_buff.init(_mem_type, _output_mem_size, _prefetch_queue_depth); _is_initialized = true; LOG("Loader module initialized"); @@ -163,19 +163,19 @@ VideoLoader::load_routine() { auto load_status = LoaderModuleStatus::NO_MORE_DATA_TO_READ; { load_status = _video_loader->load(data, - _decoded_img_info._sample_names, + _decoded_video_info._sample_names, _max_tensor_width, _max_tensor_height, - _decoded_img_info._roi_width, - 
_decoded_img_info._roi_height, - _decoded_img_info._original_width, - _decoded_img_info._original_height, + _decoded_video_info._roi_width, + _decoded_video_info._roi_height, + _decoded_video_info._original_width, + _decoded_video_info._original_height, _sequence_start_framenum_vec, _sequence_frame_timestamps_vec, _output_tensor->info().color_format()); if (load_status == LoaderModuleStatus::OK) { - _circ_buff.set_sample_info(_decoded_img_info); + _circ_buff.set_sample_info(_decoded_video_info); _circ_buff.push(); _image_counter += _output_tensor->info().batch_size(); } @@ -233,9 +233,9 @@ VideoLoader::update_output_image() { } if (_stopped) return LoaderModuleStatus::OK; - _output_decoded_img_info = _circ_buff.get_sample_info(); - _output_names = _output_decoded_img_info._sample_names; - _output_tensor->update_tensor_roi(_output_decoded_img_info._roi_width, _output_decoded_img_info._roi_height); + _output_decoded_video_info = _circ_buff.get_sample_info(); + _output_names = _output_decoded_video_info._sample_names; + _output_tensor->update_tensor_roi(_output_decoded_video_info._roi_width, _output_decoded_video_info._roi_height); _circ_buff.pop(); if (!_loop) _remaining_sequences_count -= _batch_size; @@ -278,7 +278,7 @@ std::vector VideoLoader::get_id() { } decoded_sample_info VideoLoader::get_decode_sample_info() { - return _output_decoded_img_info; + return _output_decoded_video_info; } std::vector VideoLoader::get_sequence_start_frame_number() { From 5f627b25d4c8ca4c5dc40f7d67df08376c729f2a Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Tue, 12 Mar 2024 07:14:51 +0000 Subject: [PATCH 009/388] Change the file_path() function to virtual from pure virtual --- rocAL/include/readers/image/caffe2_lmdb_record_reader.h | 3 --- rocAL/include/readers/image/caffe_lmdb_record_reader.h | 3 --- rocAL/include/readers/image/cifar10_data_reader.h | 3 --- rocAL/include/readers/image/coco_file_source_reader.h | 3 --- rocAL/include/readers/image/external_source_reader.h | 5 +---- 
rocAL/include/readers/image/image_reader.h | 2 +- rocAL/include/readers/image/mxnet_recordio_reader.h | 3 --- rocAL/include/readers/image/tf_record_reader.h | 2 -- rocAL/include/readers/video/sequence_file_source_reader.h | 5 +---- 9 files changed, 3 insertions(+), 26 deletions(-) diff --git a/rocAL/include/readers/image/caffe2_lmdb_record_reader.h b/rocAL/include/readers/image/caffe2_lmdb_record_reader.h index d30a3ad64..363ffffe6 100644 --- a/rocAL/include/readers/image/caffe2_lmdb_record_reader.h +++ b/rocAL/include/readers/image/caffe2_lmdb_record_reader.h @@ -60,9 +60,6 @@ class Caffe2LMDBRecordReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; - //! Returns the name of the latest file_path opened - std::string file_path() override { return _last_file_name; } - unsigned count_items() override; ~Caffe2LMDBRecordReader() override; diff --git a/rocAL/include/readers/image/caffe_lmdb_record_reader.h b/rocAL/include/readers/image/caffe_lmdb_record_reader.h index 710852b04..97be19ea4 100644 --- a/rocAL/include/readers/image/caffe_lmdb_record_reader.h +++ b/rocAL/include/readers/image/caffe_lmdb_record_reader.h @@ -60,9 +60,6 @@ class CaffeLMDBRecordReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; - //! Returns the name of the latest file_path opened - std::string file_path() override { return _last_file_name; } - unsigned count_items() override; ~CaffeLMDBRecordReader() override; diff --git a/rocAL/include/readers/image/cifar10_data_reader.h b/rocAL/include/readers/image/cifar10_data_reader.h index cbd397140..9b8b12276 100644 --- a/rocAL/include/readers/image/cifar10_data_reader.h +++ b/rocAL/include/readers/image/cifar10_data_reader.h @@ -54,9 +54,6 @@ class CIFAR10DataReader : public Reader { //! Returns the name of the latest data_id opened std::string id() override { return _last_id; }; - //! 
Returns the name of the latest file_path opened - std::string file_path() override {return _last_file_name; } - unsigned count_items() override; ~CIFAR10DataReader() override; diff --git a/rocAL/include/readers/image/coco_file_source_reader.h b/rocAL/include/readers/image/coco_file_source_reader.h index e8896a35f..fd14c5061 100644 --- a/rocAL/include/readers/image/coco_file_source_reader.h +++ b/rocAL/include/readers/image/coco_file_source_reader.h @@ -58,9 +58,6 @@ class COCOFileSourceReader : public Reader { //! Returns the name of the latest file opened std::string id() override { return _last_id; }; - //! Returns the name of the latest file_path opened - std::string file_path() override { return _last_file_name; } - unsigned count_items() override; ~COCOFileSourceReader() override; diff --git a/rocAL/include/readers/image/external_source_reader.h b/rocAL/include/readers/image/external_source_reader.h index f37f23c64..9864ed71b 100644 --- a/rocAL/include/readers/image/external_source_reader.h +++ b/rocAL/include/readers/image/external_source_reader.h @@ -58,9 +58,6 @@ class ExternalSourceReader : public Reader, public ExternalSourceImageReader { //! Returns the name of the latest file opened std::string id() override { return _last_id; } - //! Returns the name of the latest file_path opened - std::string file_path() override { return _last_file_name; } - //! Return batch_size() for count_items unless end_of_sequence has been signalled unsigned count_items() override; @@ -94,7 +91,7 @@ class ExternalSourceReader : public Reader, public ExternalSourceImageReader { unsigned _curr_file_idx; FILE* _current_fPtr; unsigned _current_file_size; - std::string _last_id, _last_file_name; + std::string _last_id; size_t _shard_id = 0; size_t _shard_count = 1; // equivalent of batch size //!< _batch_count Defines the quantum count of the images to be read. It's usually equal to the user's batch size. 
diff --git a/rocAL/include/readers/image/image_reader.h b/rocAL/include/readers/image/image_reader.h index e10bb2b5c..9b44c39f4 100644 --- a/rocAL/include/readers/image/image_reader.h +++ b/rocAL/include/readers/image/image_reader.h @@ -173,7 +173,7 @@ class Reader { //! Returns the number of items remained in this resource //! Returns the path of the last item opened in this resource - virtual std::string file_path() = 0; + virtual std::string file_path() { return {}; } virtual unsigned count_items() = 0; diff --git a/rocAL/include/readers/image/mxnet_recordio_reader.h b/rocAL/include/readers/image/mxnet_recordio_reader.h index eafcba565..04c1299af 100644 --- a/rocAL/include/readers/image/mxnet_recordio_reader.h +++ b/rocAL/include/readers/image/mxnet_recordio_reader.h @@ -58,9 +58,6 @@ class MXNetRecordIOReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; - //! Returns the name of the latest file_path opened - std::string file_path() override { return _last_file_name; } - unsigned count_items() override; ~MXNetRecordIOReader() override; diff --git a/rocAL/include/readers/image/tf_record_reader.h b/rocAL/include/readers/image/tf_record_reader.h index caf79d6bb..ef47a3700 100644 --- a/rocAL/include/readers/image/tf_record_reader.h +++ b/rocAL/include/readers/image/tf_record_reader.h @@ -60,8 +60,6 @@ class TFRecordReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; - //! 
Returns the name of the latest file_path opened - std::string file_path() override { return _last_file_name; } unsigned count_items() override; diff --git a/rocAL/include/readers/video/sequence_file_source_reader.h b/rocAL/include/readers/video/sequence_file_source_reader.h index 77b2241c8..fc68163f5 100644 --- a/rocAL/include/readers/video/sequence_file_source_reader.h +++ b/rocAL/include/readers/video/sequence_file_source_reader.h @@ -56,9 +56,6 @@ class SequenceFileSourceReader : public Reader { //! Returns the name of the latest file opened std::string id() override { return _last_id; } - //! Returns the name of the latest file_path opened - std::string file_path() override { return _last_file_name; } - unsigned count_items() override; ~SequenceFileSourceReader() override; @@ -83,7 +80,7 @@ class SequenceFileSourceReader : public Reader { unsigned _curr_file_idx; FILE *_current_fPtr; unsigned _current_file_size; - std::string _last_id, _last_file_name; + std::string _last_id; std::vector _last_sequence; size_t _sequence_length; size_t _step; From 97d06280b6016f3baeada9660a8f731984f2e123 Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Tue, 12 Mar 2024 07:16:36 +0000 Subject: [PATCH 010/388] Minor change --- rocAL/include/readers/video/sequence_file_source_reader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocAL/include/readers/video/sequence_file_source_reader.h b/rocAL/include/readers/video/sequence_file_source_reader.h index fc68163f5..a2f4a555f 100644 --- a/rocAL/include/readers/video/sequence_file_source_reader.h +++ b/rocAL/include/readers/video/sequence_file_source_reader.h @@ -54,7 +54,7 @@ class SequenceFileSourceReader : public Reader { void reset() override; //! 
Returns the name of the latest file opened - std::string id() override { return _last_id; } + std::string id() override { return _last_id; }; unsigned count_items() override; From 6d53e570a38a3d68ea3b0ba369ba9992a89e3a8a Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Tue, 12 Mar 2024 07:17:40 +0000 Subject: [PATCH 011/388] Minor changes --- rocAL/include/readers/image/tf_record_reader.h | 1 - 1 file changed, 1 deletion(-) diff --git a/rocAL/include/readers/image/tf_record_reader.h b/rocAL/include/readers/image/tf_record_reader.h index ef47a3700..2fe1d6fb1 100644 --- a/rocAL/include/readers/image/tf_record_reader.h +++ b/rocAL/include/readers/image/tf_record_reader.h @@ -60,7 +60,6 @@ class TFRecordReader : public Reader { //! Returns the id of the latest file opened std::string id() override { return _last_id; }; - unsigned count_items() override; ~TFRecordReader() override; From 0f7b6faebb2d424b276a9386ab3d3169b6dc3558 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Tue, 12 Mar 2024 08:21:40 +0000 Subject: [PATCH 012/388] Adding support for file list reader --- rocAL/include/api/rocal_api_meta_data.h | 3 +- .../meta_data/label_reader_file_list.h | 55 ++++ rocAL/include/meta_data/meta_data_reader.h | 3 +- rocAL/include/pipeline/master_graph.h | 2 +- .../include/readers/image/file_list_reader.h | 106 ++++++++ rocAL/include/readers/image/image_reader.h | 1 + rocAL/source/api/rocal_api_meta_data.cpp | 10 +- .../meta_data/label_reader_file_list.cpp | 128 ++++++++++ .../meta_data/meta_data_reader_factory.cpp | 9 + rocAL/source/pipeline/master_graph.cpp | 10 +- .../source/readers/image/file_list_reader.cpp | 238 ++++++++++++++++++ rocAL/source/readers/image/reader_factory.cpp | 7 + rocAL_pybind/amd/rocal/decoders.py | 2 + rocAL_pybind/amd/rocal/readers.py | 2 +- 14 files changed, 565 insertions(+), 11 deletions(-) create mode 100644 rocAL/include/meta_data/label_reader_file_list.h create mode 100644 rocAL/include/readers/image/file_list_reader.h create mode 100644 
rocAL/source/meta_data/label_reader_file_list.cpp create mode 100644 rocAL/source/readers/image/file_list_reader.cpp diff --git a/rocAL/include/api/rocal_api_meta_data.h b/rocAL/include/api/rocal_api_meta_data.h index 17407dbb8..b81724910 100644 --- a/rocAL/include/api/rocal_api_meta_data.h +++ b/rocAL/include/api/rocal_api_meta_data.h @@ -36,9 +36,10 @@ THE SOFTWARE. * \ingroup group_rocal_meta_data * \param [in] rocal_context rocal context * \param [in] source_path path to the folder that contains the dataset or metadata file + * \param [in] file_list_path path to the file list that contains the file names and their corresponding labels; when empty (the default), the folder-based label reader is used * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateLabelReader(RocalContext rocal_context, const char* source_path); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateLabelReader(RocalContext rocal_context, const char* source_path, const char* file_list_path = ""); /*! \brief creates video label reader * \ingroup group_rocal_meta_data diff --git a/rocAL/include/meta_data/label_reader_file_list.h b/rocAL/include/meta_data/label_reader_file_list.h new file mode 100644 index 000000000..69e756289 --- /dev/null +++ b/rocAL/include/meta_data/label_reader_file_list.h @@ -0,0 +1,55 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include + +#include + +#include "commons.h" +#include "meta_data.h" +#include "meta_data_reader.h" + +class LabelReaderFileList : public MetaDataReader { + public: + void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override; + void lookup(const std::vector& image_names) override; + void read_all(const std::string& path) override; + void release(std::string image_name); + void release() override; + void print_map_contents(); + bool set_timestamp_mode() override { return false; } + const std::map>& get_map_content() override { return _map_content; } + LabelReaderFileList(); + + private: + bool exists(const std::string& image_name) override; + void add(std::string image_name, int label); + std::map> _map_content; + std::map>::iterator _itr; + std::string _path, _file_list_path; + pMetaDataBatch _output; + DIR *_src_dir, *_sub_dir; + struct dirent* _entity; + std::vector _file_names; + std::vector _subfolder_file_names; +}; diff --git a/rocAL/include/meta_data/meta_data_reader.h b/rocAL/include/meta_data/meta_data_reader.h index 035fe5411..de8780365 100644 --- a/rocAL/include/meta_data/meta_data_reader.h +++ b/rocAL/include/meta_data/meta_data_reader.h @@ -41,7 +41,8 @@ enum class MetaDataReaderType { CAFFE2_DETECTION_META_DATA_READER, TF_DETECTION_META_DATA_READER, VIDEO_LABEL_READER, - MXNET_META_DATA_READER + MXNET_META_DATA_READER, + FILE_LIST_META_DATA_READER }; struct MetaDataConfig { diff --git a/rocAL/include/pipeline/master_graph.h 
b/rocAL/include/pipeline/master_graph.h index 6019389b2..81e07289d 100644 --- a/rocAL/include/pipeline/master_graph.h +++ b/rocAL/include/pipeline/master_graph.h @@ -107,7 +107,7 @@ class MasterGraph { std::shared_ptr meta_add_node(std::shared_ptr node); Tensor *create_tensor(const TensorInfo &info, bool is_output); Tensor *create_loader_output_tensor(const TensorInfo &info); - std::vector create_label_reader(const char *source_path, MetaDataReaderType reader_type); + std::vector create_label_reader(const char *source_path, const char *file_list_path, MetaDataReaderType reader_type); std::vector create_video_label_reader(const char *source_path, MetaDataReaderType reader_type, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true); std::vector create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType label_type, bool ltrb_bbox = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, bool is_box_iou_matcher = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); diff --git a/rocAL/include/readers/image/file_list_reader.h b/rocAL/include/readers/image/file_list_reader.h new file mode 100644 index 000000000..69b308050 --- /dev/null +++ b/rocAL/include/readers/image/file_list_reader.h @@ -0,0 +1,106 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once +#include + +#include +#include +#include + +#include "commons.h" +#include "reader.h" +#include "timing_debug.h" + +class FileListReader : public Reader { + public: + //! Looks up the folder which contains the files, and loads the image names + /*! + \param desc User provided descriptor containing the files' path. + */ + Reader::Status initialize(ReaderConfig desc) override; + //! Reads the next resource item + /*! + \param buf User's provided buffer to receive the loaded images + \return Size of the loaded resource + */ + size_t read_data(unsigned char *buf, size_t max_size) override; + //! Opens the next file in the folder + /*! + \return The size of the next file, 0 if couldn't access it + */ + size_t open() override; + + //! Resets the object's state to read from the first file in the folder + void reset() override; + + //! 
Returns the name of the latest file opened + std::string id() override { return _last_id; } + + //! Returns the name of the latest file path opened + std::string file_path() override { return _last_file_path; } + + //! Returns the total number of items present in the list + unsigned count_items() override; + + ~FileListReader() override; + + int close() override; + + FileListReader(); + + private: + //! opens the folder containing the images + std::shared_ptr _meta_data_reader = nullptr; + Reader::Status open_folder(); + Reader::Status subfolder_reading(); + std::string _folder_path; + std::string _file_list_path; + DIR *_src_dir; + DIR *_sub_dir; + struct dirent *_entity; + std::vector _file_names; + unsigned _curr_file_idx; + FILE *_current_fPtr; + unsigned _current_file_size; + std::string _last_id; + std::string _last_file_name, _last_file_path; + size_t _shard_id = 0; + size_t _shard_count = 1; + //!< _batch_count Defines the quantum count of the images to be read. It's usually equal to the user's batch size. + /// The loader will repeat images if necessary to be able to have images available in multiples of the load_batch_count, + /// for instance if there are 10 images in the dataset and _batch_count is 3, the loader repeats 2 images as if there are 12 images available. + size_t _batch_count = 1; + size_t _file_id = 0; + size_t _in_batch_read_count = 0; + bool _loop; + bool _shuffle; + int _read_counter = 0; + //!< _file_count_all_shards total_number of files to figure out the max_batch_size (usually needed for distributed training). 
+ size_t _file_count_all_shards; + void increment_read_ptr(); + int release(); + size_t get_file_shard_id(); + void incremenet_file_id() { _file_id++; } + void replicate_last_image_to_fill_last_shard(); + void replicate_last_batch_to_pad_partial_shard(); +}; diff --git a/rocAL/include/readers/image/image_reader.h b/rocAL/include/readers/image/image_reader.h index e10bb2b5c..f48b52d83 100644 --- a/rocAL/include/readers/image/image_reader.h +++ b/rocAL/include/readers/image/image_reader.h @@ -48,6 +48,7 @@ enum class StorageType { MXNET_RECORDIO = 7, VIDEO_FILE_SYSTEM = 8, EXTERNAL_FILE_SOURCE = 9, // to support reading from external source + FILE_LIST_SYSTEM = 10 // to support reading from file lists }; enum class ExternalSourceFileMode { diff --git a/rocAL/source/api/rocal_api_meta_data.cpp b/rocAL/source/api/rocal_api_meta_data.cpp index 313553507..d8b3fa04c 100644 --- a/rocAL/source/api/rocal_api_meta_data.cpp +++ b/rocAL/source/api/rocal_api_meta_data.cpp @@ -51,12 +51,14 @@ void RocalMetaData ROCAL_API_CALL - rocalCreateLabelReader(RocalContext p_context, const char* source_path) { + rocalCreateLabelReader(RocalContext p_context, const char* source_path, const char* file_list_path) { if (!p_context) THROW("Invalid rocal context passed to rocalCreateLabelReader") auto context = static_cast(p_context); - - return context->master_graph->create_label_reader(source_path, MetaDataReaderType::FOLDER_BASED_LABEL_READER); + if (strlen(file_list_path) == 0) + return context->master_graph->create_label_reader(source_path, file_list_path, MetaDataReaderType::FOLDER_BASED_LABEL_READER); + else + return context->master_graph->create_label_reader(source_path, file_list_path, MetaDataReaderType::FILE_LIST_META_DATA_READER); } RocalMetaData @@ -152,7 +154,7 @@ RocalMetaData if (!p_context) THROW("Invalid rocal context passed to rocalCreateTextFileBasedLabelReader") auto context = static_cast(p_context); - return context->master_graph->create_label_reader(source_path, 
MetaDataReaderType::TEXT_FILE_META_DATA_READER); + return context->master_graph->create_label_reader(source_path, "", MetaDataReaderType::TEXT_FILE_META_DATA_READER); } void diff --git a/rocAL/source/meta_data/label_reader_file_list.cpp b/rocAL/source/meta_data/label_reader_file_list.cpp new file mode 100644 index 000000000..175e3dc25 --- /dev/null +++ b/rocAL/source/meta_data/label_reader_file_list.cpp @@ -0,0 +1,128 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "label_reader_file_list.h" + +#include + +#include +#include +#include +#include +#include + +#include "commons.h" +#include "exception.h" +#include "filesystem.h" + +using namespace std; + +LabelReaderFileList::LabelReaderFileList() { + _src_dir = nullptr; + _entity = nullptr; + _sub_dir = nullptr; +} + +void LabelReaderFileList::init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) { + _file_list_path = cfg.path(); + _output = meta_data_batch; +} + +bool LabelReaderFileList::exists(const std::string& sample_name) { + return _map_content.find(sample_name) != _map_content.end(); +} + +void LabelReaderFileList::add(std::string sample_name, int label) { + pMetaData info = std::make_shared