-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* First NWC for webDatasets * Add some funcs - NWC * NCW * cmake for tar file * Fix Cmake issue * Add WebDataset source readers * Fix issues with source readers * NWC * Parsing correct labels by adding the tar & file_stream utilities * Changes to call the WebDataset source reader * WebDataset source reader changes to include tar utils and stream utils * Fix look up issue * Fix the outputs when testing train.tar * Fix image outputs for webdataset reader * Add the support for index file parsing * Add MetaData support for WebDataset reader to support storing ASCII values. * Fixing the 2nd component outputs in WebDataset Reader * Level 1 formatting * Rename utilities functions * Code clean up * Add webdataset source evaluator * Fix warnings and unused variables * Name changes in webdataset source reader * Add Missing component Behaviour * Fix issues with webdataset source reader * Minor changes * Commit changes for webdataset reader.py file * Add missing component behaviour * Add MissingComponentsBehaviour enum and python changes * Fix missing components behavior empty and skip * Multi-GPU support for webdataset reader * Add reset loaders - auto_reset() * Calling resize in crop resize * Minor change for shuffle in webdataset reader * Fix issue with WebDataset Reader for index files usage * Resolve PR comments * PR 11 comments * Adding QA tests for webdataset reader * Fix QA tests of cpp * Minor change * revert the crop resize commit - requires the PR for fused crop resize * Minor change in webdataset example file * Del webdataset example file * Formatting changes * Fix a minor indentation issue * Minor changes * Make the stick_to_shard True * Fix the issue with webdataset PARTIAL policy * Update .gitignore * Update rocal_api_meta_data.h * Update rocal_api_meta_data.cpp * Update node_crop_resize.cpp * Update node_crop_resize.cpp - no changes in file * Update unit_tests.sh - remove extra comments * LBP changes 1 * Working commit - LBP * Working commit - LBP change 2 * 
crop resize addition * Resolve PR comments - 1 * Temp NWC * Fix the NWC * Convert the vector to shared_ptr<vector> for AsciiComponent * Update meta_data.h * Update .gitignore * PR comments fetched * PR comments resolution * Resolving multiple pr comments * add eof * Update unit tests.sh * Move the tar_helper_functions to helpers folder * Add tar helper files * Minor change - CmakeLists * Fix throw for missing component behaviour * Resolve PR comments * Handle the MISSING COMPONENT BEHAVIOUR of EMPTY correctly * Fix the build error * Minor Fix * Add setup instructions for libtar in rocAL-setup.py * Resolving review comment * Removed comment * Resolving review comment and changing version number * Fix the Ascii metadata wrt rocalListOfTensorList * Fix unit test for webdataset reader * Resolving review comments and unit test changes * Fix ascii metadata tensor list vector * Fix iterator for partial policy * Minor changes * Adding CMake flag for wds reader * Minor changes * Adding reset_mem_handle API --------- Co-authored-by: fiona-gladwin <[email protected]> Co-authored-by: Kiriti Gowda <[email protected]> Co-authored-by: Sundar Rajan Vaithiyanathan <[email protected]> Co-authored-by: SundarRajan28 <[email protected]>
- Loading branch information
1 parent
23103d5
commit a846299
Showing
37 changed files
with
2,072 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
################################################################################ | ||
# | ||
# MIT License | ||
# | ||
# Copyright (c) 2024 Advanced Micro Devices, Inc. | ||
# | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
# of this software and associated documentation files (the "Software"), to deal | ||
# in the Software without restriction, including without limitation the rights | ||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
# copies of the Software, and to permit persons to whom the Software is | ||
# furnished to do so, subject to the following conditions: | ||
# | ||
# The above copyright notice and this permission notice shall be included in all | ||
# copies or substantial portions of the Software. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
# SOFTWARE. | ||
# | ||
################################################################################ | ||
# FindLibTar
# ----------
# Locates the libtar header and library.
#
# Search roots:
#   $ENV{LIBTAR_PATH}  - environment variable, tried first (HINTS)
#   ${LIBTAR_PATH}     - CMake variable, e.g. -DLIBTAR_PATH=<prefix>
#   standard system prefixes
#
# Result variables (also stored as INTERNAL cache entries at the end):
#   LIBTAR_FOUND         - TRUE when both the header and the library were found
#   LIBTAR_INCLUDE_DIRS  - directory containing libtar.h
#   LIBTAR_LIBRARIES     - full path of the libtar library

# Header lookup. ${LIBTAR_PATH}/include is searched as well so that the CMake
# variable LIBTAR_PATH works for the header the same way it already works for
# the library below; without it a custom prefix yields a library but no header.
find_path(LIBTAR_INCLUDE_DIRS
    NAMES libtar.h
    HINTS
        $ENV{LIBTAR_PATH}/include
    PATHS
        ${LIBTAR_PATH}/include
        /usr/include
        /usr/local/include
)
mark_as_advanced(LIBTAR_INCLUDE_DIRS)

# Library lookup. NAMES lists the static archive first, so it is preferred
# over a shared library found in the same directory.
find_library(LIBTAR_LIBRARIES
    NAMES libtar.a tar libtar
    HINTS
        $ENV{LIBTAR_PATH}/lib
        $ENV{LIBTAR_PATH}/lib64
    PATHS ${CMAKE_SYSTEM_PREFIX_PATH} ${LIBTAR_PATH} "/usr/local" "/usr/lib"
    PATH_SUFFIXES lib lib64
)
mark_as_advanced(LIBTAR_LIBRARIES)

# Project-style status line. ${White}, ${Yellow} and ${ColourReset} are not
# defined in this module (presumably set by the including project); CMake
# expands them to empty strings when they are undefined.
if(LIBTAR_LIBRARIES AND LIBTAR_INCLUDE_DIRS)
    message("-- ${White}Using Libtar -- \n\tLibraries:${LIBTAR_LIBRARIES} \n\tIncludes:${LIBTAR_INCLUDE_DIRS}${ColourReset}")
    set(LIBTAR_FOUND TRUE)
else()
    message( "-- ${Yellow}NOTE: FindLibTar failed to find -- LibTar${ColourReset}" )
endif()

# Standard result handling: derives LIBTAR_FOUND from the required variables
# and applies the REQUIRED / QUIET arguments given to find_package().
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibTar
    FOUND_VAR LIBTAR_FOUND
    REQUIRED_VARS
        LIBTAR_LIBRARIES
        LIBTAR_INCLUDE_DIRS
)

# Publish the results through the cache. The values are quoted so that each
# set() call always receives exactly one value argument, even if it is empty.
set(LIBTAR_FOUND "${LIBTAR_FOUND}" CACHE INTERNAL "")
set(LIBTAR_LIBRARIES "${LIBTAR_LIBRARIES}" CACHE INTERNAL "")
set(LIBTAR_INCLUDE_DIRS "${LIBTAR_INCLUDE_DIRS}" CACHE INTERNAL "")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. | ||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
The above copyright notice and this permission notice shall be included in | ||
all copies or substantial portions of the Software. | ||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
THE SOFTWARE. | ||
*/ | ||
|
||
#ifdef ENABLE_WDS | ||
#include <fstream> | ||
#include <memory> | ||
#include <string> | ||
#include <libtar.h> | ||
|
||
// Block-size constant mirrored from the T_BLOCKSIZE macro (presumably supplied
// by <libtar.h>, which is included above) so it is available as a typed value.
constexpr size_t kBlockSize{T_BLOCKSIZE};
/// Move-only tar archive reader built on top of a std::ifstream.
///
/// This header only declares the interface: the member functions and the
/// befriended read_tar_archive() function are defined elsewhere. The notes
/// below state what the declarations show; anything inferred from a name
/// alone is marked as such and should be confirmed against the implementation.
class TarArchive {
public:
    /// Kind of the current archive entry, as reported by
    /// get_current_file_type(). The enumerators are unscoped and numbered
    /// consecutively from ENTRY_NONE = 0; keep the order so the values stay
    /// stable.
    enum EntryType {
        ENTRY_NONE = 0,
        ENTRY_FILE,
        ENTRY_DIR,
        ENTRY_HARDLINK,
        ENTRY_SYMLINK,
        ENTRY_CHARDEV,
        ENTRY_BLOCKDEV,
        ENTRY_FIFO,
        ENTRY_NOT_DEFINED
    };

    /// Builds an archive with no stream attached; every member keeps its
    /// in-class default (e.g. _eof starts as true, _filetype as ENTRY_NONE).
    TarArchive() = default;

    /// Builds an archive from `stream`, taking ownership of it.
    explicit TarArchive(std::unique_ptr<std::ifstream> stream);

    // Copying was already implicitly disabled by the user-declared move
    // operations and the unique_ptr member; deleting it explicitly documents
    // the move-only contract and completes the set of special members.
    TarArchive(const TarArchive &) = delete;
    TarArchive &operator=(const TarArchive &) = delete;

    TarArchive(TarArchive &&);
    TarArchive &operator=(TarArchive &&);
    ~TarArchive();

    // --- Archive-level navigation ------------------------------------------
    // NOTE(review): behaviour inferred from the names only — confirm against
    // the definitions.
    bool advance_to_next_file_in_tar();
    bool at_end_of_archive() const;
    void seek_to_offset_in_archive(int64_t offset);
    int64_t get_current_archive_offset() const;
    int64_t get_current_header_size() const;

    /// Returns a raw (non-owning) stream pointer; presumably the stream held
    /// by this archive — confirm against the definition.
    std::ifstream *get_stream();

    // --- Current-entry accessors -------------------------------------------
    // NOTE(review): presumably backed by _filename / _filesize / _filetype /
    // _eof below — confirm against the definitions.
    const std::string &get_current_file_name() const;
    size_t get_current_file_size() const;
    EntryType get_current_file_type() const;
    bool is_end_of_file() const;

    // --- Reading -----------------------------------------------------------
    /// Returns the current entry's contents behind a shared, type-erased
    /// pointer. NOTE(review): buffer size and lifetime rules are not visible
    /// here.
    std::shared_ptr<void> read_current_file();

    /// Reads into the caller-provided `buffer`; `count` is presumably the
    /// maximum number of bytes and the result the number actually read —
    /// confirm against the definition.
    size_t read_into_buffer(void *buffer, size_t count);

    /// Returns a stream by unique_ptr, i.e. the caller becomes its owner;
    /// presumably this is the stream previously held by the archive.
    std::unique_ptr<std::ifstream> release_file_stream();

private:
    // Data members stay in their original declaration order, which fixes both
    // the object layout and the member initialisation order.
    std::unique_ptr<std::ifstream> _stream;  // owned input stream; std::ifstream is used directly
    int _instance_handle = -1;               // -1 until assigned (NOTE(review): meaning of the handle not visible here)
    void *_handle = nullptr;                 // opaque handle (NOTE(review): presumably the libtar handle — confirm)

    // Free function with the (int, void *, size_t) -> ssize_t shape of a read
    // callback; it is a friend so it can reach the private state above.
    friend ssize_t read_tar_archive(int, void *, size_t);

    std::string _filename;
    size_t _filesize = 0;
    EntryType _filetype = ENTRY_NONE;
    size_t _readoffset = 0;
    int64_t _current_header = 0;
    bool _eof = true;

    void mark_end_of_file();
    void parse_current_header();
};
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.