diff --git a/.pipelines/pypl-publishing.yml b/.pipelines/pypl-publishing.yml
index 29ce0c007..502e6db3e 100644
--- a/.pipelines/pypl-publishing.yml
+++ b/.pipelines/pypl-publishing.yml
@@ -29,6 +29,11 @@ parameters:
   type: boolean
   default: true
 
+- name: enable_linux_rocm
+  displayName: 'Whether Linux ROCm package is built.'
+  type: boolean
+  default: true
+
 - name: ort_version
   displayName: 'OnnxRuntime version'
   type: string
@@ -49,6 +54,11 @@ parameters:
   type: string
   default: '1.18.0-dev-20240426-0116-b842effa29'
 
+- name: ort_rocm_version
+  displayName: 'OnnxRuntime ROCm version'
+  type: string
+  default: '1.19.0-dev-20240602-1103-217b66f'
+
 - name: cuda_versions
   displayName: 'CUDA versions'
   type: string
@@ -77,6 +87,7 @@ stages:
   parameters:
     enable_linux_cpu: ${{ parameters.enable_linux_cpu }}
     enable_linux_cuda: ${{ parameters.enable_linux_cuda }}
+    enable_linux_rocm: ${{ parameters.enable_linux_rocm }}
     enable_win_cpu: ${{ parameters.enable_win_cpu }}
     enable_win_cuda: ${{ parameters.enable_win_cuda }}
     enable_win_dml: ${{ parameters.enable_win_dml }}
@@ -84,6 +95,7 @@ stages:
     ort_version: ${{ parameters.ort_version }}
     ort_cuda_118_version: ${{ parameters.ort_cuda_118_version }}
     ort_cuda_122_version: ${{ parameters.ort_cuda_122_version }}
+    ort_rocm_version: ${{ parameters.ort_rocm_version }}
     ort_dml_version: ${{ parameters.ort_dml_version }}
     cuda_versions: ${{ parameters.cuda_versions }}
     build_config: ${{ parameters.build_config }}
diff --git a/.pipelines/stages/jobs/nuget-packaging-job.yml b/.pipelines/stages/jobs/nuget-packaging-job.yml
index ada915890..d8e0ebc70 100644
--- a/.pipelines/stages/jobs/nuget-packaging-job.yml
+++ b/.pipelines/stages/jobs/nuget-packaging-job.yml
@@ -73,6 +73,8 @@ jobs:
         value: 'Microsoft.ML.OnnxRuntime.Gpu.Linux'
     ${{ elseif eq(parameters.ep, 'directml')}}:
       value: 'Microsoft.ML.OnnxRuntime.DirectML'
+    ${{ elseif eq(parameters.ep, 'rocm')}}:
+      value: 'Microsoft.ML.OnnxRuntime.ROCm'
     ${{ else }}:
       value: 'Microsoft.ML.OnnxRuntime'
 
diff --git a/.pipelines/stages/jobs/py-packaging-job.yml b/.pipelines/stages/jobs/py-packaging-job.yml
index 103835472..400394b72 100644
--- a/.pipelines/stages/jobs/py-packaging-job.yml
+++ b/.pipelines/stages/jobs/py-packaging-job.yml
@@ -103,6 +103,8 @@ jobs:
         value: 'Microsoft.ML.OnnxRuntime.Gpu.Linux'
     ${{ elseif eq(parameters.ep, 'directml')}}:
       value: 'Microsoft.ML.OnnxRuntime.DirectML'
+    ${{ elseif eq(parameters.ep, 'rocm')}}:
+      value: 'Microsoft.ML.OnnxRuntime.ROCm'
     ${{ else }}:
       value: 'Microsoft.ML.OnnxRuntime'
 
diff --git a/.pipelines/stages/py-packaging-stage.yml b/.pipelines/stages/py-packaging-stage.yml
index b4c77e97c..cb382d7c0 100644
--- a/.pipelines/stages/py-packaging-stage.yml
+++ b/.pipelines/stages/py-packaging-stage.yml
@@ -11,12 +11,16 @@ parameters:
   type: boolean
 - name: enable_linux_cuda
   type: boolean
+- name: enable_linux_rocm
+  type: boolean
 - name: ort_version
   type: string
 - name: ort_cuda_118_version
   type: string
 - name: ort_cuda_122_version
   type: string
+- name: ort_rocm_version
+  type: string
 - name: ort_dml_version
   type: string
 - name: cuda_versions
@@ -109,7 +113,14 @@ stages:
         os: 'linux'
         build_config: ${{ parameters.build_config }}
 
-
+  - ${{ if eq(parameters.enable_linux_rocm, true) }}:
+    - template: jobs/py-packaging-job.yml
+      parameters:
+        arch: 'x64'
+        ep: 'rocm'
+        ort_version: ${{ parameters.ort_rocm_version }}
+        os: 'linux'
+        build_config: ${{ parameters.build_config }}
 
 
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10275e702..73e6b0537 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,6 +25,8 @@ include(cmake/external/onnxruntime_external_deps.cmake)
 include(cmake/global_variables.cmake)
 # Checking if CUDA is supported
 include(cmake/check_cuda.cmake)
+# Checking if ROCm is supported
+include(cmake/check_rocm.cmake)
 # Checking if DML is supported
 include(cmake/check_dml.cmake)
 
diff --git a/build.py b/build.py
index 055203751..cb98cf336 100644
--- a/build.py
+++ b/build.py
@@ -101,6 +101,8 @@ def _parse_args():
              "Used when --use_cuda is specified.",
     )
 
+    parser.add_argument("--use_rocm", action="store_true", help="Whether to use ROCm. Default is to not use rocm.")
+
     parser.add_argument("--use_dml", action="store_true", help="Whether to use DML. Default is to not use DML.")
 
     # The following options are mutually exclusive (cross compiling options such as android, ios, etc.)
@@ -443,6 +445,7 @@ def update(args: argparse.Namespace, env: dict[str, str]):
         str(args.build_dir),
         "-DCMAKE_POSITION_INDEPENDENT_CODE=ON",
         f"-DUSE_CUDA={'ON' if args.use_cuda else 'OFF'}",
+        f"-DUSE_ROCM={'ON' if args.use_rocm else 'OFF'}",
         f"-DUSE_DML={'ON' if args.use_dml else 'OFF'}",
         f"-DENABLE_JAVA={'ON' if args.build_java else 'OFF'}",
         f"-DBUILD_WHEEL={build_wheel}",
@@ -562,4 +565,4 @@ def clean(args: argparse.Namespace, env: dict[str, str]):
         build(arguments, environment)
 
     if arguments.test and not arguments.skip_tests:
-        test(arguments, environment)
\ No newline at end of file
+        test(arguments, environment)
diff --git a/cmake/check_rocm.cmake b/cmake/check_rocm.cmake
new file mode 100644
index 000000000..9526449b1
--- /dev/null
+++ b/cmake/check_rocm.cmake
@@ -0,0 +1,8 @@
+if(USE_ROCM AND NOT EXISTS "${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_ROCM_LIB}")
+  message(FATAL_ERROR "Expected the ONNX Runtime providers ROCm library to be found at ${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_ROCM_LIB}. Actual: Not found.")
+endif()
+
+if(USE_ROCM)
+  list(APPEND onnxruntime_libs "${ORT_LIB_DIR}/${ONNXRUNTIME_PROVIDERS_ROCM_LIB}")
+  add_compile_definitions(USE_ROCM=1)
+endif()
\ No newline at end of file
diff --git a/cmake/global_variables.cmake b/cmake/global_variables.cmake
index 2fbf298bb..22dd8a6ad 100644
--- a/cmake/global_variables.cmake
+++ b/cmake/global_variables.cmake
@@ -27,16 +27,19 @@ endif()
 if(WIN32)
   set(ONNXRUNTIME_LIB "onnxruntime.dll")
   set(ONNXRUNTIME_PROVIDERS_CUDA_LIB "onnxruntime_providers_cuda.dll")
+  set(ONNXRUNTIME_PROVIDERS_ROCM_LIB "onnxruntime_providers_rocm.dll")
   set(ONNXRUNTIME_ALL_SHARED_LIBS "onnxruntime*.dll")
   set(ONNXRUNTIME_EXTENSIONS_LIB "tfmtok_c.lib")
   set(ONNXRUNTIME_EXTENSIONS_FILES "tfmtok_c.dll")
 elseif(APPLE)
   set(ONNXRUNTIME_LIB "libonnxruntime.dylib")
   set(ONNXRUNTIME_PROVIDERS_CUDA_LIB "libonnxruntime_providers_cuda.dylib")
+  set(ONNXRUNTIME_PROVIDERS_ROCM_LIB "libonnxruntime_providers_rocm.dylib")
   set(ONNXRUNTIME_ALL_SHARED_LIBS "libonnxruntime*.dylib")
 else()
   set(ONNXRUNTIME_LIB "libonnxruntime.so")
   set(ONNXRUNTIME_PROVIDERS_CUDA_LIB "libonnxruntime_providers_cuda.so")
+  set(ONNXRUNTIME_PROVIDERS_ROCM_LIB "libonnxruntime_providers_rocm.so")
   set(ONNXRUNTIME_ALL_SHARED_LIBS "libonnxruntime*.so*")
   set(ONNXRUNTIME_EXTENSIONS_LIB "tfmtok_c.so")
 endif()
diff --git a/cmake/options.cmake b/cmake/options.cmake
index c1aeaa1a5..2aec9d3b2 100644
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@@ -2,6 +2,7 @@ include(CMakeDependentOption)
 
 # features
 option(USE_CUDA "Build with CUDA support" ON)
+option(USE_ROCM "Build with ROCm support" ON)
 option(USE_DML "Build with DML support" OFF)
 
 # bindings
diff --git a/cmake/package.cmake b/cmake/package.cmake
index 49b6e69a1..87ad79345 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -38,6 +38,8 @@ elseif (LINUX)
   if (CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x64")
     if (USE_CUDA)
       set(CPACK_PACKAGE_FILE_NAME "onnxruntime-genai-${VERSION_INFO}-linux-x64-cuda")
+    elseif (USE_ROCM)
+      set(CPACK_PACKAGE_FILE_NAME "onnxruntime-genai-${VERSION_INFO}-linux-x64-rocm")
     else ()
       set(CPACK_PACKAGE_FILE_NAME "onnxruntime-genai-${VERSION_INFO}-linux-x64")
     endif ()
diff --git a/cmake/presets/CMakeLinuxBuildPresets.json b/cmake/presets/CMakeLinuxBuildPresets.json
index 8155e2285..47a0ec704 100644
--- a/cmake/presets/CMakeLinuxBuildPresets.json
+++ b/cmake/presets/CMakeLinuxBuildPresets.json
@@ -100,6 +100,38 @@
     {
       "name": "linux_gcc_cuda_minsizerel",
       "configurePreset": "linux_gcc_cuda_minsizerel"
+    },
+    {
+      "name": "linux_gcc_rocm_release_asan",
+      "configurePreset": "linux_gcc_rocm_release_asan"
+    },
+    {
+      "name": "linux_gcc_rocm_debug_asan",
+      "configurePreset": "linux_gcc_rocm_debug_asan"
+    },
+    {
+      "name": "linux_gcc_rocm_relwithdebinfo_asan",
+      "configurePreset": "linux_gcc_rocm_relwithdebinfo_asan"
+    },
+    {
+      "name": "linux_gcc_rocm_minsizerel_asan",
+      "configurePreset": "linux_gcc_rocm_minsizerel_asan"
+    },
+    {
+      "name": "linux_gcc_rocm_release",
+      "configurePreset": "linux_gcc_rocm_release"
+    },
+    {
+      "name": "linux_gcc_rocm_debug",
+      "configurePreset": "linux_gcc_rocm_debug"
+    },
+    {
+      "name": "linux_gcc_rocm_relwithdebinfo",
+      "configurePreset": "linux_gcc_rocm_relwithdebinfo"
+    },
+    {
+      "name": "linux_gcc_rocm_minsizerel",
+      "configurePreset": "linux_gcc_rocm_minsizerel"
     }
   ]
 }
\ No newline at end of file
diff --git a/cmake/presets/CMakeLinuxDefaultConfigPresets.json b/cmake/presets/CMakeLinuxDefaultConfigPresets.json
index 559d1dae0..c2816213f 100644
--- a/cmake/presets/CMakeLinuxDefaultConfigPresets.json
+++ b/cmake/presets/CMakeLinuxDefaultConfigPresets.json
@@ -9,7 +9,8 @@
         "CMAKE_EXE_LINKER_FLAGS_INIT": "-Wl,-z,now",
         "CMAKE_MODULE_LINKER_FLAGS_INIT": "-Wl,-z,now",
         "CMAKE_SHARED_LINKER_FLAGS_INIT": "-Wl,-z,now",
-        "USE_CUDA": "OFF"
+        "USE_CUDA": "OFF",
+        "USE_ROCM": "OFF"
       },
       "environment": {
         "CC": "gcc",
@@ -29,6 +30,13 @@
         "CMAKE_CUDA_ARCHITECTURES": "60;61;70;75;80;86"
       }
     },
+    {
+      "name": "linux_gcc_rocm_default",
+      "inherits": "linux_gcc_default",
+      "cacheVariables": {
+        "USE_ROCM": "ON"
+      }
+    },
     {
       "name": "linux_clang_default",
       "inherits": "linux_gcc_default",
diff --git a/cmake/presets/CMakeLinuxGccConfigPresets.json b/cmake/presets/CMakeLinuxGccConfigPresets.json
index d5518f9ad..bdd58f6c1 100644
--- a/cmake/presets/CMakeLinuxGccConfigPresets.json
+++ b/cmake/presets/CMakeLinuxGccConfigPresets.json
@@ -155,6 +155,82 @@
         "linux_minsizerel_default"
       ],
       "binaryDir": "${sourceDir}/build/cuda"
+    },
+    {
+      "name": "linux_gcc_rocm_release_asan",
+      "displayName": "linux gcc rocm release asan",
+      "inherits": [
+        "linux_gcc_asan_default",
+        "linux_gcc_rocm_default",
+        "linux_release_default"
+      ],
+      "binaryDir": "${sourceDir}/build/rocm"
+    },
+    {
+      "name": "linux_gcc_rocm_debug_asan",
+      "displayName": "linux gcc rocm debug asan",
+      "inherits": [
+        "linux_gcc_asan_default",
+        "linux_gcc_rocm_default",
+        "linux_debug_default"
+      ],
+      "binaryDir": "${sourceDir}/build/rocm"
+    },
+    {
+      "name": "linux_gcc_rocm_relwithdebinfo_asan",
+      "displayName": "linux gcc rocm relwithdebinfo asan",
+      "inherits": [
+        "linux_gcc_asan_default",
+        "linux_gcc_rocm_default",
+        "linux_relwithdebinfo_default"
+      ],
+      "binaryDir": "${sourceDir}/build/rocm"
+    },
+    {
+      "name": "linux_gcc_rocm_minsizerel_asan",
+      "displayName": "linux gcc rocm minsizerel asan",
+      "inherits": [
+        "linux_gcc_asan_default",
+        "linux_gcc_rocm_default",
+        "linux_minsizerel_default"
+      ],
+      "binaryDir": "${sourceDir}/build/rocm"
+    },
+    {
+      "name": "linux_gcc_rocm_release",
+      "displayName": "linux gcc rocm release",
+      "inherits": [
+        "linux_gcc_rocm_default",
+        "linux_release_default"
+      ],
+      "binaryDir": "${sourceDir}/build/rocm"
+    },
+    {
+      "name": "linux_gcc_rocm_debug",
+      "displayName": "linux gcc rocm debug",
+      "inherits": [
+        "linux_gcc_rocm_default",
+        "linux_debug_default"
+      ],
+      "binaryDir": "${sourceDir}/build/rocm"
+    },
+    {
+      "name": "linux_gcc_rocm_relwithdebinfo",
+      "displayName": "linux gcc rocm relwithdebinfo",
+      "inherits": [
+        "linux_gcc_rocm_default",
+        "linux_relwithdebinfo_default"
+      ],
+      "binaryDir": "${sourceDir}/build/rocm"
+    },
+    {
+      "name": "linux_gcc_rocm_minsizerel",
+      "displayName": "linux gcc rocm minsizerel",
+      "inherits": [
+        "linux_gcc_rocm_default",
+        "linux_minsizerel_default"
+      ],
+      "binaryDir": "${sourceDir}/build/rocm"
     }
   ]
 }
\ No newline at end of file
diff --git a/cmake/presets/CMakeMacOSConfigPresets.json b/cmake/presets/CMakeMacOSConfigPresets.json
index 1ea6d85c8..a425dc595 100644
--- a/cmake/presets/CMakeMacOSConfigPresets.json
+++ b/cmake/presets/CMakeMacOSConfigPresets.json
@@ -11,7 +11,8 @@
       "cacheVariables": {
         "CMAKE_POSITION_INDEPENDENT_CODE": "ON",
         "CMAKE_OSX_ARCHITECTURES": "arm64",
-        "USE_CUDA": "OFF"
+        "USE_CUDA": "OFF",
+        "USE_ROCM": "OFF"
       },
       "environment": {
         "CC": "clang",
diff --git a/cmake/presets/CMakeWinConfigPresets.json b/cmake/presets/CMakeWinConfigPresets.json
index ddbfbacfd..f75549694 100644
--- a/cmake/presets/CMakeWinConfigPresets.json
+++ b/cmake/presets/CMakeWinConfigPresets.json
@@ -11,7 +11,8 @@
         "CMAKE_EXE_LINKER_FLAGS_INIT": "/profile /DYNAMICBASE",
         "CMAKE_MODULE_LINKER_FLAGS_INIT": "/profile /DYNAMICBASE",
         "CMAKE_SHARED_LINKER_FLAGS_INIT": "/profile /DYNAMICBASE",
-        "USE_CUDA": "OFF"
+        "USE_CUDA": "OFF",
+        "USE_ROCM": "OFF"
       },
       "condition": {
         "type": "equals",
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 383298f77..9bc5ea1ab 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -33,6 +33,8 @@ if(BUILD_WHEEL)
   message("Setting up wheel files in : ${WHEEL_FILES_DIR}")
   if(USE_CUDA)
     set(TARGET_NAME "onnxruntime-genai-cuda")
+  elseif(USE_ROCM)
+    set(TARGET_NAME "onnxruntime-genai-rocm")
   elseif(USE_DML)
     set(TARGET_NAME "onnxruntime-genai-directml")
   else()
@@ -73,6 +75,19 @@ if(BUILD_WHEEL)
     "libnvonnxparser.so.8"
     "libnvonnxparser.so.10"
 
+    "libamdhip64.so.5"
+    "libamdhip64.so.6"
+    "libhipblas.so.0"
+    "libhipblas.so.2"
+    "libhipfft.so"
+    "libhipfft.so.0"
+    "libhiprtc.so.5"
+    "libhsa-runtime64.so.1"
+    "librccl.so.1"
+    "librocblas.so.3"
+    "librocfft.so.0"
+    "libroctracer64.so.4"
+    "libMIOpen.so.1"
   )
   set(modified_exclude_list)
   foreach(item IN LISTS auditwheel_exclude_list)
diff --git a/src/python/py/models/builder.py b/src/python/py/models/builder.py
index 8da1e8046..a6366ebaa 100644
--- a/src/python/py/models/builder.py
+++ b/src/python/py/models/builder.py
@@ -58,6 +58,10 @@ def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
             "cuda": {
                 "enable_cuda_graph": enable_cuda_graph,        # "1" if the the model is able to enable cuda graph, "0" otherwise
             },
+            "rocm":{
+                "tunable_op_enable": "1",
+                "tunable_op_tuning_enable": "1"
+            },
             "dml": {},
             "web": {},
         }
@@ -1960,7 +1964,7 @@ def make_attention_mask_reformatting_for_gqa(self):
         self.mask_attrs["total_seq_len"] = cast_2_name
 
     def make_attention_mask_reformatting_for_sparse_attn(self):
-        # Make nodes for the attention mask subgraph that calculates 
+        # Make nodes for the attention mask subgraph that calculates
         # attributes about the 2D attention mask to use in SparseAttention
         #
         #                attention_mask
@@ -2178,7 +2182,7 @@ def calculate_block_mask(self):
         crows_name = "block_row_indices"
         self.make_external_tensor(crows.detach().numpy().astype(np.int32), crows_name)
         self.mask_attrs["block_row_indices"] = crows_name
-        
+
         cols_name = "block_col_indices"
         self.make_external_tensor(cols.detach().numpy().astype(np.int32), cols_name)
         self.mask_attrs["block_col_indices"] = cols_name
@@ -2248,7 +2252,7 @@ def make_mlp_proj(self, layer_id, mlp, root_input):
         #           DownProjMatMul
         #                 |
         #            DownProjAdd
-        
+
         # Make input MatMul and Add nodes
         up_matmul_name = f"/model/layers.{layer_id}/mlp/up_proj/MatMul"
         self.make_matmul(mlp.up_proj.weight.detach().numpy(), up_matmul_name, root_input)
diff --git a/src/python/python.cpp b/src/python/python.cpp
index c22c31582..a2ea0bad8 100644
--- a/src/python/python.cpp
+++ b/src/python/python.cpp
@@ -495,6 +495,14 @@ PYBIND11_MODULE(onnxruntime_genai, m) {
 #endif
   });
 
+  m.def("is_rocm_available", []() {
+#if USE_ROCM
+    return true;
+#else
+        return false;
+#endif
+  });
+
   m.def("set_current_gpu_device_id", [](int device_id) { Ort::SetCurrentGpuDeviceId(device_id); });
   m.def("get_current_gpu_device_id", []() { return Ort::GetCurrentGpuDeviceId(); });
 }
diff --git a/test/python/test_onnxruntime_genai_api.py b/test/python/test_onnxruntime_genai_api.py
index 7780bb24c..0cbf52e08 100644
--- a/test/python/test_onnxruntime_genai_api.py
+++ b/test/python/test_onnxruntime_genai_api.py
@@ -18,6 +18,9 @@
 if og.is_dml_available():
     devices.append("dml")
 
+if og.is_rocm_available():
+    devices.append("rocm")
+
 @pytest.mark.parametrize(
     "relative_model_path",
     (
diff --git a/tools/ci_build/github/linux/docker/manylinux/Dockerfile.manylinux2_28_rocm b/tools/ci_build/github/linux/docker/manylinux/Dockerfile.manylinux2_28_rocm
new file mode 100644
index 000000000..6af5a8a21
--- /dev/null
+++ b/tools/ci_build/github/linux/docker/manylinux/Dockerfile.manylinux2_28_rocm
@@ -0,0 +1,11 @@
+FROM onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cpu_x64_ubi8_gcc12:20240530.3
+
+ADD scripts /tmp/scripts
+RUN cd /tmp/scripts && /tmp/scripts/install_centos_gcc12.sh && /tmp/scripts/install_deps.sh && rm -rf /tmp/scripts
+
+ARG BUILD_UID=1001
+ARG BUILD_USER=onnxruntimedev
+RUN adduser --uid $BUILD_UID $BUILD_USER
+WORKDIR /home/$BUILD_USER
+USER $BUILD_USER
+