diff --git a/CMakePresets.json b/CMakePresets.json
index 748637f7b38f..beb38ec85f56 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -13,10 +13,7 @@
       "generator": "Ninja",
       "binaryDir": "${sourceDir}/build",
       "cacheVariables": {
-        "CMAKE_BUILD_TYPE": "Debug",
-        "CMAKE_CXX_FLAGS_RELEASE": "-O2",
-        "CMAKE_CXX_FLAGS_RELWITHDEBINFO": "-O2 -g",
-        "CMAKE_CXX_FLAGS_MINSIZEREL": "-Os"
+        "CMAKE_BUILD_TYPE": "Debug"
       }
     },
     {
@@ -41,13 +38,11 @@
     },
     {
       "name": "msvc-release",
+      "inherits": "msvc-debug",
       "displayName": "MSVC (Release)",
-      "description": "Debug build using Ninja generator and MSVC with vcpkg dependencies.",
-      "generator": "Ninja",
-      "binaryDir": "${sourceDir}/build",
+      "description": "Release build using Ninja generator and MSVC with vcpkg dependencies.",
       "cacheVariables": {
-        "CMAKE_BUILD_TYPE": "Release",
-        "CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
+        "CMAKE_BUILD_TYPE": "Release"
       }
     },
     {
diff --git a/README_cmake.md b/README_cmake.md
index 362d4594b577..a59ab16f68b5 100644
--- a/README_cmake.md
+++ b/README_cmake.md
@@ -377,7 +377,6 @@ targets (like tests and documentation) are built.
 | Option                 | Default              | Description                                                                              |
 | ---------------------- | -------------------- | ---------------------------------------------------------------------------------------- |
 | `WITH_TESTS`           | `ON`                 | Enable building unit and integration tests                                               |
-| `WITH_APPS`            | `ON`                 | Enable testing sample applications (run `ctest -L apps` to actually build and test them) |
 | `WITH_PYTHON_BINDINGS` | `ON` if Python found | Enable building Python 3.x bindings                                                      |
 | `WITH_DOCS`            | `OFF`                | Enable building the documentation via Doxygen                                            |
 | `WITH_UTILS`           | `ON`                 | Enable building various utilities including the trace visualizer                         |
diff --git a/src/CodeGen_PyTorch.cpp b/src/CodeGen_PyTorch.cpp
index b52dc3ab0572..b17cc80e7084 100644
--- a/src/CodeGen_PyTorch.cpp
+++ b/src/CodeGen_PyTorch.cpp
@@ -172,7 +172,7 @@ void CodeGen_PyTorch::compile(const LoweredFunc &f, bool is_cuda) {
                     << c_print_name(buffer_args[i].name) << "_buffer.host_dirty(),"
                     << "\"device not synchronized for buffer "
                     << c_print_name(buffer_args[i].name)
-                    << ", make sure all update stages are excplicitly computed on GPU."
+                    << ", make sure all update stages are explicitly computed on GPU."
                     << "\");\n";
                 stream << get_indent();
                 stream
@@ -386,7 +386,7 @@ inline int test1_th_(at::Tensor &_buf, float _alpha, int32_t _beta) {
 
     AT_ASSERTM(err == 0, "Halide call failed");
     // Make sure data is on device
-    AT_ASSERTM(!_buf_buffer.host_dirty(),"device not synchronized for buffer _buf, make sure all update stages are excplicitly computed on GPU.");
+    AT_ASSERTM(!_buf_buffer.host_dirty(),"device not synchronized for buffer _buf, make sure all update stages are explicitly computed on GPU.");
     _buf_buffer.device_detach_native();
 
     return 0;
diff --git a/src/EliminateBoolVectors.h b/src/EliminateBoolVectors.h
index bdb3882909ed..77a7331ccff5 100644
--- a/src/EliminateBoolVectors.h
+++ b/src/EliminateBoolVectors.h
@@ -16,7 +16,7 @@ namespace Internal {
  * u16x8, u16x8), where the first argument is a vector of integers representing
  * a mask. This pass converts vectors of bools to vectors of integers to meet
  * this requirement. This is done by injecting intrinsics to convert bools to
- * architecture-specific masks, and using a select_mask instrinsic instead of a
+ * architecture-specific masks, and using a select_mask intrinsic instead of a
  * Select node. This also converts any intrinsics that operate on vectorized
  * conditions to a *_mask equivalent (if_then_else, require). Because the masks
  * are architecture specific, they may not be stored or loaded. On Stores, the
diff --git a/src/Schedule.h b/src/Schedule.h
index 0532b6ce1038..f0c5d0797be2 100644
--- a/src/Schedule.h
+++ b/src/Schedule.h
@@ -575,7 +575,7 @@ class FuncSchedule {
     // @}
 
     /** You may explicitly specify an estimate of some of the function
-     * dimensions. See \ref Func::estimate */
+     * dimensions. See \ref Func::set_estimate */
     // @{
     const std::vector<Bound> &estimates() const;
     std::vector<Bound> &estimates();
diff --git a/src/autoschedulers/adams2019/autotune_loop.sh b/src/autoschedulers/adams2019/autotune_loop.sh
index 7f99692402fc..3de57185aeca 100755
--- a/src/autoschedulers/adams2019/autotune_loop.sh
+++ b/src/autoschedulers/adams2019/autotune_loop.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # Build the generator to autotune. This script will be autotuning the
 # autoscheduler's cost model training pipeline, which is large enough
 # to be interesting.
diff --git a/src/autoschedulers/mullapudi2016/AutoSchedule.cpp b/src/autoschedulers/mullapudi2016/AutoSchedule.cpp
index 0ea9bacb8a72..d8227393e286 100644
--- a/src/autoschedulers/mullapudi2016/AutoSchedule.cpp
+++ b/src/autoschedulers/mullapudi2016/AutoSchedule.cpp
@@ -1957,7 +1957,7 @@ Partitioner::GroupAnalysis Partitioner::analyze_group(const Group &g, bool show_
     float load_slope = arch_params.balance / arch_params.last_level_cache_size;
     for (const auto &f_load : group_load_costs) {
         internal_assert(g.inlined.find(f_load.first) == g.inlined.end())
-            << "Intermediates of inlined pure fuction \"" << f_load.first
+            << "Intermediates of inlined pure function \"" << f_load.first
             << "\" should not have been in the group_load_costs\n";
 
         const auto &alloc_reg = get_element(alloc_regions, f_load.first);
diff --git a/src/runtime/HalideBuffer.h b/src/runtime/HalideBuffer.h
index 6f8265478ab1..6aa89846c080 100644
--- a/src/runtime/HalideBuffer.h
+++ b/src/runtime/HalideBuffer.h
@@ -836,7 +836,7 @@ class Buffer {
     // @{
 
     // The overload with one argument is 'explicit', so that
-    // (say) int is not implicitly convertable to Buffer<int>
+    // (say) int is not implicitly convertible to Buffer<int>
     explicit Buffer(int first) {
         static_assert(!T_is_void,
                       "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h
index b457e02d67ff..355469ee5acb 100644
--- a/src/runtime/HalideRuntime.h
+++ b/src/runtime/HalideRuntime.h
@@ -187,7 +187,7 @@ typedef bool (*halide_semaphore_try_acquire_t)(struct halide_semaphore_t *, int)
 
 /** A task representing a serial for loop evaluated over some range.
  * Note that task_parent is a pass through argument that should be
- * passed to any dependent taks that are invokved using halide_do_parallel_tasks
+ * passed to any dependent tasks that are invoked using halide_do_parallel_tasks
  * underneath this call. */
 typedef int (*halide_loop_task_t)(void *user_context, int min, int extent,
                                   uint8_t *closure, void *task_parent);
@@ -929,7 +929,7 @@ extern void halide_memoization_cache_evict(void *user_context, uint64_t eviction
  * the case where halide_memoization_cache_lookup is handling multiple
  * buffers.  (This corresponds to memoizing a Tuple in Halide.) Note
  * that the host pointer must be sufficient to get to all information
- * the relase operation needs. The default Halide cache impleemntation
+ * the release operation needs. The default Halide cache implementation
  * accomplishes this by storing extra data before the start of the user
  * modifiable host storage.
  *
diff --git a/src/runtime/HalideRuntimeD3D12Compute.h b/src/runtime/HalideRuntimeD3D12Compute.h
index 5814f85a8de2..96634330be15 100644
--- a/src/runtime/HalideRuntimeD3D12Compute.h
+++ b/src/runtime/HalideRuntimeD3D12Compute.h
@@ -78,7 +78,7 @@ struct halide_d3d12compute_command_queue;
 extern int halide_d3d12compute_acquire_context(void *user_context, struct halide_d3d12compute_device **device_ret,
                                                struct halide_d3d12compute_command_queue **queue_ret, bool create);
 
-/** This call balances each successfull halide_d3d12compute_acquire_context call.
+/** This call balances each successful halide_d3d12compute_acquire_context call.
  * If halide_d3d12compute_acquire_context is replaced, this routine must be replaced
  * as well.
  */
diff --git a/src/runtime/HalideRuntimeHexagonDma.h b/src/runtime/HalideRuntimeHexagonDma.h
index ccabac597628..42b1ea35dc31 100644
--- a/src/runtime/HalideRuntimeHexagonDma.h
+++ b/src/runtime/HalideRuntimeHexagonDma.h
@@ -50,7 +50,7 @@ typedef enum {
 extern const struct halide_device_interface_t *halide_hexagon_dma_device_interface();
 
 /** This API is used to set up the DMA device interface to be used for DMA transfer. This also internally 
- * creates the DMA device handle and populates all the Buffer releated parameters (width, height, stride)
+ * creates the DMA device handle and populates all the Buffer related parameters (width, height, stride)
  * to be used for DMA configuration.
  */
 extern int halide_hexagon_dma_device_wrap_native(void *user_context, struct halide_buffer_t *buf,
@@ -90,7 +90,7 @@ extern int halide_hexagon_dma_prepare_for_copy_to_device(void *user_context, str
 extern int halide_hexagon_dma_unprepare(void *user_context, struct halide_buffer_t *buf);
 
 /** This API is used to setup the hexagon Operation modes. We will setup the necessary Operating frequency
- * based on the power mode choosen. Check the structure halide_hexagon_power_mode_t defined in Halide HalideRuntimeHexagonHost.h
+ * based on the power mode chosen. Check the structure halide_hexagon_power_mode_t defined in Halide HalideRuntimeHexagonHost.h
  * for the supported power modes.
  */
 extern int halide_hexagon_dma_power_mode_voting(void *user_context, halide_hexagon_power_mode_t cornercase);
diff --git a/src/runtime/HalideRuntimeHexagonHost.h b/src/runtime/HalideRuntimeHexagonHost.h
index c5756504ac4b..ff0092ffd111 100644
--- a/src/runtime/HalideRuntimeHexagonHost.h
+++ b/src/runtime/HalideRuntimeHexagonHost.h
@@ -80,7 +80,7 @@ typedef enum halide_hexagon_power_mode_t {
 } halide_hexagon_power_mode_t;
 
 /** More detailed power settings to control Hexagon.
- * @param set_mips - Set to TRUE to requst MIPS
+ * @param set_mips - Set to TRUE to request MIPS
  * @param mipsPerThread - mips requested per thread, to establish a minimal clock frequency per HW thread
  * @param mipsTotal - Total mips requested, to establish total number of MIPS required across all HW threads
  * @param set_bus_bw - Set to TRUE to request bus_bw
diff --git a/src/runtime/HalideRuntimeMetal.h b/src/runtime/HalideRuntimeMetal.h
index 802d6659317f..fef18c895101 100644
--- a/src/runtime/HalideRuntimeMetal.h
+++ b/src/runtime/HalideRuntimeMetal.h
@@ -87,7 +87,7 @@ struct halide_metal_command_queue;
 extern int halide_metal_acquire_context(void *user_context, struct halide_metal_device **device_ret,
                                         struct halide_metal_command_queue **queue_ret, bool create);
 
-/** This call balances each successfull halide_metal_acquire_context call.
+/** This call balances each successful halide_metal_acquire_context call.
  * If halide_metal_acquire_context is replaced, this routine must be replaced
  * as well.
  */