
Dmitriim/rebase main #217 (Draft)

Wants to merge 165 commits into base: dev-upstream_main

Changes from all commits (165 commits)
38ada4a
Short preamble for the README, explaining why this clone exists
bertmaher Apr 17, 2024
8600c20
OSS Automated Fix: Addition of Code of Conduct (#1)
facebook-github-bot May 1, 2024
875b15f
[BACKEND][CPU] Initial plumbing for cpu backend (#2)
minjang May 2, 2024
015e50d
[BACKEND][CPU] Create TritonCPU and conversion dialects (#3)
minjang May 6, 2024
3df2a1d
Update README.md
minjang May 6, 2024
d8a8211
Convert tt.func and tt.return (#4)
minjang May 13, 2024
2752fa8
[BACKEND][CPU] Convert tt.get_program_id and tt.print (Hello World) (#1)
minjang May 14, 2024
1f61335
Quick patches to make it work after rebasing (#3)
minjang May 16, 2024
7b56e5a
Support basic lowering through vector dialect in CPU backend.
ienkovich May 2, 2024
e603b00
Revert unreviewed changes. (#5)
shanenay May 17, 2024
31ad8a1
Add a workaround for LLVM bug in codegen for bf16 vector cast. (#4)
ienkovich May 17, 2024
9ec6fa8
Prototype of the Triton CPU backend with basic compilation and execut…
ienkovich May 24, 2024
e1dd10e
Add support for tl.cat operation. (#9)
ienkovich May 28, 2024
7b183cf
[BACKEND][CPU] Make it buildable and runnable in a different environm…
minjang May 28, 2024
d6df9c1
Add support for simple reductions. (#10)
ienkovich May 29, 2024
ad823a3
Support tl.histogram for CPU. (#12)
ienkovich May 29, 2024
61a99a0
Fix merge and compile errors (#13)
minjang May 30, 2024
1c0986c
[CPU] Support flexible active driver + update vector-add tutorial (#11)
minjang May 31, 2024
0db8651
Added a simple workflow to run on self-hosted intel runner (#16)
gshimansky Jun 7, 2024
bdc9462
Fixed build and test workflow for intel self-hosted runner (#17)
gshimansky Jun 9, 2024
7922469
[CPU] Add an OpenMP-based CPU launcher (#15)
minjang Jun 10, 2024
508dff5
Support generic reduction and scan cases. (#14)
ienkovich Jun 10, 2024
e93ef5a
[CPU] Dump human-readable asm code in TRITON_CACHE_DIR (#19)
minjang Jun 11, 2024
ff40f16
Added g++ installation after switching to ubuntu-22.04 (#21)
gshimansky Jun 11, 2024
dbc68ed
Support atomic ops for CPU. (#20)
ienkovich Jun 11, 2024
2af366d
[TUTORIAL] Add unmasked matrix multiply example to triton-cpu (#23)
Kuigesi Jun 14, 2024
b8334a4
Update matrix-multiplication-cpu tutorial, use preallocated output bu…
Kuigesi Jun 15, 2024
68c9780
Fixes for x86 CI workflow (#26)
ienkovich Jun 18, 2024
45d02ad
Use static compilation for kernels. (#29)
ienkovich Jun 20, 2024
f768deb
Move byte manipulation ops from elwise ops conversion. (#28)
ienkovich Jun 20, 2024
33e4a0b
[TUTORIAL] Add the non-persistent softmax and make it for CPU (#22)
minjang Jun 20, 2024
054f1f3
Enable few more core tests for CPU. (#31)
ienkovich Jun 20, 2024
78851cb
Support tt.split for CPU. (#30)
ienkovich Jun 20, 2024
f13afde
[BACKEND][CPU] Make the CPU backend buildable and runnable in Mac M1.…
Kuigesi Jun 25, 2024
6122eaf
[CPU] Add conversion for unsupported BF16 ops via target-specific sta…
ienkovich Jun 25, 2024
bc568a4
Enabled simple build&test workflow, disabled old Integration Tests wo…
gshimansky Jun 25, 2024
57bce46
[BACKEND][CPU] Specify CPU target to native for GNU/Linux Arm (#34)
Kuigesi Jun 25, 2024
b0ef7b9
Add conversions for mixed precision matmuls. (#32)
ienkovich Jul 2, 2024
ad60606
[Op support] Support 'get_num_programs' (#39)
Devjiu Jul 3, 2024
6534a26
Add fast-math option: allow fp reduction reassociation
Kuigesi Jul 8, 2024
e7bb5dc
Change the lowering option for vector.multi_reduction from InnerParal…
Kuigesi Jul 8, 2024
d713fad
Fix: TrapUnreachable is not controled by fast-math, we set it uncondi…
Kuigesi Jul 9, 2024
6a323ab
[so] Compile asm to .so as part of staged lowering (#53)
int3 Jul 17, 2024
0992b4d
Add libdevice for CPU. (#52)
ienkovich Jul 17, 2024
caf43d0
[Op support] Dot3D support (#43)
Devjiu Jul 17, 2024
848e43e
Support FP8 conversions for CPU. (#40)
ienkovich Jul 17, 2024
f578a97
[CPU] Support device_print for scalar types first (#54)
minjang Jul 18, 2024
e576346
[TUTORIAL] Add matrix vector multiplication tutorial (#46)
Kuigesi Jul 19, 2024
d845272
Fix FuncOp lowering. (#61)
ienkovich Jul 19, 2024
a587403
[CPU] Easy: remove the old initial boilerplate code (#59)
minjang Jul 19, 2024
75142b0
[Scf If types] Support conversion of types for scf::if (#45)
Devjiu Jul 22, 2024
1f0ca4a
[WA for fp16 torch.matmul] Replace torch.matmul with np.matmul (#44)
Devjiu Jul 23, 2024
dd5e3e2
[cpu] Have MulhiUI lowering support scalars (#64)
int3 Jul 23, 2024
c4ea761
[cpu] Fix formatting (#65)
int3 Jul 23, 2024
fe9f0cd
[cpu] Support tl.load(..., padding="nan") (#69)
int3 Jul 23, 2024
60690e9
[cpu] Use helpers from OptCommon.h to simplify code (#67)
int3 Jul 23, 2024
bd0ea60
[cpu] Follow up to #69 (#70)
int3 Jul 23, 2024
9508a2f
[cpu] Add runtime library for CPU kernels (#73)
int3 Jul 25, 2024
cf21e44
[FP8 tests] Enable several fp8 tests (#49)
Devjiu Jul 25, 2024
34cd5d4
[cpu] Make runtime library build on Linux too (#75)
int3 Jul 25, 2024
ffc885a
[cpu] Get more of test_random.py working (#77)
int3 Jul 26, 2024
bb57572
[FIX Pytest] Resolve 'importlib' issue (#78)
Devjiu Jul 29, 2024
679a88b
Fix importlib issues (#80)
int3 Jul 29, 2024
b3e6597
[cpu] Add test_annotations.py to CI (#81)
int3 Jul 30, 2024
21ab56b
Reduce/disable some tests on CPU for faster CI runs. (#83)
ienkovich Aug 1, 2024
af0fc06
[cpu] Don't reuse shuffle dummies (#88)
int3 Aug 5, 2024
c8b43fe
Utilize vector math functions from libmvec. (#55)
ienkovich Aug 5, 2024
e35936d
Make tl.debug_barrier() a no-op on CPU (#89)
int3 Aug 6, 2024
ee88d7e
ConvertMemoryOps should not use cf dialect (#91)
int3 Aug 6, 2024
a95b8eb
Remove registered pipelines in favor of explicit lists in python. (#93)
ienkovich Aug 7, 2024
dcc69d2
Don't use cf dialect in ConvertAtomicOps (#94)
int3 Aug 7, 2024
9aa8757
atomic_rmw ops should return original value (#95)
int3 Aug 7, 2024
9d4200e
Compute a scalar pointer for vector load instead of extracting it fro…
ienkovich Aug 7, 2024
7729074
Add pass to optimize masked loads and stores. (#96)
ienkovich Aug 7, 2024
76d9e65
Fix incorrect casts in mask optimization. (#101)
ienkovich Aug 8, 2024
668866f
Add conversion for scf.while (#103)
int3 Aug 8, 2024
cf99e44
[TUTORIAL] Add bf16 matrix vector multiplication tutorial (#90)
Kuigesi Aug 8, 2024
2c6453b
Add an option to use sleef instead of libmvec. (#104)
ienkovich Aug 9, 2024
f727802
Enable fast math by default. (#108)
ienkovich Aug 9, 2024
b6009eb
Add more libdevice lowerings (#97)
int3 Aug 9, 2024
97341f0
Enable rsqrt and floor for BF16. (#109)
ienkovich Aug 9, 2024
9e94a21
Remove specific dwarf version from -g option. (#110)
ienkovich Aug 9, 2024
29b9fdb
Enable `min_dot_size`
Devjiu Aug 13, 2024
8ff792f
[Formatting] Apply formating
Devjiu Aug 13, 2024
8e0d331
Remove is_cpu arg from do_bench. (#113)
ienkovich Aug 13, 2024
0e5fb8f
Enable few more tutorials for CPU (#114)
ienkovich Aug 13, 2024
869b5ab
Pass device type to do_bench in autotuner. (#115)
ienkovich Aug 13, 2024
96fc92e
Fix indices extraction from block pointer. (#116)
ienkovich Aug 14, 2024
d1e748c
[cpu] Rework device_print with triton_cpu.print and 1D vector printin…
minjang Aug 14, 2024
1f97f14
[Pytests] Add several suits (#106)
Devjiu Aug 14, 2024
89d1b42
Identify dot product pattern (mul followed by a sum) for bf16, and co…
Kuigesi Aug 15, 2024
e2247f2
Add optional packing for converting bf16 dot product. (#118)
Kuigesi Aug 16, 2024
5ae0b70
Add load/store scalarization through loops. (#119)
ienkovich Aug 21, 2024
b46f085
Fix typo. (#122)
ienkovich Aug 21, 2024
f83cece
Add lit tests for load/store scalarization. (#121)
ienkovich Aug 22, 2024
544ccf2
[cpu][easy] Fix compiler error on clang (#120)
minjang Aug 22, 2024
439867b
Offload a part of masks optimization to the canonicalizer.
ienkovich Aug 22, 2024
25d5d3e
Implement get_module_map for cpu backend
int3 Aug 29, 2024
eb759af
Make CPU runtime lib lookup work for Python 3.8 (#129)
int3 Aug 29, 2024
ff4b347
Implement device_assert (#126)
int3 Aug 30, 2024
75f512e
Implement isnan, isinf, signbit (#127)
int3 Aug 30, 2024
58c7d51
Vendor sleef as a submodule (#130)
int3 Aug 31, 2024
ebada24
Add test_debug_dump.py to CI (#131)
int3 Sep 1, 2024
538ed7f
Refactor MathToLibmvec pass (#135)
int3 Sep 4, 2024
24d4baf
[CPU] Add unit test for print with isSigned and several fixes (#132)
minjang Sep 4, 2024
050a5ea
Refactor math tests + select vector lib backend via kernel option (#136)
int3 Sep 4, 2024
bc99529
Vectorize expm1, sqrt, and floor using sleef (#137)
int3 Sep 4, 2024
ae0810a
Fix infinite optimization loop for mask optimization. (#138)
ienkovich Sep 4, 2024
a3ef9d4
Implement libdevice.trunc (#140)
int3 Sep 5, 2024
9a68cf0
Remove old LLVM bug workaround. (#141)
ienkovich Sep 6, 2024
6609482
Add kernel execution time measurement using hooks for do_bench (#139)
ienkovich Sep 9, 2024
9606a34
Use llvm_unreachable in cpu_runtime.cpp (#145)
minjang Sep 9, 2024
77b2d2e
Fix undefined symbole error in libTritonCPURuntime.so (#146)
minjang Sep 9, 2024
f1e3f68
[Dot3D test] Enable with lower block size (#117)
Devjiu Sep 18, 2024
ab1f1aa
Add an option to choose between default reduction lowering and our ow…
ienkovich Sep 20, 2024
d3f1b31
Fix regressions due to rebasing to the latest upstream
minjang Sep 21, 2024
819ea43
Update build-test.yml for pybind11
minjang Sep 22, 2024
73e909d
[FP8 support] Enable Float8 tests failed after rebase (#151)
Devjiu Sep 23, 2024
539e6be
Use 1-D vector reduction op to convert reduce op (#152)
Sep 27, 2024
e4588ea
[Keep materialization] Turn on meterialization (#154)
Devjiu Sep 27, 2024
1ef9001
[Scalarization/Loops generation] Refactor and new pass/interfaces int…
Devjiu Sep 30, 2024
358d3af
Lower memory ops with vector gather and scatter (#158)
Oct 14, 2024
d80f30a
Introduce DotOp lowering to AMX (#157)
ienkovich Oct 17, 2024
9fac57a
Implement more libdevice functions using extern_elementwise (#161)
int3 Oct 21, 2024
175d629
Fix compilation when ARCH_REQ_XCOMP_PERM isn't defined (#163)
int3 Oct 21, 2024
d466759
[CPU] Drop MLIR prefix in ScalarizeInterface (#164)
minjang Oct 21, 2024
efa03d9
Pad size 2 vectors to size 4 when lowering extern_elementwise ops (#162)
int3 Oct 22, 2024
4b83250
Rebase onto upstream triton ff306da26b and fix regressions
minjang Oct 22, 2024
6cf9ff0
Simple fixes to build on MacOSx (#165)
digantdesai Oct 23, 2024
e24a63a
Fix trailing null char in ulpSuffix (#166)
digantdesai Oct 23, 2024
b2f8c99
Rebase onto upstream triton 4a5431159a and fix regressions
minjang Oct 24, 2024
76b3225
[Test][Autotuner] Skip use_cuda_graph for non cuda devices (#169)
Devjiu Oct 25, 2024
c8c4bce
Add num_threads option to control threading per kernel invocation. (#…
ienkovich Oct 28, 2024
8573886
[TTC Print Memref] Simplify further multidimensional tensor printing …
Devjiu Oct 28, 2024
3d528f7
Small fixes for autotuner on CPU (#172)
ienkovich Oct 30, 2024
4a778e6
Small fixes for clang + macosx (#173)
digantdesai Oct 30, 2024
3073466
Support multi-dimensional tensor prints in CPU runtime. (#174)
ienkovich Oct 30, 2024
fbdcbfc
Fix linux-aarch64 build (#176)
desertfire Nov 11, 2024
8f5b245
Fix math tests for armv8 (#178)
digantdesai Nov 27, 2024
c0cbf97
Allow using local omp with Apple clang (#181)
digantdesai Dec 4, 2024
217591b
Add pytest.mark.cpu to two more already-passing tests (#183)
int3 Dec 6, 2024
f1a54c4
Move libdevice to third_party (#182)
int3 Dec 6, 2024
682cc03
Introduce triton_cpu.DotOp.
ienkovich Nov 22, 2024
74a3488
Fixes to use the latest LLVM.
ienkovich Oct 2, 2024
ad46864
Fix pybind11 build issue for TritonCPU.
ienkovich Dec 6, 2024
8ca15da
Use mlir::amx::TileType.
ienkovich Oct 2, 2024
fcada66
Fix formatting
ienkovich Dec 6, 2024
f2d3208
Fix test_tl_range.
ienkovich Dec 6, 2024
4361d34
Fix test_conversions.
ienkovich Dec 6, 2024
dccfd79
Disable test_block_copy with lower bound check.
ienkovich Dec 6, 2024
a509dd9
Fix isSigned and add float16 in PrintOp (#191)
minjang Dec 9, 2024
df38430
Add TritonCPU canonicalizer. (#192)
ienkovich Dec 10, 2024
90908d1
Introduce FMA lowering for DotOp. (#193)
ienkovich Dec 12, 2024
ee1bdc9
AMX lowering improvements (#194)
ienkovich Dec 12, 2024
220b95a
Fix extra-store in matmul tutorial. (#198)
ienkovich Dec 17, 2024
485d709
Remove unnecessary bounds checks. (#199)
ienkovich Dec 19, 2024
561c962
Enable armv8 CI (#195)
digantdesai Dec 21, 2024
5b430ee
Fix isSigned usage for scalar prints. (#201)
ienkovich Dec 23, 2024
5846858
Support VNNI pre-encoded input in AMX lowering. (#210)
ienkovich Jan 13, 2025
b812067
Update default target selection logic (#212)
digantdesai Jan 24, 2025
b0015d3
[OneDNN] Ukernel Backend interface (#197)
Devjiu Feb 18, 2025
ac48b1f
Add missing headers toruntime (#215)
Devjiu Feb 19, 2025
f46cf95
Allign with new LLVM version and remove deprecated calls.
Devjiu Feb 20, 2025
5f23d7b
rebase fixes
Devjiu Feb 21, 2025
95e6cbe
rebase issues
Devjiu Feb 21, 2025
158 changes: 158 additions & 0 deletions .github/workflows/build-test.yml
@@ -0,0 +1,158 @@
name: Build and test
run-name: ${{ inputs.run_name }}

on:
workflow_dispatch:
pull_request:
branches:
- main
# You can name your branch dev-foo to get CI runs.
- 'dev-**'
push:
branches:
- main

jobs:
pre-commit:
name: Pre-commit checks
runs-on:
- glados
- intel
- x86
steps:
- name: Print inputs
run: |
echo "${{ toJSON(github.event.inputs) }}"
echo INSTALL_IPEX=${{ env.INSTALL_IPEX }}

- name: Checkout repository
uses: actions/checkout@v4

- name: Install Python 3.11
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Run pre-commit checks
run: |
pip install --upgrade pre-commit

# TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
# If first run of yapf worked and made changes reset the tree to the original state
git reset --hard

python3 -m pre_commit run --show-diff-on-failure --color=always --all-files --verbose

build-test:
name: Build and test on ${{ matrix.config.runner }}
runs-on: ${{ matrix.config.runs_on }}
strategy:
matrix:
python: ['3.11']
config:
- {runner: 'Ubuntu Intel x86', runs_on: ['glados', 'intel', 'x86'], target-os: 'ubuntu', arch: 'x86'}
- {runner: 'MacOS-latest ARM64', runs_on: ['macos-latest'], target-os: 'macos', arch: 'arm64'}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive

- name: Install Python ${{ matrix.python }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}

- name: Install pip and apt dependencies
env:
RUNNER_TARGET_OS: ${{ matrix.config.target-os }}
run: |
echo "RUNNER_TARGET_OS: ${RUNNER_TARGET_OS}"
python3 -m pip install --upgrade pip
python3 -m pip install wheel cmake==3.24 ninja pytest-xdist lit pybind11
if [[ "${RUNNER_TARGET_OS}" == "ubuntu" ]]; then
sudo apt-get update
sudo apt-get install -y zlib1g-dev g++
fi
pip install torch==2.1.2

- name: Install Triton
run: |
echo "PATH is '$PATH'"
cd python
python3 -m pip install --no-build-isolation -vvv '.[tests]'

- name: Run python unit tests for MacOS Arm64
if: matrix.config.target-os == 'macos'
run: |
export CC=$(which clang)
export TRITON_DISABLE_OPENMP=1 # temporary
export TRITON_CPU_BACKEND=1

# Document some versions/flags
echo "xcode-select:"; xcode-select -p
echo "CC: ${CC}"
clang --version
echo "TRITON_DISABLE_OPENMP=${TRITON_DISABLE_OPENMP}"
echo "TRITON_CPU_BACKEND=${TRITON_CPU_BACKEND}"

# Skip bfloat16 tests for now
# We are generating bfcvt for bfloat16 tests when converting to fp32.
# This is only for Clang15, works OK for Clang16
# TODO - fix this using driver flags.
python -m pytest -s -n 32 --device cpu \
python/test/unit/language/test_core.py -m cpu -k "not bfloat16"
python -m pytest -s -n 32 --device cpu \
python/test/unit/cpu/test_math.py \
python/test/unit/cpu/test_opt.py \
python/test/unit/language/test_annotations.py \
python/test/unit/language/test_block_pointer.py \
python/test/unit/language/test_compile_errors.py \
python/test/unit/language/test_conversions.py \
python/test/unit/language/test_decorator.py \
python/test/unit/language/test_pipeliner.py \
python/test/unit/language/test_random.py \
python/test/unit/language/test_standard.py \
python/test/unit/runtime/test_autotuner.py \
python/test/unit/runtime/test_bindings.py \
python/test/unit/runtime/test_cache.py \
python/test/unit/runtime/test_driver.py \
python/test/unit/runtime/test_jit.py \
python/test/unit/runtime/test_launch.py \
python/test/unit/runtime/test_subproc.py \
python/test/unit/test_debug_dump.py \
-k "not bfloat16"

- name: Run python unit tests for Intel
if: matrix.config.target-os == 'ubuntu'
run: |
python -m pytest -s -n 32 --device cpu python/test/unit/language/test_core.py -m cpu
python -m pytest -s -n 32 --device cpu \
python/test/unit/cpu/test_math.py \
python/test/unit/cpu/test_opt.py \
python/test/unit/language/test_annotations.py \
python/test/unit/language/test_block_pointer.py \
python/test/unit/language/test_compile_errors.py \
python/test/unit/language/test_conversions.py \
python/test/unit/language/test_decorator.py \
python/test/unit/language/test_pipeliner.py \
python/test/unit/language/test_random.py \
python/test/unit/language/test_standard.py \
python/test/unit/runtime/test_autotuner.py \
python/test/unit/runtime/test_bindings.py \
python/test/unit/runtime/test_cache.py \
python/test/unit/runtime/test_driver.py \
python/test/unit/runtime/test_jit.py \
python/test/unit/runtime/test_launch.py \
python/test/unit/runtime/test_subproc.py \
python/test/unit/test_debug_dump.py

- name: Run lit tests
run: |
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Could not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}/TritonCPU"
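The lit step above derives the test directory from the CMake build folder's name. A standalone sketch of that discovery logic, using a made-up build-directory name (`cmake.linux-x86_64-3.11` is illustrative only, not a path from this repository):

```shell
# Recreate the layout the workflow expects: build/<cmake-*>/test.
rm -rf /tmp/lit-demo
mkdir -p /tmp/lit-demo/build/cmake.linux-x86_64-3.11/test/TritonCPU
cd /tmp/lit-demo

# Same discovery as the workflow: the build subdirectory name contains "cmake".
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
  echo "Could not find '${LIT_TEST_DIR}'"; exit 1
fi
echo "found: ${LIT_TEST_DIR}"
```

In the real workflow, `lit -v "${LIT_TEST_DIR}/TritonCPU"` then runs the FileCheck-style tests. Note that the workflow's `exit -1` is non-portable shell (exit statuses are 0–255), so the sketch uses `exit 1` instead.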
17 changes: 10 additions & 7 deletions .github/workflows/integration-tests.yml
@@ -9,13 +9,16 @@
name: Integration Tests
on:
workflow_dispatch:
pull_request:
branches-ignore: ['llvm-**']
merge_group:
branches: [main, 'dev-**']
types: [checks_requested]
push:
branches: [main]
# Disabled automatic triggers because tests in this workflow fail to run.
# pull_request:
# # You can name your branch dev-foo to get CI runs.
# branches-ignore: ['llvm-**']
# merge_group:
# branches: [main, 'dev-**']
# types: [checks_requested]
# push:
# branches: [main]

concurrency:
group: ${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
16 changes: 9 additions & 7 deletions .github/workflows/integration-tests.yml.in
@@ -8,13 +8,15 @@ name: Integration Tests

on:
workflow_dispatch:
pull_request:
branches-ignore: ['llvm-**']
merge_group:
branches: [main, 'dev-**']
types: [checks_requested]
push:
branches: [main]
# Disabled automatic triggers because tests in this workflow fail to run.
# pull_request:
# # You can name your branch dev-foo to get CI runs.
# branches-ignore: ['llvm-**']
# merge_group:
# branches: [main, 'dev-**']
# types: [checks_requested]
# push:
# branches: [main]

concurrency:
group: ${{ github.ref }}
1 change: 1 addition & 0 deletions .gitignore
@@ -9,6 +9,7 @@ python/triton*.egg-info/

python/triton/_C/*.pyd
python/triton/_C/*.so
python/triton/_C/*.so.*
python/triton/_C/*.dylib
python/triton/_C/*.pdb
python/triton/_C/*.exe
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "sleef"]
path = third_party/sleef
url = https://github.com/shibatch/sleef
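The entry above is consumed by `git submodule` (see the "Vendor sleef as a submodule (#130)" commit). A runnable sketch of how such an entry is created and what lands in `.gitmodules` — it uses a throwaway local repository as a stand-in for the real sleef URL so it works offline:

```shell
rm -rf /tmp/submodule-demo
mkdir -p /tmp/submodule-demo && cd /tmp/submodule-demo

# A local stand-in for the sleef repository, with one empty commit.
git init -q sub
git -C sub -c user.email=demo@example -c user.name=demo \
    commit -q --allow-empty -m "init"

# The superproject: adding the submodule writes the .gitmodules entry.
git init -q main && cd main
git -c protocol.file.allow=always \
    submodule add -q /tmp/submodule-demo/sub third_party/sleef
cat .gitmodules
```

For the real repository, consumers fetch the pinned sleef sources with `git clone --recursive` or `git submodule update --init` (the CI checkout above uses `submodules: recursive` for the same effect).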
80 changes: 80 additions & 0 deletions CODE_OF_CONDUCT.md
@@ -0,0 +1,80 @@
# Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.

This Code of Conduct also applies outside the project spaces when there is a
reasonable belief that an individual's behavior may have a negative impact on
the project or its community.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <[email protected]>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
25 changes: 25 additions & 0 deletions README.md
@@ -1,3 +1,28 @@
# Triton-CPU

A long-lived development branch to build an experimental CPU backend for [Triton](https://github.com/openai/triton).

This repository clones the main Triton repository, but we intend to minimize
divergences in the core (and ideally upstream anything that needs to change and
isn't too CPU-specific). Most of the CPU work should be in a backend
subdirectory (similar to how GPU vendors are supported today). We're starting
with a clone to give ourselves maximum development flexibility as this project
gets off the ground!

# How to use it?

Build it the same way as upstream Triton, then set `TRITON_CPU_BACKEND=1` to select the CPU backend instead of any available GPU backend:

```shell
TRITON_CPU_BACKEND=1 python3 tutorials/01-vector-add.py
```

**NOTE: This is still a work in progress.**
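Under the hood, the switch is just an environment-variable check made when the active driver is chosen (see the "[CPU] Support flexible active driver" commit). A simplified, hypothetical sketch — the function and backend names below are illustrative, not Triton's actual API:

```python
import os

def select_backend(available=("cuda", "hip", "cpu")):
    """Hypothetical sketch: TRITON_CPU_BACKEND=1 forces the CPU backend;
    otherwise the first available GPU backend wins."""
    if os.environ.get("TRITON_CPU_BACKEND") == "1":
        return "cpu"
    for name in available:
        if name != "cpu":
            return name  # prefer a GPU backend when one exists
    return "cpu"  # CPU is the fallback when no GPU backend is available

os.environ["TRITON_CPU_BACKEND"] = "1"
print(select_backend())   # -> cpu
del os.environ["TRITON_CPU_BACKEND"]
print(select_backend())   # -> cuda
```

This is why no code changes are needed to switch backends: the variable is read at driver-selection time, so the same script runs on GPU or CPU depending only on the environment.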

---

# Upstream README

<div align="center">
<img src="https://lh5.googleusercontent.com/wzQKEsTFkrgNQO9JjhGH5wFvslJr1saLtLaJ_a6Fp_gNENpvt3VG7BmztwngU9hFJaU4CPwGiw1opQtDvTkLrxWRbO_a12Q-pdESWHgtmheIHcPbOL5ZMC4TSiJVe5ty1w=w3517" alt="Triton logo">
</div>
15 changes: 15 additions & 0 deletions bin/RegisterTritonDialects.h
@@ -4,6 +4,7 @@
#include "third_party/nvidia/include/Dialect/NVGPU/IR/Dialect.h"
#include "third_party/proton/dialect/include/Dialect/Proton/IR/Dialect.h"
#include "triton/Dialect/Triton/IR/Dialect.h"
#include "triton/Dialect/TritonCPU/IR/Dialect.h"
#include "triton/Dialect/TritonGPU/IR/Dialect.h"
#include "triton/Dialect/TritonNvidiaGPU/IR/Dialect.h"

@@ -16,12 +17,17 @@
#include "triton/Dialect/TritonGPU/Transforms/Passes.h"
#include "triton/Dialect/TritonNvidiaGPU/Transforms/Passes.h"

#include "cpu/include/ScalarizePass/ScalarizeInterfaceImpl.h"
#include "cpu/include/TritonCPUToLLVM/Passes.h"
#include "cpu/include/TritonCPUTransforms/Passes.h"
#include "cpu/include/TritonToTritonCPU/Passes.h"
#include "nvidia/include/NVGPUToLLVM/Passes.h"
#include "nvidia/include/TritonNVIDIAGPUToLLVM/Passes.h"
#include "triton/Conversion/TritonGPUToLLVM/Passes.h"
#include "triton/Conversion/TritonToTritonGPU/Passes.h"
#include "triton/Target/LLVMIR/Passes.h"

#include "mlir/Dialect/AMX/AMXDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/InitAllPasses.h"
@@ -68,12 +74,21 @@ inline void registerTritonDialects(mlir::DialectRegistry &registry) {
mlir::triton::registerTritonAMDGPUInsertInstructionSchedHints();
mlir::triton::registerTritonAMDGPULowerInstructionSchedHints();

// CPU passes
mlir::triton::cpu::registerTritonToTritonCPUPasses();
mlir::triton::cpu::registerTritonCPUTransformsPasses();
mlir::triton::cpu::registerTritonCPUToLLVMPasses();
mlir::triton::cpu::registerTritonOpScalarizeExternalModels(registry);

// TODO: register Triton & TritonGPU passes
registry
.insert<mlir::triton::TritonDialect, mlir::cf::ControlFlowDialect,
mlir::triton::cpu::TritonCPUDialect,
mlir::triton::nvidia_gpu::TritonNvidiaGPUDialect,
mlir::triton::gpu::TritonGPUDialect, mlir::math::MathDialect,
mlir::arith::ArithDialect, mlir::scf::SCFDialect,
mlir::memref::MemRefDialect, mlir::vector::VectorDialect,
mlir::amx::AMXDialect, mlir::tensor::TensorDialect,
mlir::gpu::GPUDialect, mlir::LLVM::LLVMDialect,
mlir::NVVM::NVVMDialect, mlir::triton::nvgpu::NVGPUDialect,
mlir::triton::amdgpu::TritonAMDGPUDialect,
1 change: 1 addition & 0 deletions include/triton/Analysis/Utility.h
@@ -5,6 +5,7 @@
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Support/LLVM.h"
#include "triton/Dialect/Triton/IR/Dialect.h"
#include "triton/Dialect/TritonCPU/IR/Dialect.h"
#include "triton/Dialect/TritonGPU/IR/Dialect.h"
#include "triton/Tools/LinearLayout.h"

@@ -1,3 +1,3 @@
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls --name TritonToTritonGPU)
add_public_tablegen_target(TritonConversionPassIncGen)
add_public_tablegen_target(TritonConversionToGPUPassIncGen)
4 changes: 2 additions & 2 deletions include/triton/Conversion/TritonToTritonGPU/Passes.h
@@ -1,5 +1,5 @@
#ifndef TRITON_CONVERSION_PASSES_H
#define TRITON_CONVERSION_PASSES_H
#ifndef TRITON_CONVERSION_TO_GPU_PASSES_H
#define TRITON_CONVERSION_TO_GPU_PASSES_H

#include "triton/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.h"
