Merge branch 'main' into portable
rickardp authored Jan 31, 2024
2 parents 0ee8f7f + b90db7e commit 1f35064
Showing 47 changed files with 470 additions and 328 deletions.
8 changes: 8 additions & 0 deletions .git-blame-ignore-revs
@@ -0,0 +1,8 @@
# ran black and isort for coherent code formatting
bfa0e33294f2b1dc25e65a33be2397f989824298

# reran black with linelength 80 for greater readability
ea7c14f8ef64924f2d0ff80df3cdabf2c7299848

# Remove f-prefix from strings that don't use formatting
7727fa4c8c6c1ef2b109120aff4196a0a6bf3ed6
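
Note (not part of the diff): git does not pick this file up automatically. To have blame skip the formatting-only commits listed above, pass the file explicitly or set it once per clone:

    git blame --ignore-revs-file .git-blame-ignore-revs bitsandbytes/cextension.py
    git config blame.ignoreRevsFile .git-blame-ignore-revs

(The blamed path is just an example; any tracked file works. GitHub's own blame view reads a top-level .git-blame-ignore-revs on its own.)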
2 changes: 1 addition & 1 deletion .github/workflows/build_documentation.yml
@@ -15,4 +15,4 @@ jobs:
package: bitsandbytes
repo_owner: TimDettmers
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
hf_token: ${{ secrets.HUGGINGFACE_PUSH }}
19 changes: 19 additions & 0 deletions .github/workflows/lint.yml
@@ -0,0 +1,19 @@
name: Lint

on:
push:
branches:
- main
pull_request:

jobs:
Lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.12"
- uses: pre-commit/[email protected]
env:
RUFF_OUTPUT_FORMAT: github
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,8 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.15
hooks:
- id: ruff
args:
- --fix
# - id: ruff-format # TODO: enable when the time is right
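
To reproduce the CI lint pass locally (a sketch, assuming pre-commit is installed from PyPI):

    pip install pre-commit
    pre-commit install          # lint staged files on every commit from now on
    pre-commit run --all-files  # one-off pass over the whole tree, like CI

In the workflow above, RUFF_OUTPUT_FORMAT=github asks ruff to print findings as GitHub workflow annotations, so lint errors show up inline on the pull-request diff.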
6 changes: 2 additions & 4 deletions benchmarking/switchback/make_plot_with_jsonl.py
@@ -1,9 +1,7 @@
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import pandas as pd

cmap=plt.get_cmap('cool')

20 changes: 14 additions & 6 deletions benchmarking/switchback/speed_benchmark.py
@@ -1,14 +1,22 @@
import json

import time

import torch
import torch.nn as nn

from bitsandbytes.triton.int8_matmul_mixed_dequantize import (
int8_matmul_mixed_dequantize,
)
from bitsandbytes.triton.int8_matmul_rowwise_dequantize import (
int8_matmul_rowwise_dequantize,
)
from bitsandbytes.triton.quantize_columnwise_and_transpose import (
quantize_columnwise_and_transpose,
)
from bitsandbytes.triton.quantize_global import (
quantize_global,
quantize_global_transpose,
)
from bitsandbytes.triton.quantize_rowwise import quantize_rowwise
from bitsandbytes.triton.quantize_columnwise_and_transpose import quantize_columnwise_and_transpose
from bitsandbytes.triton.int8_matmul_rowwise_dequantize import int8_matmul_rowwise_dequantize
from bitsandbytes.triton.quantize_global import quantize_global, quantize_global_transpose
from bitsandbytes.triton.int8_matmul_mixed_dequantize import int8_matmul_mixed_dequantize

# KNOW ISSUE: need to optimize "w_quantize_colwise_transpose" when embeddim is too large.

4 changes: 2 additions & 2 deletions bitsandbytes/__init__.py
@@ -3,14 +3,14 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from . import cuda_setup, utils, research
from . import cuda_setup, research, utils
from .autograd._functions import (
MatmulLtState,
bmm_cublas,
matmul,
matmul_4bit,
matmul_cublas,
mm_cublas,
matmul_4bit
)
from .cextension import COMPILED_WITH_CUDA
from .nn import modules
103 changes: 45 additions & 58 deletions bitsandbytes/__main__.py
@@ -1,11 +1,7 @@
import os
from os.path import isdir
import sys
import shlex
import subprocess

from warnings import warn
from typing import Tuple
from os.path import isdir

import torch

@@ -20,7 +16,7 @@ def find_file_recursive(folder, filename):
out = glob.glob(os.path.join(folder, "**", filename + ext))
outs.extend(out)
except Exception as e:
raise RuntimeError('Error: Something when wrong when trying to find file. {e}')
raise RuntimeError('Error: Something when wrong when trying to find file.') from e

return outs

@@ -62,14 +58,11 @@ def generate_bug_report_information():
print_header(f"{path} CUDA PATHS")
paths = find_file_recursive(path, '*cuda*')
print(paths)
except:
print(f'Could not read LD_LIBRARY_PATH: {path}')
except Exception as e:
print(f'Could not read LD_LIBRARY_PATH: {path} ({e})')
print('')





def print_header(
txt: str, width: int = HEADER_WIDTH, filler: str = "+"
) -> None:
@@ -78,67 +71,61 @@ def print_header(


def print_debug_info() -> None:
from . import PACKAGE_GITHUB_URL
print(
"\nAbove we output some debug information. Please provide this info when "
f"creating an issue via {PACKAGE_GITHUB_URL}/issues/new/choose ...\n"
)


generate_bug_report_information()
def main():
generate_bug_report_information()

from . import COMPILED_WITH_CUDA
from .cuda_setup.main import get_compute_capabilities

from . import COMPILED_WITH_CUDA, PACKAGE_GITHUB_URL
from .cuda_setup.env_vars import to_be_ignored
from .cuda_setup.main import get_compute_capabilities

print_header("OTHER")
print(f"COMPILED_WITH_CUDA = {COMPILED_WITH_CUDA}")
print(f"COMPUTE_CAPABILITIES_PER_GPU = {get_compute_capabilities()}")
print_header("")
print_header("DEBUG INFO END")
print_header("")
print("Checking that the library is importable and CUDA is callable...")
print("\nWARNING: Please be sure to sanitize sensitive info from any such env vars!\n")

print_header("OTHER")
print(f"COMPILED_WITH_CUDA = {COMPILED_WITH_CUDA}")
print(f"COMPUTE_CAPABILITIES_PER_GPU = {get_compute_capabilities()}")
print_header("")
print_header("DEBUG INFO END")
print_header("")
print(
"""
Running a quick check that:
+ library is importable
+ CUDA function is callable
"""
)
print("\nWARNING: Please be sure to sanitize sensible info from any such env vars!\n")
try:
from bitsandbytes.optim import Adam

try:
from bitsandbytes.optim import Adam
p = torch.nn.Parameter(torch.rand(10, 10).cuda())
a = torch.rand(10, 10).cuda()

p = torch.nn.Parameter(torch.rand(10, 10).cuda())
a = torch.rand(10, 10).cuda()
p1 = p.data.sum().item()

p1 = p.data.sum().item()
adam = Adam([p])

adam = Adam([p])
out = a * p
loss = out.sum()
loss.backward()
adam.step()

out = a * p
loss = out.sum()
loss.backward()
adam.step()
p2 = p.data.sum().item()

p2 = p.data.sum().item()
assert p1 != p2
print("SUCCESS!")
print("Installation was successful!")
except ImportError:
print()
warn(
f"WARNING: {__package__} is currently running as CPU-only!\n"
"Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
f"If you think that this is so erroneously,\nplease report an issue!"
)
print_debug_info()
except Exception as e:
print(e)
print_debug_info()
sys.exit(1)

assert p1 != p2
print("SUCCESS!")
print("Installation was successful!")
sys.exit(0)

except ImportError:
print()
warn(
f"WARNING: {__package__} is currently running as CPU-only!\n"
"Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n"
f"If you think that this is so erroneously,\nplease report an issue!"
)
print_debug_info()
sys.exit(0)
except Exception as e:
print(e)
print_debug_info()
sys.exit(1)
if __name__ == "__main__":
main()
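
Usage is unchanged by the refactor: the report is still produced by running the package as a script, and the exit code now reflects the outcome (0 on success or a CPU-only install, 1 on an unexpected error):

    python -m bitsandbytes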
2 changes: 1 addition & 1 deletion bitsandbytes/autograd/__init__.py
@@ -1 +1 @@
from ._functions import undo_layout, get_inverse_transform_indices
from ._functions import get_inverse_transform_indices, undo_layout
26 changes: 13 additions & 13 deletions bitsandbytes/autograd/_functions.py
@@ -1,8 +1,8 @@
import operator
import warnings
from dataclasses import dataclass
from functools import reduce # Required in Python 3
from typing import Tuple, Optional, List
import operator
from typing import Callable, Optional, Tuple
import warnings
from warnings import warn

import torch
@@ -14,9 +14,6 @@
def prod(iterable):
return reduce(operator.mul, iterable, 1)

tensor = torch.Tensor


# The inverse transformation for the colTuring and colAmpere format were contributed by Alex Borzunov:
# https://github.com/bigscience-workshop/petals/blob/main/src/petals/utils/linear8bitlt_patch.py

@@ -56,7 +53,10 @@ def get_current_outlier_idx(self):
return torch.Tensor(list(self.outliers)).to(torch.int64)


def get_inverse_transform_indices(transform_tile: callable, tile_size: Tuple[int, int]):
def get_inverse_transform_indices(
transform_tile: Callable[[torch.Tensor], torch.Tensor],
tile_size: Tuple[int, int],
):
"""
Compute a permutation of indices that invert the specified (tiled) matrix transformation
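
(Aside, not from the diff: a minimal sketch of the trick behind such an inverse, assuming the tile transform only permutes values without modifying them — the library's real implementation differs in details, e.g. it packs indices into int8 tensors to satisfy the transform's dtype requirements:)

    import torch

    def inverse_permutation_sketch(transform_tile, tile_size):
        # Push a tile of sequential indices through the transform, then
        # derive indices such that transformed.flatten()[inverse] restores
        # the original row-major order.
        rows, cols = tile_size
        tile = torch.arange(rows * cols).reshape(rows, cols)
        permuted = transform_tile(tile).flatten()
        inverse = torch.empty_like(permuted)
        inverse[permuted] = torch.arange(rows * cols)
        return inverse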
@@ -496,7 +496,7 @@ class MatMul4Bit(torch.autograd.Function):
# backward is mostly the same, but adds one extra clause (see "elif state.CxB is not None")

@staticmethod
def forward(ctx, A, B, out=None, bias=None, quant_state: F.QuantState = None):
def forward(ctx, A, B, out=None, bias=None, quant_state: Optional[F.QuantState] = None):
# default of pytorch behavior if inputs are empty
ctx.is_empty = False
if prod(A.shape) == 0:
@@ -549,10 +549,10 @@ def backward(ctx, grad_output):


def matmul(
A: tensor,
B: tensor,
out: tensor = None,
state: MatmulLtState = None,
A: torch.Tensor,
B: torch.Tensor,
out: Optional[torch.Tensor] = None,
state: Optional[MatmulLtState] = None,
threshold=0.0,
bias=None
):
@@ -562,7 +562,7 @@ def matmul(
return MatMul8bitLt.apply(A, B, out, bias, state)


def matmul_4bit(A: tensor, B: tensor, quant_state: F.QuantState, out: tensor = None, bias=None):
def matmul_4bit(A: torch.Tensor, B: torch.Tensor, quant_state: F.QuantState, out: Optional[torch.Tensor] = None, bias=None):
assert quant_state is not None
if A.numel() == A.shape[-1] and A.requires_grad == False:
if A.shape[-1] % quant_state.blocksize != 0:
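(Also outside the diff — a hedged usage sketch for the 4-bit path; the API names match the library's public interface around this release, but the shapes, dtypes, and the .t() convention here are illustrative only:)

    import torch
    import bitsandbytes as bnb
    import bitsandbytes.functional as F

    W = torch.randn(64, 64, device="cuda", dtype=torch.float16)
    W4, quant_state = F.quantize_4bit(W)   # packed 4-bit weights + QuantState
    x = torch.randn(1, 64, device="cuda", dtype=torch.float16)
    # A single row with requires_grad=False takes the inference branch above.
    out = bnb.matmul_4bit(x, W4.t(), quant_state)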
9 changes: 3 additions & 6 deletions bitsandbytes/cextension.py
@@ -1,12 +1,9 @@
import ctypes as ct
import os
import torch

from pathlib import Path
from warnings import warn

from bitsandbytes.cuda_setup.main import CUDASetup
import torch

from bitsandbytes.cuda_setup.main import CUDASetup

setup = CUDASetup.get_instance()
if setup.initialized != True:
@@ -25,7 +22,7 @@
Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them
to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes
and open an issue at: https://github.com/TimDettmers/bitsandbytes/issues''')
lib.cadam32bit_grad_fp32 # runs on an error if the library could not be found -> COMPILED_WITH_CUDA=False
_ = lib.cadam32bit_grad_fp32 # runs on an error if the library could not be found -> COMPILED_WITH_CUDA=False
lib.get_context.restype = ct.c_void_p
lib.get_cusparse.restype = ct.c_void_p
lib.cget_managed_ptr.restype = ct.c_void_p
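(Side note, not in the diff: these restype assignments matter because ctypes assumes a C int return type by default, which would truncate the 64-bit pointers these functions return.)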
