This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Abf149/fix semi structured sparse #16

Merged
merged 26 commits on Feb 16, 2024
Commits (26)
b8810c7
.gitignore magic_wand dir
afeldman-nm Feb 2, 2024
d56b4c4
added 2:4 example (not actually using 2:4 yet!)
afeldman-nm Feb 2, 2024
1a8bc1c
use only cuda:0
afeldman-nm Feb 2, 2024
2c6ff26
wip semi_structured_sparse_w16a16
afeldman-nm Feb 2, 2024
2856b91
restructuring sparsity
afeldman-nm Feb 3, 2024
708fe1b
difficulty creating sparse parameter class
afeldman-nm Feb 4, 2024
40a8afb
first successful run with 2:4 sparse model; compat with magic_wand br…
afeldman-nm Feb 4, 2024
017a296
Merge branch 'main' into semi_structured
afeldman-nm Feb 5, 2024
a344b60
woops uncommenting assert statement
afeldman-nm Feb 5, 2024
7a2a7ed
fixes
afeldman-nm Feb 8, 2024
0711a74
bfloat16
afeldman-nm Feb 9, 2024
fc85cac
hopefully removed magic_wand submodule
afeldman-nm Feb 9, 2024
d7b2f41
wip bench
afeldman-nm Feb 12, 2024
ef64711
initial integration
LucasWilkinson Feb 14, 2024
202e655
disable the semi-sparse stuff temporarily
LucasWilkinson Feb 14, 2024
7f67d62
Merge branch 'main' into lwilkinson/sparse-fused-gemm-integration
LucasWilkinson Feb 14, 2024
131a0a5
format fix
LucasWilkinson Feb 14, 2024
5c6a55e
remove sparse benchmark
LucasWilkinson Feb 14, 2024
ae57f2c
small format fix
LucasWilkinson Feb 14, 2024
fb95394
remove useless comments
LucasWilkinson Feb 14, 2024
b5ffb39
cleanup spacing
LucasWilkinson Feb 14, 2024
9b69f56
revert
LucasWilkinson Feb 14, 2024
1fbc82f
missed pack
LucasWilkinson Feb 14, 2024
c99f958
2:4 fix
afeldman-nm Feb 16, 2024
8f0416b
merge
afeldman-nm Feb 16, 2024
b80101a
lint/format
afeldman-nm Feb 16, 2024
22 changes: 16 additions & 6 deletions vllm/model_executor/layers/sparsity/sparse_w16a16_linear_method.py
@@ -6,7 +6,10 @@
 from vllm.model_executor.layers.linear import LinearMethodBase, set_weight_attrs
 from vllm.model_executor.layers.sparsity.base_config import SparsityConfig
 from vllm.model_executor.layers.parameters import LazyCompressedParameter
-from magic_wand import (CompressedStorageFormat, SparseBEGemmStorageFormat)
+from magic_wand.semi_structured import (pad_tensor_to_multiple,
+                                        extract_valid_rows)
+from magic_wand import (CompressedStorageFormat, SparseBEGemmStorageFormat,
+                        SparseSemiStructuredStorageFormat)
 from magic_wand.ops import be_ds_gemm

@@ -54,11 +57,18 @@ def apply_weights(
         if w.has_uncompressed_data:
             assert not w.has_compressed_data
             output = F.linear(x, w.uncompressed_data, bias)
-        # The current 2:4 implementation was running dense so ignore it
-        # for now and instead just explicitly decompress as usual
-        # elif self.storage_format_cls == SparseSemiStructuredStorageFormat:
-        #     assert bias is None
-        #     raise NotImplementedError
+        elif self.storage_format_cls == SparseSemiStructuredStorageFormat:
+            assert bias is None
+            w_encap = w.compressed_data.encapsulated_torch_sparse_tensor
+            out_shape = (x.shape[:-1] + (w_encap.shape[0], ))
+            reshaped_x, valid_rows_range = pad_tensor_to_multiple(
+                x.reshape(-1, x.shape[-1]), 8)
+            output = F.linear(
+                reshaped_x, w_encap,
+                torch.nn.Parameter(torch.zeros((w_encap.shape[0], ))).to(
+                    reshaped_x.dtype).to(reshaped_x.device)).contiguous()
+            output = extract_valid_rows(output, valid_rows_range)
+            return output.reshape(out_shape)
         elif self.storage_format_cls == SparseBEGemmStorageFormat:
             assert bias is None
             assert w.compress_transposed
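For context on the new branch: it flattens the activations to 2D, zero-pads the row count up to a multiple of 8 (presumably a shape requirement of the underlying semi-structured kernels, hence pad_tensor_to_multiple), runs F.linear against the encapsulated torch sparse tensor with an explicit all-zero bias, strips the padding rows, and restores the original leading dimensions. Below is a minimal runnable sketch of that pattern built on PyTorch's torch.sparse.to_sparse_semi_structured (available in PyTorch 2.1+). The pad_tensor_to_multiple and extract_valid_rows helpers here are hypothetical stand-ins for the magic_wand versions, whose implementations are not shown in this diff; the sketch also assumes an fp16-capable CUDA GPU with 2:4 sparse support (Ampere or newer).

# Sketch of the 2:4 semi-structured matmul pattern from the diff above.
# Assumptions (not from the PR): pad_tensor_to_multiple / extract_valid_rows
# are stand-ins for the magic_wand helpers, and the weight is converted with
# PyTorch's to_sparse_semi_structured rather than magic_wand's storage format.
import torch
import torch.nn.functional as F
from torch.sparse import to_sparse_semi_structured


def pad_tensor_to_multiple(x: torch.Tensor, multiple: int):
    """Zero-pad the row dimension of a 2D tensor up to the next multiple."""
    rows = x.shape[0]
    padded_rows = ((rows + multiple - 1) // multiple) * multiple
    if padded_rows != rows:
        pad = torch.zeros(padded_rows - rows, x.shape[1],
                          dtype=x.dtype, device=x.device)
        x = torch.cat([x, pad], dim=0)
    return x, (0, rows)  # padded tensor plus the range of original rows


def extract_valid_rows(x: torch.Tensor, valid_rows_range):
    """Drop the padding rows added by pad_tensor_to_multiple."""
    start, end = valid_rows_range
    return x[start:end]


# Build a weight with a 2:4 sparsity pattern: in every group of 4 elements
# along a row, at most 2 are nonzero. A fixed [1, 1, 0, 0] mask suffices.
out_features, in_features = 128, 128
w = torch.rand(out_features, in_features, dtype=torch.float16, device="cuda")
mask = torch.tensor([1, 1, 0, 0], device="cuda").tile(out_features,
                                                      in_features // 4)
w_encap = to_sparse_semi_structured(w * mask)

x = torch.rand(3, in_features, dtype=torch.float16, device="cuda")
out_shape = x.shape[:-1] + (w_encap.shape[0],)

# Pad rows to a multiple of 8, matmul against the sparse weight with an
# explicit zero bias (mirroring the diff), then strip the padding rows.
reshaped_x, valid = pad_tensor_to_multiple(x.reshape(-1, x.shape[-1]), 8)
zero_bias = torch.zeros(w_encap.shape[0], dtype=reshaped_x.dtype,
                        device=reshaped_x.device)
out = F.linear(reshaped_x, w_encap, zero_bias).contiguous()
out = extract_valid_rows(out, valid)
print(out.reshape(out_shape).shape)  # torch.Size([3, 128])

Note the explicit zero bias passed to F.linear: the branch asserts bias is None, then supplies a zeros tensor rather than None, mirroring the diff. The pad-then-slice dance is the cost of meeting the kernel's row-alignment requirement without constraining callers' batch sizes.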