Skip to content

Commit

Permalink
Disable Disco for q4f16_ft and q8f16_ft quantization (mlc-ai#1094)
Browse files — browse the repository at this point in the history
  • Loading branch information
LeshengJin authored Oct 20, 2023
1 parent 9bf5723 commit 62d0c03
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion mlc_llm/core.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -597,6 +597,9 @@ def build_model_from_args(args: argparse.Namespace):
"`num_shards` should be used together with "
"`--build-model-only` and `--convert-weight-only`"
)
use_ft_quant = args.quantization.name in ["q4f16_ft", "q8f16_ft"]
if use_ft_quant:
raise ValueError("Multi-GPU deployments are not available for ft quantization.")
os.makedirs(args.artifact_path, exist_ok=True)
if args.debug_dump:
os.makedirs(os.path.join(args.artifact_path, "debug"), exist_ok=True)
Expand All @@ -614,7 +617,6 @@ def build_model_from_args(args: argparse.Namespace):
config = json.load(i_f)

if not use_cache or args.convert_weight_only:

model_generators = {
"llama": llama,
"mistral": llama,
Expand Down

0 comments on commit 62d0c03

Please sign in to comment.