diff --git a/vllm/v1/worker/hpu_model_runner.py b/vllm/v1/worker/hpu_model_runner.py index 9da1691469dab..448ad0a3c349f 100644 --- a/vllm/v1/worker/hpu_model_runner.py +++ b/vllm/v1/worker/hpu_model_runner.py @@ -654,6 +654,7 @@ def __init__( self.max_prefill_batch_size = 16 # TODO(kzawora): add knob for that self.padding_aware_scheduling = True # TODO(kzawora): add knob for that self.padding_ratio_threshold = 0.9 # TODO(kzawora): add knob for that + os.environ['VLLM_CONTIGUOUS_PA'] = 'false' # NOTE(kzawora): this is a workaround self.use_contiguous_pa = os.environ.get('VLLM_CONTIGUOUS_PA', 'true').lower() == 'true' self.seen_configs: set = set()