fix comments

octoml · Feb 14, 2024 · f439b97 · f439b97
1 parent 65ae93a
commit f439b97
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 1 deletion.
diff --git a/serve/mlc_serve/engine/engine_common.py b/serve/mlc_serve/engine/engine_common.py
@@ -277,7 +277,7 @@ def get_requests_to_process(
     if is_prompt_batch:
         for state in current_states:
             if is_evicted_parallel_sampling_request(state):
-                # TODO(vvchernov): we still need mask if apply_penallty = True
+                # TODO(vvchernov): we still need mask if apply_penalty = True
                 # if state.sampling_params.repetition_penalty != 1.0:
                 set_mask_prompt_to(state)
                 requests.append(

diff --git a/serve/mlc_serve/model/sampler.py b/serve/mlc_serve/model/sampler.py
@@ -447,6 +447,8 @@ def adjust_logits(
             batch_size,
         )
 
+        # The vLLM and HF approaches to repetition penalty were verified to be equivalent.
+        # A combination of the two is used to compute it (see the references below).
         # Calculate the repetition penalty using the vLLM approach
         # https://github.com/vllm-project/vllm/blob/0580aab02ffe60fee50bddc80b787828eb233c44/vllm/model_executor/layers/sampler.py#L177
         # and RepetitionPenaltyLogitsProcessor approach from HF TGI API