diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index a8b1c94325902..c3fa0e44e5e8d 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -324,7 +324,7 @@ async def chat_completion_stream_generator(
             else:
                 tool_parsers = [None] * num_choices
         except RuntimeError as e:
-            logger.error("Error in tool parser creation: %s", e)
+            logger.exception("Error in tool parser creation.")
             data = self.create_streaming_error_response(str(e))
             yield f"data: {data}\n\n"
             yield "data: [DONE]\n\n"
@@ -600,7 +600,7 @@ async def chat_completion_stream_generator(
 
         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
-            logger.error("error in chat completion stream generator: %s", e)
+            logger.exception("Error in chat completion stream generator.")
             data = self.create_streaming_error_response(str(e))
             yield f"data: {data}\n\n"
         # Send the final done message after all response.n are finished
@@ -687,7 +687,7 @@ async def chat_completion_full_generator(
                 try:
                     tool_parser = self.tool_parser(tokenizer)
                 except RuntimeError as e:
-                    logger.error("Error in tool parser creation: %s", e)
+                    logger.exception("Error in tool parser creation.")
                     return self.create_error_response(str(e))
 
                 tool_call_info = tool_parser.extract_tool_calls(
diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
index bcbcda3fa528a..e7ea82ebd5411 100644
--- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
@@ -103,9 +103,9 @@ def extract_tool_calls(
                     tool_calls=tool_calls,
                     content=content if content else None)
 
-            except Exception as e:
-                logger.error("Error in extracting tool call from response %s",
-                             e)
+            except Exception:
+                logger.exception(
+                    "Error in extracting tool call from response.")
                 return ExtractedToolCallInformation(tools_called=False,
                                                     tool_calls=[],
                                                     content=model_output)
@@ -333,6 +333,6 @@ def extract_tool_calls_streaming(
 
                 return delta
 
-        except Exception as e:
-            logger.error("Error trying to handle streaming tool call: %s", e)
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
             return None  # do not stream a delta. skip this token ID.
diff --git a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
index 905ab7db3d04c..cb391e11bbde2 100644
--- a/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
@@ -166,8 +166,8 @@ def extract_tool_calls_streaming(
             tool_call_arr["arguments"] = self.get_argments(tool_call_arr)
             self.prev_tool_call_arr = [tool_call_arr]
             return delta
-        except Exception as e:
-            logger.error("Error trying to handle streaming tool call: %s", e)
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
             logger.debug(
                 "Skipping chunk as a result of tool streaming extraction "
                 "error")
diff --git a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
index 3cf34bc4928a5..1b836a687a1c3 100644
--- a/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
@@ -112,9 +112,8 @@ def extract_tool_calls(
                 content=None)
             return ret
 
-        except Exception as e:
-            logger.error("Error in extracting tool call from response: %s", e)
-            print("ERROR", e)
+        except Exception:
+            logger.exception("Error in extracting tool call from response.")
             # return information to just treat the tool call as regular JSON
             return ExtractedToolCallInformation(tools_called=False,
                                                 tool_calls=[],
@@ -269,8 +268,8 @@ def extract_tool_calls_streaming(
             self.prev_tool_call_arr = tool_call_arr
             return delta
 
-        except Exception as e:
-            logger.error("Error trying to handle streaming tool call: %s", e)
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
             logger.debug(
                 "Skipping chunk as a result of tool streaming extraction "
                 "error")
diff --git a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
index c6dc0688e38f9..ff4e88f29d39e 100644
--- a/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
@@ -111,8 +111,8 @@ def extract_tool_calls(
                 tool_calls=tool_calls,
                 content=content if len(content) > 0 else None)
 
-        except Exception as e:
-            logger.error("Error in extracting tool call from response: %s", e)
+        except Exception:
+            logger.exception("Error in extracting tool call from response.")
             # return information to just treat the tool call as regular JSON
             return ExtractedToolCallInformation(tools_called=False,
                                                 tool_calls=[],
@@ -298,8 +298,8 @@ def extract_tool_calls_streaming(
             self.prev_tool_call_arr = tool_call_arr
             return delta
 
-        except Exception as e:
-            logger.error("Error trying to handle streaming tool call: %s", e)
+        except Exception:
+            logger.exception("Error trying to handle streaming tool call.")
             logger.debug(
                 "Skipping chunk as a result of tool streaming extraction "
                 "error")
diff --git a/vllm/executor/multiproc_worker_utils.py b/vllm/executor/multiproc_worker_utils.py
index e14ecc13a9dc0..884267d23dfc8 100644
--- a/vllm/executor/multiproc_worker_utils.py
+++ b/vllm/executor/multiproc_worker_utils.py
@@ -3,7 +3,6 @@
 import os
 import sys
 import threading
-import traceback
 import uuid
 from dataclasses import dataclass
 from multiprocessing import Queue
@@ -227,10 +226,9 @@ def _run_worker_process(
             except KeyboardInterrupt:
                 break
             except BaseException as e:
-                tb = traceback.format_exc()
-                logger.error(
-                    "Exception in worker %s while processing method %s: %s, %s",
-                    process_name, method, e, tb)
+                logger.exception(
+                    "Exception in worker %s while processing method %s.",
+                    process_name, method)
                 exception = e
             result_queue.put(
                 Result(task_id=task_id, value=output, exception=exception))
diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py
index 1e2857ee28cbf..0c51314bc90df 100644
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -499,8 +499,8 @@ def kv_cache_scales_loader(
         logger.error("File or directory '%s' not found.", filename)
     except json.JSONDecodeError:
         logger.error("Error decoding JSON in file '%s'.", filename)
-    except Exception as e:
-        logger.error("An error occurred while reading '%s': %s", filename, e)
+    except Exception:
+        logger.exception("An error occurred while reading '%s'.", filename)
     # This section is reached if and only if any of the excepts are hit
     # Return an empty iterable (list) => no KV cache scales are loaded
     # which ultimately defaults to 1.0 scales
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index fa487e2f917d8..30bbf5107475d 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -137,10 +137,9 @@ def is_full_nvlink(cls, physical_device_ids: List[int]) -> bool:
                             pynvml.NVML_P2P_CAPS_INDEX_NVLINK)
                         if p2p_status != pynvml.NVML_P2P_STATUS_OK:
                             return False
-                    except pynvml.NVMLError as error:
-                        logger.error(
+                    except pynvml.NVMLError:
+                        logger.exception(
                             "NVLink detection failed. This is normal if your"
-                            " machine has no NVLink equipped.",
-                            exc_info=error)
+                            " machine has no NVLink equipped.")
                         return False
         return True
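The change applied across all of these hunks is the same: inside an `except` block, `logging.Logger.exception(msg, *args)` behaves like `logger.error(msg, *args, exc_info=True)`, logging at ERROR level and automatically appending the traceback of the exception currently being handled, so the handler no longer needs to interpolate `e` into the message or call `traceback.format_exc()` itself. A minimal standalone sketch of the pattern, using plain stdlib `logging` and a hypothetical `parse_tool_call` helper rather than vLLM's own modules:

```python
import json
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def parse_tool_call(raw: str) -> dict:
    """Hypothetical stand-in for a tool-call extractor; raises on bad input."""
    return json.loads(raw)


def handle(raw: str):
    try:
        return parse_tool_call(raw)
    except Exception:
        # Equivalent to logger.error(..., exc_info=True): the message is logged
        # at ERROR level and the traceback of the exception being handled is
        # appended automatically.
        logger.exception("Error in extracting tool call from response.")
        return None


handle("{not valid json}")  # logs the message plus the JSONDecodeError traceback
```

Both calls accept the same %-style formatting arguments; the only difference is the implicit `exc_info`, which is why the exception object itself is dropped from the format strings in the diff above.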