diff --git a/tests/v1/ec_connector/integration/test_epd_correctness.py b/tests/v1/ec_connector/integration/test_epd_correctness.py index 69c4c58e349b..616d34441ab8 100644 --- a/tests/v1/ec_connector/integration/test_epd_correctness.py +++ b/tests/v1/ec_connector/integration/test_epd_correctness.py @@ -237,9 +237,8 @@ def main(): for i, prompt_data in enumerate(test_prompts): print( - f"\nRunning prompt {i + 1}/{len(test_prompts)}: { - prompt_data['description'] - }" + f"\nRunning prompt {i + 1}/{len(test_prompts)}: " + f"{prompt_data['description']}" ) output_str = run_chat_completion( diff --git a/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py b/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py index a6f50d68cc68..dcdeedda60a7 100644 --- a/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py +++ b/vllm/v1/worker/gpu/spec_decode/eagle_cudagraph.py @@ -35,13 +35,16 @@ class EagleCudaGraphManager: self.compilation_config = vllm_config.compilation_config assert self.compilation_config is not None + cudagraph_mode: CUDAGraphMode if self.compilation_config.cudagraph_mode is None: - self.cudagraph_mode = CUDAGraphMode.NONE + cudagraph_mode = CUDAGraphMode.NONE else: - self.cudagraph_mode = self.compilation_config.cudagraph_mode - if self.cudagraph_mode == CUDAGraphMode.FULL: + cudagraph_mode = self.compilation_config.cudagraph_mode + if cudagraph_mode == CUDAGraphMode.FULL: # NOTE(woosuk): For Eagle, we only use CUDA graphs for decode. - self.cudagraph_mode = CUDAGraphMode.FULL_DECODE_ONLY + cudagraph_mode = CUDAGraphMode.FULL_DECODE_ONLY + + self.cudagraph_mode = cudagraph_mode self.cudagraph_sizes = get_cudagraph_sizes( self.compilation_config.cudagraph_capture_sizes,