[ci/lint] Add back default arg for pre-commit (#12279)
Signed-off-by: kevin <kevin@anyscale.com>
parent df76e5af26
commit 64ea24d0b3
.github/workflows/pre-commit.yml (vendored): 2 changes
@@ -16,4 +16,4 @@ jobs:
     - run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"
     - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
       with:
-        extra_args: --hook-stage manual
+        extra_args: --all-files --hook-stage manual
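
For reference, a rough local equivalent of what the restored default argument asks the CI action to run is sketched below (an assumption, not part of this change; it presumes pre-commit is installed locally, and the action also adds its own default flags):

# Hypothetical local equivalent of the CI invocation: run pre-commit over all
# files at the "manual" hook stage.
import subprocess

subprocess.run(
    ["pre-commit", "run", "--all-files", "--hook-stage", "manual"],
    check=True,
)
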
@@ -74,11 +74,7 @@ DOLPHIN_CONFIG = GGUFTestConfig(
 )
 
 MODELS = [
-    LLAMA_CONFIG,
-    QWEN2_CONFIG,
-    PHI3_CONFIG,
-    GPT2_CONFIG,
-    STABLELM_CONFIG,
+    LLAMA_CONFIG, QWEN2_CONFIG, PHI3_CONFIG, GPT2_CONFIG, STABLELM_CONFIG,
     DOLPHIN_CONFIG
     # STARCODER_CONFIG, # broken
 ]
@@ -114,11 +110,12 @@ def test_models(
         messages, tokenize=False, add_generation_prompt=True)
 
     # Run unquantized model.
-    with vllm_runner(model_name=model.original_model,
-                     enforce_eager=True,  # faster tests
-                     dtype=dtype,
-                     max_model_len=MAX_MODEL_LEN,
-                     tensor_parallel_size=tp_size) as original_model:
+    with vllm_runner(
+            model_name=model.original_model,
+            enforce_eager=True,  # faster tests
+            dtype=dtype,
+            max_model_len=MAX_MODEL_LEN,
+            tensor_parallel_size=tp_size) as original_model:
         original_outputs = original_model.generate_greedy_logprobs(
             example_prompts[:-1], max_tokens, num_logprobs)
 
@@ -350,10 +350,8 @@ class SiglipMLP(nn.Module):
         else:
             # For other quantization, we require the hidden size to be a
             # multiple of 64
-            quantizable = (
-                config.hidden_size % 64 == 0
-                and config.intermediate_size % 64 == 0
-            )
+            quantizable = (config.hidden_size % 64 == 0
+                           and config.intermediate_size % 64 == 0)
         self.fc1 = ColumnParallelLinear(
             config.hidden_size,
             config.intermediate_size,
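
The reformatted expression above keeps the same gate: the layer is treated as quantizable only when both dimensions are multiples of 64. A minimal standalone sketch of that check follows; the helper name and example sizes are illustrative, not taken from vLLM:

# Illustrative sketch of the multiple-of-64 gate; helper name and sizes are
# made up for demonstration.
def is_quantizable(hidden_size: int, intermediate_size: int) -> bool:
    # Both dimensions must divide evenly by 64 for the generic quantization path.
    return hidden_size % 64 == 0 and intermediate_size % 64 == 0


assert is_quantizable(1024, 4096) is True
assert is_quantizable(1000, 4096) is False  # 1000 is not a multiple of 64
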
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
     try:
         from importlib.metadata import version
         is_cpu = "cpu" in version("vllm")
-        if is_cpu == False:
+        if not is_cpu:
             import platform
             is_cpu = platform.machine().lower().startswith("arm")
 
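
This change swaps the non-idiomatic "is_cpu == False" comparison for "not is_cpu", which also covers a falsy or None result. Below is a self-contained sketch of the same detection logic using only the standard library; the helper name is hypothetical, not vLLM's API:

# Standalone sketch: treat the install as a CPU build if "cpu" appears in the
# installed package's version string, otherwise fall back to checking for an
# ARM host. The helper name is hypothetical.
import platform
from importlib.metadata import PackageNotFoundError, version


def looks_like_cpu_build(package: str = "vllm") -> bool:
    try:
        is_cpu = "cpu" in version(package)
    except PackageNotFoundError:
        is_cpu = False
    if not is_cpu:  # idiomatic, and unlike "== False" also handles falsy values
        is_cpu = platform.machine().lower().startswith("arm")
    return is_cpu
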
@@ -10,10 +10,11 @@ from msgspec import field as msgspec_field
 from vllm.sampling_params import SamplingParams
 
 
-class RequestStatsUpdate(msgspec.Struct,
-                         array_like=True,
-                         omit_defaults=True,
-                         gc=False):
+class RequestStatsUpdate(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     An update to the request stats.
 
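
Passing the options directly as class keywords is how msgspec configures a Struct subclass; the "# type: ignore" only quiets type checkers that do not understand those class kwargs. A minimal sketch with the same options is shown below, where the struct and its fields are hypothetical rather than vLLM's:

# Minimal msgspec.Struct sketch using the same class options as above.
# array_like=True encodes instances as compact arrays instead of maps,
# omit_defaults=True skips fields still at their default value, and gc=False
# opts the type out of cyclic garbage-collector tracking. Field names are
# hypothetical.
from typing import Optional

import msgspec


class ExampleUpdate(
        msgspec.Struct,  # type: ignore
        array_like=True,
        omit_defaults=True,
        gc=False):
    request_id: str
    num_new_tokens: Optional[int] = None


payload = msgspec.msgpack.encode(ExampleUpdate(request_id="req-0"))
restored = msgspec.msgpack.decode(payload, type=ExampleUpdate)
assert restored.request_id == "req-0"
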
@@ -341,8 +342,8 @@ class RequestStats:
             self.queued_ts_s = ts
         elif update.type == RequestStatsUpdate.Type.PREFILLING:
             self.prefill_start_ts_s_lst.append(ts)
-            self.num_cached_tokens = update.num_cached_tokens
-            self.num_computed_tokens = update.num_computed_tokens
+            self.num_cached_tokens = update.num_cached_tokens or 0
+            self.num_computed_tokens = update.num_computed_tokens or 0
         elif update.type == RequestStatsUpdate.Type.PREEMPTED:
             self._reset_for_preemption(ts)
         elif update.type == RequestStatsUpdate.Type.DECODING:
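
The "or 0" guards normalize these Optional counters to plain integers before they are stored, since an update may carry None for them. A tiny sketch of the idiom, with illustrative names only:

# Sketch of the "x or 0" normalization: None (or any falsy value) becomes 0,
# so downstream arithmetic never sees an Optional. Name is illustrative.
from typing import Optional


def to_count(value: Optional[int]) -> int:
    return value or 0


assert to_count(None) == 0
assert to_count(5) == 5

An explicit 0 also maps to 0, which is the desired behavior for token counters.
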
@@ -350,7 +351,7 @@ class RequestStats:
         elif update.type == RequestStatsUpdate.Type.DETOKENIZED:
             self._record_detokenized_output(
                 ts,
-                update.num_new_tokens,
+                update.num_new_tokens or 0,
             )
         elif update.type == RequestStatsUpdate.Type.FINISHED:
             self.finished_ts_s = ts
@@ -425,10 +426,11 @@ class EngineCoreProcessStats:
     output_queue_size: Optional[int] = None
 
 
-class EngineCoreStatsSnapshot(msgspec.Struct,
-                              array_like=True,
-                              omit_defaults=True,
-                              gc=False):
+class EngineCoreStatsSnapshot(
+        msgspec.Struct,  # type: ignore
+        array_like=True,
+        omit_defaults=True,
+        gc=False):
     """
     A snapshot of the EngineCore's current stats over a period of time.
     """