Add more PyTorch-related tests for torch nightly (#17422)

Signed-off-by: Yang Wang <elainewy@meta.com>
2026-03-16 16:27:15 +08:00 · 2025-05-02 03:29:59 -07:00 · 2025-05-02 03:29:59 -07:00 · b8b0859b5c
commit b8b0859b5c
parent d7543862bd
3 changed files with 14 additions and 4 deletions
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -293,6 +293,7 @@ steps:
  parallelism: 4

 - label: PyTorch Compilation Unit Tests
+  torch_nightly: true
  source_file_dependencies:
    - vllm/
    - tests/compile
@@ -302,6 +303,7 @@ steps:
    - pytest -v -s compile/test_sequence_parallelism.py

 - label: PyTorch Fullgraph Smoke Test # 9min
+  torch_nightly: true
  source_file_dependencies:
  - vllm/
  - tests/compile
@@ -312,6 +314,7 @@ steps:
  - pytest -v -s compile/piecewise/test_toy_llama.py

 - label: PyTorch Fullgraph Test # 18min
+  torch_nightly: true
  source_file_dependencies:
  - vllm/
  - tests/compile
@@ -436,6 +439,7 @@ steps:
 #####  models test  #####

 - label: Basic Models Test # 24min
+  torch_nightly: true
  source_file_dependencies:
  - vllm/
  - tests/models
--- a/requirements/nightly_torch_test.txt
+++ b/requirements/nightly_torch_test.txt
@@ -23,5 +23,11 @@ runai-model-streamer-s3==0.11.0
 tensorizer>=2.9.0
 lm-eval==0.4.8
 buildkite-test-collector==0.1.9
-
 lm-eval[api]==0.4.8 # required for model evaluation test
+
+# required for quantization test
+bitsandbytes>=0.45.3
+
+# required for minicpmo_26 test
+vector_quantize_pytorch
+vocos
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -186,9 +186,9 @@ class SamplingParams(
        logits_processors: list of functions that modify logits based on
            previously generated tokens, and optionally prompt tokens as
            a first argument.
-        truncate_prompt_tokens: If set to -1, will use the truncation size 
-            supported by the model. If set to an integer k, will use only 
-            the last k tokens from the prompt (i.e., left truncation). 
+        truncate_prompt_tokens: If set to -1, will use the truncation size
+            supported by the model. If set to an integer k, will use only
+            the last k tokens from the prompt (i.e., left truncation).
            Defaults to None (i.e., no truncation).
        guided_decoding: If provided, the engine will construct a guided
            decoding logits processor from these parameters. Defaults to None.