[CI] Add smoke test for non-uniform AutoFP8 quantization (#6702)

This commit is contained in:
Michael Goin 2024-07-23 18:45:12 -04:00 committed by GitHub
parent 72fc704803
commit 01c16ede6b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -13,6 +13,7 @@ from vllm.model_executor.layers.quantization.fp8 import (Fp8KVCacheMethod,
# Model identifiers exercised by this test module; every name carries an
# "FP8" tag.
MODELS = [
    "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV",
    "nm-testing/Phi-3-mini-128k-instruct-FP8",
    # NOTE(review): presumably the non-uniform (QKV-skipped) AutoFP8
    # checkpoint referred to by the commit title -- confirm.
    "nm-testing/Qwen2-0.5B-Instruct-FP8-SkipQKV",
]