diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
index 176c1825530e4..9c62595ad280b 100644
--- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
+++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py
@@ -60,6 +60,7 @@ def create_dummy_embeds(num_tokens: int = 5) -> str:
     return base64.b64encode(buffer.getvalue()).decode('utf-8')
 
 
+@pytest.mark.skip("This test is skipped because it is flaky.")
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_completions_with_prompt_embeds(
diff --git a/tests/models/quantization/test_fp8.py b/tests/models/quantization/test_fp8.py
index 97dd4d6135ac4..bb8ae741b6149 100644
--- a/tests/models/quantization/test_fp8.py
+++ b/tests/models/quantization/test_fp8.py
@@ -32,7 +32,7 @@ from ..utils import check_logprobs_close
 # Due to low-precision numerical divergence, we only test logprob of 4 tokens
 @pytest.mark.parametrize("max_tokens", [4])
 @pytest.mark.parametrize("enforce_eager", [True])
-@pytest.mark.parametrize("backend", ["FLASH_ATTN", "XFORMERS"])
+@pytest.mark.parametrize("backend", ["FLASH_ATTN"])
 # NOTE: Increasing this in this suite will fail CI because we currently cannot
 # reset distributed env properly. Use a value > 1 just when you test.
 @pytest.mark.parametrize("tensor_parallel_size", [1])
@@ -57,6 +57,9 @@ def test_models(
         pytest.skip(
             f"{kv_cache_dtype} is currently not supported on ROCm/HIP.")
 
+    if not current_platform.is_kv_cache_dtype_supported(kv_cache_dtype, None):
+        pytest.skip(f"{kv_cache_dtype} is not supported on this platform.")
+
     with monkeypatch.context() as m:
         m.setenv("TOKENIZERS_PARALLELISM", 'true')
         m.setenv(STR_BACKEND_ENV_VAR, backend)
diff --git a/tests/models/test_oot_registration.py b/tests/models/test_oot_registration.py
index cb30d77c4f0ea..9b376f2a260ac 100644
--- a/tests/models/test_oot_registration.py
+++ b/tests/models/test_oot_registration.py
@@ -63,6 +63,7 @@ def test_oot_registration_embedding(
 image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")
 
 
+@pytest.mark.skip(reason="This test is skipped because it failed on V1.")
 @create_new_process_for_each_test()
 def test_oot_registration_multimodal(
     monkeypatch: pytest.MonkeyPatch,
diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py
index b7949a488ad05..c0ab3fbb10622 100644
--- a/tests/quantization/test_compressed_tensors.py
+++ b/tests/quantization/test_compressed_tensors.py
@@ -357,6 +357,9 @@ def test_compressed_tensors_fp8(vllm_runner):
     assert output
 
 
+@pytest.mark.skipif(
+    not current_platform.is_kv_cache_dtype_supported("fp8", None),
+    reason="FP8 KV cache is not supported on this device.")
 @pytest.mark.skipif(not current_platform.is_cuda(),
                     reason="This test is skipped on non-CUDA platform.")
 def test_compressed_tensors_kv_cache(vllm_runner):
@@ -738,4 +741,4 @@ def test_compressed_tensors_transforms_perplexity(vllm_runner, model, prompt,
     with vllm_runner(model, enforce_eager=True) as llm:
         perplexity = llm.generate_prompt_perplexity([prompt])[0]
         print(perplexity)
-        assert perplexity <= exp_perplexity
\ No newline at end of file
+        assert perplexity <= exp_perplexity