diff --git a/docs/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md index dfed15d4ace97..595115ab6ff77 100644 --- a/docs/serving/openai_compatible_server.md +++ b/docs/serving/openai_compatible_server.md @@ -840,3 +840,8 @@ Key capabilities: The following example shows how to deploy a large model like DeepSeek R1 with Ray Serve LLM: . Learn more about Ray Serve LLM with the official [Ray Serve LLM documentation](https://docs.ray.io/en/latest/serve/llm/serving-llms.html). + +curl http://localhost:8002/v1/rerank -H "Content-Type: application/json" -d '{ + "query": "What is the capital of France?", + "documents": ["The capital of France is Paris.", "The capital of Germany is Berlin."] + }' \ No newline at end of file diff --git a/requirements/cuda.txt b/requirements/cuda.txt index fb30e493f80b3..77242381e124e 100644 --- a/requirements/cuda.txt +++ b/requirements/cuda.txt @@ -5,10 +5,4 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req numba == 0.61.2; python_version > '3.9' # Dependencies for NVIDIA GPUs -ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. -torch==2.7.1 -torchaudio==2.7.1 -# These must be updated alongside torch -torchvision==0.22.1 # Required for phi3v processor. 
See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version -# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31 -xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7 \ No newline at end of file +torch==2.8.0 \ No newline at end of file diff --git a/tests/models/quantization/test_mxfp4.py b/tests/models/quantization/test_mxfp4.py index 7b8a334bbc369..425e20f846f1d 100644 --- a/tests/models/quantization/test_mxfp4.py +++ b/tests/models/quantization/test_mxfp4.py @@ -39,3 +39,20 @@ def test_models(example_prompts, model_name) -> None: expected_str = EXPECTED_STRS_MAP[model_name][i] assert expected_str == output_str, ( f"Expected: {expected_str!r}\nvLLM: {output_str!r}") + +curl http://localhost:8002/v1/embeddings \ + -H "Content-Type: application/json" \ + -d '{ + "input": "Query: What is the capital of France? \n\nDocuments: \n1. Paris is the capital city of France.\n2. Berlin is the capital of Germany.\n \n Rank the documents from most to least relevant to the query and provide a relevance score", + "model": "$MODEL", + "encoding_format": "float" + }' + + +curl http://localhost:8002/v1/rerank \ + -H "Content-Type: application/json" \ + -d '{ + "input": "Query: What is the capital of France? \n\nDocuments: \n1. Paris is the capital city of France.\n2. Berlin is the capital of Germany.\n \n Rank the documents from most to least relevant to the query and provide a relevance score", + "prompt": "Query: What is the capital of France? \n\nDocuments: \n1. Paris is the capital city of France.\n2. Berlin is the capital of Germany.\n \n Rank the documents from most to least relevant to the query and provide a relevance score", + "model": "BAAI/bge-reranker-v2-m3" + }'