diff --git a/vllm/attention/backends/abstract.py b/vllm/attention/backends/abstract.py
index 03f4c40302eb8..025ede1eb0a4e 100644
--- a/vllm/attention/backends/abstract.py
+++ b/vllm/attention/backends/abstract.py
@@ -294,6 +294,12 @@ class AttentionImpl(ABC, Generic[T]):
     # Some features like decode context parallelism require the softmax lse.
     can_return_lse_for_decode: bool = False
 
+    # Whether the attention impl supports Prefill Context Parallelism.
+    supports_pcp: bool = False
+    # Whether the attention impl (or ops) supports MTP
+    # when cp_kv_cache_interleave_size > 1.
+    supports_mtp_with_cp_non_trivial_interleave_size: bool = False
+
     # some attention backends might not always want to return lse
     # even if they can return lse (for efficiency reasons)
     need_to_return_lse_for_decode: bool = False
diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py
index 0327832c4fb8c..1f9dd38ac9114 100644
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -317,11 +317,6 @@ class ParallelConfig:
                 "num_redundant_experts."
             )
 
-        if self.prefill_context_parallel_size > 1:
-            raise ValueError(
-                "Prefill context parallelism is not fully supported. "
-                "Please set prefill_context_parallel_size to 1."
-            )
         return self
 
     @property
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index a3a9eec9b3203..0e75daf0d722c 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -820,11 +820,6 @@ class VllmConfig:
                 f"({self.parallel_config.cp_kv_cache_interleave_size})."
            )
 
-        assert (
-            self.parallel_config.cp_kv_cache_interleave_size == 1
-            or self.speculative_config is None
-        ), "MTP with cp_kv_cache_interleave_size > 1 is not supported now."
-
         # Do this after all the updates to compilation_config.mode
         self.compilation_config.set_splitting_ops_for_v1(
             all2all_backend=self.parallel_config.all2all_backend,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 3f23b95641d61..757023e12d439 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1848,16 +1848,6 @@ class EngineArgs:
         default_chunked_prefill = model_config.is_chunked_prefill_supported
         default_prefix_caching = model_config.is_prefix_caching_supported
 
-        if self.prefill_context_parallel_size > 1:
-            default_chunked_prefill = False
-            default_prefix_caching = False
-            logger.warning_once(
-                "--prefill-context-parallel-size > 1 is not compatible with "
-                "chunked prefill and prefix caching now. Chunked prefill "
-                "and prefix caching have been disabled by default.",
-                scope="local",
-            )
-
         if self.enable_chunked_prefill is None:
             self.enable_chunked_prefill = default_chunked_prefill
 
diff --git a/vllm/v1/worker/cp_utils.py b/vllm/v1/worker/cp_utils.py
new file mode 100644
index 0000000000000..f666c739b0be7
--- /dev/null
+++ b/vllm/v1/worker/cp_utils.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from typing import TYPE_CHECKING, Any, cast
+
+from vllm.config import VllmConfig, get_layers_from_vllm_config
+
+if TYPE_CHECKING:
+    from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
+else:
+    AttentionLayerBase = object
+
+
+def check_attention_cp_compatibility(vllm_config: VllmConfig) -> None:
+    pcp_size = vllm_config.parallel_config.prefill_context_parallel_size
+    dcp_size = vllm_config.parallel_config.decode_context_parallel_size
+    interleave_size = vllm_config.parallel_config.cp_kv_cache_interleave_size
+    if pcp_size * dcp_size > 1:
+        layer_type = cast(type[Any], AttentionLayerBase)
+        layers = get_layers_from_vllm_config(vllm_config, layer_type)
+        for layer in layers.values():
+            layer_impl = getattr(layer, "impl", None)
+            if layer_impl is None:
+                continue
+            if vllm_config.speculative_config is not None and interleave_size > 1:
+                assert layer_impl.supports_mtp_with_cp_non_trivial_interleave_size, (
+                    "MTP with cp_kv_cache_interleave_size > 1 is not "
+                    f"supported in {layer_impl.__class__.__name__}."
+                )
+            if dcp_size > 1:
+                assert layer_impl.need_to_return_lse_for_decode, (
+                    "DCP requires attention impls to return"
+                    " the softmax lse for decode, but the impl "
+                    f"{layer_impl.__class__.__name__} "
+                    "does not return the softmax lse for decode."
+                )
+
+            if pcp_size > 1:
+                assert layer_impl.supports_pcp, (
+                    "PCP requires attention impls' support, "
+                    f"but the impl {layer_impl.__class__.__name__} "
+                    "does not support PCP."
+                )
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 7dc86f1ee4815..0e2bf9df9a18f 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -148,6 +148,7 @@ from vllm.v1.spec_decode.ngram_proposer import NgramProposer
 from vllm.v1.spec_decode.suffix_decoding import SuffixDecodingProposer
 from vllm.v1.structured_output.utils import apply_grammar_bitmask
 from vllm.v1.utils import CpuGpuBuffer, record_function_or_nullcontext
+from vllm.v1.worker.cp_utils import check_attention_cp_compatibility
 from vllm.v1.worker.dp_utils import coordinate_batch_across_dp
 from vllm.v1.worker.ec_connector_model_runner_mixin import ECConnectorModelRunnerMixin
 from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
@@ -4736,6 +4737,9 @@ class GPUModelRunner(
             attention_backend_list, kv_cache_config.kv_cache_groups
         )
 
+        # Check that the attention backends support PCP & DCP and related features.
+        check_attention_cp_compatibility(self.vllm_config)
+
         for i, attn_backend_map in enumerate(attention_backend_maps):
             self.attn_groups.append(create_attn_groups(attn_backend_map, i))
 
@@ -5394,20 +5398,6 @@ class GPUModelRunner(
             kv_transfer_group.register_kv_caches(kv_caches)
             kv_transfer_group.set_host_xfer_buffer_ops(copy_kv_blocks)
 
-        if self.dcp_world_size > 1:
-            layer_type = cast(type[Any], AttentionLayerBase)
-            layers = get_layers_from_vllm_config(self.vllm_config, layer_type)
-            for layer in layers.values():
-                layer_impl = getattr(layer, "impl", None)
-                if layer_impl is None:
-                    continue
-                assert layer_impl.need_to_return_lse_for_decode, (
-                    "DCP requires attention impls to return"
-                    " the softmax lse for decode, but the impl "
-                    f"{layer_impl.__class__.__name__} "
-                    "does not return the softmax lse for decode."
-                )
-
     def may_add_encoder_only_layers_to_kv_cache_config(self) -> None:
         """
         Add encoder-only layers to the KV cache config.